mirror of https://github.com/neondatabase/neon.git
synced 2025-12-28 00:23:00 +00:00

Merge branch 'main' into rocksdb_pageserver

Cargo.lock (generated, 268 lines changed)
@@ -91,19 +91,19 @@ dependencies = [

[[package]]
name = "async-io"
-version = "1.3.1"
+version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9315f8f07556761c3e48fec2e6b276004acf426e6dc068b2c2251854d65ee0fd"
+checksum = "fcb9af4888a70ad78ecb5efcb0ba95d66a3cf54a88b62ae81559954c7588c7a2"
dependencies = [
 "concurrent-queue",
 "fastrand",
 "futures-lite",
 "libc",
 "log",
 "nb-connect",
 "once_cell",
 "parking",
 "polling",
 "socket2",
 "vec-arena",
 "waker-fn",
 "winapi",

@@ -111,9 +111,9 @@ dependencies = [

[[package]]
name = "async-lock"
-version = "2.3.0"
+version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1996609732bde4a9988bc42125f55f2af5f3c36370e27c778d5191a4a1b63bfb"
+checksum = "e6a8ea61bf9947a1007c5cada31e647dbc77b103c679858150003ba697ea798b"
dependencies = [
 "event-listener",
]

@@ -162,9 +162,9 @@ checksum = "e91831deabf0d6d7ec49552e489aed63b7456a7a3c46cff62adad428110b0af0"

[[package]]
name = "async-trait"
-version = "0.1.48"
+version = "0.1.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36ea56748e10732c49404c153638a15ec3d6211ec5ff35d9bb20e13b93576adf"
+checksum = "0b98e84bbb4cbcdd97da190ba0c58a1bb0de2c1fdf67d159e192ed766aeca722"
dependencies = [
 "proc-macro2",
 "quote",

@@ -250,14 +250,18 @@ dependencies = [
 "bitflags",
 "cexpr",
 "clang-sys",
+ "clap",
+ "env_logger",
 "lazy_static",
 "lazycell",
+ "log",
 "peeking_take_while",
 "proc-macro2",
 "quote",
 "regex",
 "rustc-hash",
 "shlex",
+ "which",
]

[[package]]
@@ -424,15 +428,22 @@ checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
name = "control_plane"
version = "0.1.0"
dependencies = [
- "home",
+ "anyhow",
 "bytes",
 "fs_extra",
 "hex",
 "lazy_static",
 "pageserver",
 "postgres",
 "postgres_ffi",
 "rand 0.8.3",
 "regex",
 "serde",
 "serde_derive",
 "tar",
 "tokio-postgres",
 "toml",
 "walkeeper",
]

[[package]]
@@ -468,9 +479,9 @@ dependencies = [

[[package]]
name = "crossbeam-channel"
-version = "0.5.0"
+version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
+checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
 "cfg-if 1.0.0",
 "crossbeam-utils",

@@ -585,6 +596,19 @@ dependencies = [
 "cfg-if 1.0.0",
]

+[[package]]
+name = "env_logger"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17392a012ea30ef05a610aa97dfb49496e71c9f676b27879922ea5bdf60d9d3f"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]

[[package]]
name = "event-listener"
version = "2.5.1"

@@ -606,6 +630,18 @@ dependencies = [
 "instant",
]

+[[package]]
+name = "filetime"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8"
+dependencies = [
+ "cfg-if 1.0.0",
+ "libc",
+ "redox_syscall 0.2.6",
+ "winapi",
+]

[[package]]
name = "fnv"
version = "1.0.7"

@@ -648,10 +684,16 @@ dependencies = [
]

[[package]]
-name = "futures"
-version = "0.3.13"
+name = "fs_extra"
+version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f55667319111d593ba876406af7c409c0ebb44dc4be6132a783ccf163ea14c1"
+checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
+
+[[package]]
+name = "futures"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253"
dependencies = [
 "futures-channel",
 "futures-core",

@@ -664,9 +706,9 @@ dependencies = [

[[package]]
name = "futures-channel"
-version = "0.3.13"
+version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939"
+checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25"
dependencies = [
 "futures-core",
 "futures-sink",

@@ -674,15 +716,15 @@ dependencies = [

[[package]]
name = "futures-core"
-version = "0.3.13"
+version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94"
+checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815"

[[package]]
name = "futures-executor"
-version = "0.3.13"
+version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "891a4b7b96d84d5940084b2a37632dd65deeae662c114ceaa2c879629c9c0ad1"
+checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d"
dependencies = [
 "futures-core",
 "futures-task",

@@ -691,9 +733,9 @@ dependencies = [

[[package]]
name = "futures-io"
-version = "0.3.13"
+version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59"
+checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04"

[[package]]
name = "futures-lite"

@@ -712,9 +754,9 @@ dependencies = [

[[package]]
name = "futures-macro"
-version = "0.3.13"
+version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ea405816a5139fb39af82c2beb921d52143f556038378d6db21183a5c37fbfb7"
+checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b"
dependencies = [
 "proc-macro-hack",
 "proc-macro2",

@@ -724,21 +766,21 @@ dependencies = [

[[package]]
name = "futures-sink"
-version = "0.3.13"
+version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3"
+checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23"

[[package]]
name = "futures-task"
-version = "0.3.13"
+version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80"
+checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc"

[[package]]
name = "futures-util"
-version = "0.3.13"
+version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1"
+checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025"
dependencies = [
 "futures-channel",
 "futures-core",

@@ -858,20 +900,11 @@ dependencies = [
 "digest",
]

-[[package]]
-name = "home"
-version = "0.5.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2456aef2e6b6a9784192ae780c0f15bc57df0e918585282325e8c8ac27737654"
-dependencies = [
- "winapi",
-]
-
[[package]]
name = "http"
-version = "0.2.3"
+version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7245cd7449cc792608c3c8a9eaf69bd4eabbabf802713748fd739c98b82f0747"
+checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11"
dependencies = [
 "bytes",
 "fnv",

@@ -891,9 +924,9 @@ dependencies = [

[[package]]
name = "httparse"
-version = "1.3.5"
+version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "615caabe2c3160b313d52ccc905335f4ed5f10881dd63dc5699d47e90be85691"
+checksum = "4a1ce40d6fc9764887c2fdc7305c3dcc429ba11ff981c1509416afd5697e4437"

[[package]]
name = "httpdate"
@@ -901,6 +934,12 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "494b4d60369511e7dea41cf646832512a94e542f68bb9c49e54518e0f468eb47"

+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
[[package]]
name = "hyper"
version = "0.14.5"

@@ -940,9 +979,9 @@ dependencies = [

[[package]]
name = "idna"
-version = "0.2.2"
+version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89829a5d69c23d348314a7ac337fe39173b61149a9864deabd260983aed48c21"
+checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8"
dependencies = [
 "matches",
 "unicode-bidi",

@@ -974,9 +1013,11 @@ version = "0.1.0"
dependencies = [
 "control_plane",
 "lazy_static",
 "pageserver",
 "postgres",
 "rand 0.8.3",
 "tokio-postgres",
 "walkeeper",
]

[[package]]
@@ -993,9 +1034,9 @@ checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"

[[package]]
name = "jobserver"
-version = "0.1.21"
+version = "0.1.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c71313ebb9439f74b00d9d2dcec36440beaf57a6aa0623068441dd7cd81a7f2"
+checksum = "972f5ae5d1cb9c6ae417789196c803205313edde988685da5e3aae0827b9e7fd"
dependencies = [
 "libc",
]

@@ -1032,9 +1073,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"

[[package]]
name = "libc"
-version = "0.2.92"
+version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714"
+checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"

[[package]]
name = "libloading"

@@ -1049,7 +1090,7 @@ dependencies = [
[[package]]
name = "librocksdb-sys"
version = "6.17.3"
-source = "git+https://github.com/rust-rocksdb/rust-rocksdb.git#0b700fe70da8ee30483fde79f44df549f8fe11ec"
+source = "git+https://github.com/rust-rocksdb/rust-rocksdb.git#7dd6258b07861b9332f827b416e50e5aee69aea1"
dependencies = [
 "bindgen",
 "cc",

@@ -1162,16 +1203,6 @@ dependencies = [
 "tempfile",
]

[[package]]
name = "nb-connect"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a19900e7eee95eb2b3c2e26d12a874cc80aaf750e31be6fcbe743ead369fa45d"
dependencies = [
 "libc",
 "socket2",
]

[[package]]
name = "nom"
version = "5.1.2"

@@ -1294,12 +1325,15 @@ dependencies = [
 "crossbeam-channel",
 "daemonize",
 "fs2",
 "fs_extra",
 "futures",
 "hex",
 "lazy_static",
 "log",
 "postgres",
 "postgres-protocol",
 "postgres-types",
 "postgres_ffi",
 "rand 0.8.3",
 "regex",
 "rocksdb",

@@ -1309,6 +1343,7 @@ dependencies = [
 "slog-scope",
 "slog-stdlog",
 "slog-term",
+ "tar",
 "termion",
 "thiserror",
 "tokio",

@@ -1344,7 +1379,7 @@ dependencies = [
 "cfg-if 1.0.0",
 "instant",
 "libc",
- "redox_syscall 0.2.5",
+ "redox_syscall 0.2.6",
 "smallvec",
 "winapi",
]

@@ -1381,18 +1416,18 @@ dependencies = [

[[package]]
name = "pin-project"
-version = "1.0.6"
+version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc174859768806e91ae575187ada95c91a29e96a98dc5d2cd9a1fed039501ba6"
+checksum = "c7509cc106041c40a4518d2af7a61530e1eed0e6285296a3d8c5472806ccc4a4"
dependencies = [
 "pin-project-internal",
]

[[package]]
name = "pin-project-internal"
-version = "1.0.6"
+version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a490329918e856ed1b083f244e3bfe2d8c4f336407e4ea9e1a9f479ff09049e5"
+checksum = "48c950132583b500556b1efd71d45b319029f2b71518d979fcc208e16b42426f"
dependencies = [
 "proc-macro2",
 "quote",

@@ -1470,6 +1505,20 @@ dependencies = [
 "postgres-protocol",
]

+[[package]]
+name = "postgres_ffi"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "bindgen",
+ "byteorder",
+ "bytes",
+ "chrono",
+ "crc32c",
+ "hex",
+ "rand 0.8.3",
+]

[[package]]
name = "ppv-lite86"
version = "0.2.10"

@@ -1595,9 +1644,9 @@ checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"

[[package]]
name = "redox_syscall"
-version = "0.2.5"
+version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9"
+checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041"
dependencies = [
 "bitflags",
]

@@ -1608,7 +1657,7 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f"
dependencies = [
- "redox_syscall 0.2.5",
+ "redox_syscall 0.2.6",
]

[[package]]
@@ -1629,7 +1678,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
dependencies = [
 "getrandom 0.2.2",
- "redox_syscall 0.2.5",
+ "redox_syscall 0.2.6",
]

[[package]]
@@ -1660,9 +1709,9 @@ dependencies = [

[[package]]
name = "reqwest"
-version = "0.11.2"
+version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf12057f289428dbf5c591c74bf10392e4a8003f993405a902f20117019022d4"
+checksum = "2296f2fac53979e8ccbc4a1136b25dcefd37be9ed7e4a1f6b05a6029c84ff124"
dependencies = [
 "base64",
 "bytes",

@@ -1695,8 +1744,8 @@ dependencies = [

[[package]]
name = "rocksdb"
-version = "0.15.0"
-source = "git+https://github.com/rust-rocksdb/rust-rocksdb.git#0b700fe70da8ee30483fde79f44df549f8fe11ec"
+version = "0.16.0"
+source = "git+https://github.com/rust-rocksdb/rust-rocksdb.git#7dd6258b07861b9332f827b416e50e5aee69aea1"
dependencies = [
 "libc",
 "librocksdb-sys",

@@ -1939,9 +1988,9 @@ checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27"

[[package]]
name = "slab"
-version = "0.4.2"
+version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
+checksum = "f173ac3d1a7e3b28003f40de0b5ce7fe2710f9b9dc3fc38664cebee46b3b6527"

[[package]]
name = "slog"

@@ -2036,9 +2085,9 @@ checksum = "1e81da0851ada1f3e9d4312c704aa4f8806f0f9d69faaf8df2f3464b4a9437c2"

[[package]]
name = "syn"
-version = "1.0.68"
+version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87"
+checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb"
dependencies = [
 "proc-macro2",
 "quote",

@@ -2051,6 +2100,17 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"

+[[package]]
+name = "tar"
+version = "0.4.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0bcfbd6a598361fda270d82469fff3d65089dc33e175c9a131f7b4cd395f228"
+dependencies = [
+ "filetime",
+ "libc",
+ "xattr",
+]

[[package]]
name = "tempfile"
version = "3.2.0"
@@ -2060,7 +2120,7 @@ dependencies = [
 "cfg-if 1.0.0",
 "libc",
 "rand 0.8.3",
- "redox_syscall 0.2.5",
+ "redox_syscall 0.2.6",
 "remove_dir_all",
 "winapi",
]

@@ -2076,6 +2136,15 @@ dependencies = [
 "winapi",
]

+[[package]]
+name = "termcolor"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
+dependencies = [
+ "winapi-util",
+]

[[package]]
name = "termion"
version = "1.5.6"
@@ -2084,7 +2153,7 @@ checksum = "077185e2eac69c3f8379a4298e1e07cd36beb962290d4a51199acf0fdc10607e"
dependencies = [
 "libc",
 "numtoa",
- "redox_syscall 0.2.5",
+ "redox_syscall 0.2.6",
 "redox_termios",
]

@@ -2154,9 +2223,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"

[[package]]
name = "tokio"
-version = "1.4.0"
+version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "134af885d758d645f0f0505c9a8b3f9bf8a348fd822e112ab5248138348f1722"
+checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5"
dependencies = [
 "autocfg",
 "bytes",

@@ -2228,9 +2297,9 @@ dependencies = [

[[package]]
name = "tokio-util"
-version = "0.6.5"
+version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5143d049e85af7fbc36f5454d990e62c2df705b3589f123b71f441b6b59f443f"
+checksum = "940a12c99365c31ea8dd9ba04ec1be183ffe4920102bb7122c2f515437601e8e"
dependencies = [
 "bytes",
 "futures-core",

@@ -2302,9 +2371,9 @@ checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06"

[[package]]
name = "unicode-bidi"
-version = "0.3.4"
+version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
+checksum = "eeb8be209bb1c96b7c177c7420d26e04eccacb0eeae6b980e35fcb74678107e0"
dependencies = [
 "matches",
]

@@ -2359,9 +2428,9 @@ dependencies = [

[[package]]
name = "vcpkg"
-version = "0.2.11"
+version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b00bca6106a5e23f3eee943593759b7fcddb00554332e856d990c893966879fb"
+checksum = "cbdbff6266a24120518560b5dc983096efb98462e51d0d68169895b237be3e5d"

[[package]]
name = "vec-arena"

@@ -2414,6 +2483,7 @@ dependencies = [
 "futures",
 "lazy_static",
 "log",
+ "pageserver",
 "postgres",
 "postgres-protocol",
 "rand 0.8.3",

@@ -2540,6 +2610,15 @@ dependencies = [
 "cc",
]

+[[package]]
+name = "which"
+version = "3.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724"
+dependencies = [
+ "libc",
+]

[[package]]
name = "wildmatch"
version = "1.1.0"

@@ -2586,6 +2665,15 @@ dependencies = [
 "winapi",
]

+[[package]]
+name = "xattr"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c"
+dependencies = [
+ "libc",
+]

[[package]]
name = "xml-rs"
version = "0.8.3"

@@ -2596,6 +2684,14 @@ checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
name = "zenith"
version = "0.1.0"
dependencies = [
 "anyhow",
 "clap",
 "control_plane",
 "pageserver",
 "postgres_ffi",
 "walkeeper",
]

[[package]]
name = "zenith_utils"
version = "0.1.0"
@@ -5,4 +5,6 @@ members = [
    "walkeeper",
    "zenith",
    "control_plane",
+   "postgres_ffi",
+   "zenith_utils",
]
@@ -19,7 +19,7 @@ cargo build
>./target/debug/zenith init

# start pageserver
-> ./target/debug/zenith start
+> ./target/debug/zenith pageserver start
Starting pageserver at '127.0.0.1:64000'

# create and configure postgres data dir
cli-v2-story.md (new file, 188 lines)
@@ -0,0 +1,188 @@

Create a new Zenith repository in the current directory:

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
The files belonging to this database system will be owned by user "heikki".
This user must also own the server process.

The database cluster will be initialized with locale "en_GB.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".

Data page checksums are disabled.

creating directory tmp ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting default time zone ... Europe/Helsinki
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok

initdb: warning: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.
new zenith repository was created in .zenith

Initially, there is only one branch:

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
main

Start a local Postgres instance on the branch:

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
done
server started

Run some commands against it:

~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
CREATE TABLE
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
(1 row)

Create a new branch called 'experimental'. We create it from the
current end of the 'main' branch, but you could specify a different
LSN as the start point instead.

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
branching at end of WAL: 0/161F478

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
experimental
main

Start another Postgres instance off the 'experimental' branch:

~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
done
server started

Check that the row from 'main' is visible, then insert a row on the 'experimental' branch:

~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
(1 row)

~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
INSERT 0 1
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
 inserted on experimental
(2 rows)

See that the other Postgres instance is still running on the 'main' branch on port 5432:

~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
(1 row)

Everything is stored in the .zenith directory:

~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
total 12
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines

The 'datadirs' directory contains the datadirs of the running instances:

~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
total 8
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
total 124
drwxr-xr-x 5 heikki heikki  4096 Apr 13 09:27 base
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 global
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_commit_ts
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_dynshmem
-rw------- 1 heikki heikki  4760 Apr 13 09:27 pg_hba.conf
-rw------- 1 heikki heikki  1636 Apr 13 09:27 pg_ident.conf
drwxr-xr-x 4 heikki heikki  4096 Apr 13 09:32 pg_logical
drwxr-xr-x 4 heikki heikki  4096 Apr 13 09:27 pg_multixact
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_notify
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_replslot
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_serial
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_snapshots
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_stat
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:34 pg_stat_tmp
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_subtrans
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_tblspc
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_twophase
-rw------- 1 heikki heikki     3 Apr 13 09:27 PG_VERSION
lrwxrwxrwx 1 heikki heikki    52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
drwxr-xr-x 2 heikki heikki  4096 Apr 13 09:27 pg_xact
-rw------- 1 heikki heikki    88 Apr 13 09:27 postgresql.auto.conf
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
-rw------- 1 heikki heikki    96 Apr 13 09:27 postmaster.opts
-rw------- 1 heikki heikki   149 Apr 13 09:27 postmaster.pid

Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
datadir is ephemeral: you can delete it at any time, and it can be reconstructed
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
the repository, the 'datadirs' are not included. (They are like git working trees.)

~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
Creating data directory from snapshot at 0/15FFB08...
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
done
server started
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
              t
-----------------------------
 inserted on the main branch
 inserted on experimental
(2 rows)
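Aside: judging by the init_repo code later in this commit, a branch ref under
.zenith/refs/branches is simply a file containing the timeline id it points at,
so it can be inspected directly. A hypothetical session, reusing one of the
timeline ids from the 'datadirs' listing above (the actual value differs per
repository):

~/git-sandbox/zenith (cli-v2)$ cat .zenith/refs/branches/main
3c0c634c1674079b2c6d4edf7c91523e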
@@ -8,12 +8,20 @@ edition = "2018"

[dependencies]
rand = "0.8.3"
tar = "0.4.33"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }

serde = ""
serde_derive = ""
toml = ""
-home = "0.5.3"
lazy_static = ""
regex = "1"
anyhow = "1.0"
hex = "0.4.3"
bytes = "1.0.1"
fs_extra = "1.2.0"

pageserver = { path = "../pageserver" }
walkeeper = { path = "../walkeeper" }
postgres_ffi = { path = "../postgres_ffi" }
@@ -1,22 +1,24 @@
-use std::error;
-use std::fs::File;
+use std::fs::{self, OpenOptions};
+use std::io::{Read, Write};
+use std::net::SocketAddr;
use std::net::TcpStream;
-use std::process::{Command, Stdio};
+use std::os::unix::fs::PermissionsExt;
+use std::process::Command;
use std::sync::Arc;
use std::time::Duration;
use std::{collections::BTreeMap, path::PathBuf};
-use std::{io::Write, net::SocketAddr};
use std::path::Path;

+use anyhow::{Context, Result};
use lazy_static::lazy_static;
-use postgres::{Client, NoTls};
use regex::Regex;
use tar;

-use crate::local_env::{self, LocalEnv};
+use postgres::{Client, NoTls};
+
+use crate::local_env::LocalEnv;
use crate::storage::{PageServerNode, WalProposerNode};

-type Result<T> = std::result::Result<T, Box<dyn error::Error>>;
+use pageserver::ZTimelineId;

//
// ComputeControlPlane
@@ -35,14 +37,9 @@ impl ComputeControlPlane {
        // it is running on default port. Change that when pageserver will have config.
        let pageserver = Arc::new(PageServerNode::from_env(&env));

-        let nodes: Result<BTreeMap<_, _>> = fs::read_dir(env.compute_dir())
-            .map_err(|e| {
-                format!(
-                    "failed to list {}: {}",
-                    env.compute_dir().to_str().unwrap(),
-                    e
-                )
-            })?
+        let pgdatadirspath = env.repo_path.join("pgdatadirs");
+        let nodes: Result<BTreeMap<_, _>> = fs::read_dir(&pgdatadirspath)
+            .with_context(|| format!("failed to list {}", pgdatadirspath.display()))?
            .into_iter()
            .map(|f| {
                PostgresNode::from_dir_entry(f?, &env, &pageserver)
@@ -68,43 +65,50 @@ impl ComputeControlPlane {
            .unwrap_or(self.base_port)
    }

-    pub fn local(pageserver: &Arc<PageServerNode>) -> ComputeControlPlane {
-        let env = local_env::test_env();
+    pub fn local(local_env: &LocalEnv, pageserver: &Arc<PageServerNode>) -> ComputeControlPlane {
        ComputeControlPlane {
            base_port: 65431,
            pageserver: Arc::clone(pageserver),
            nodes: BTreeMap::new(),
-            env,
+            env: local_env.clone(),
        }
    }

-    fn new_vanilla_node(&mut self, is_test: bool) -> Result<Arc<PostgresNode>> {
-        // allocate new node entry with generated port
+    /// Connect to a page server, get base backup, and untar it to initialize a
+    /// new data directory
+    pub fn new_from_page_server(
+        &mut self,
+        is_test: bool,
+        timelineid: ZTimelineId,
+    ) -> Result<Arc<PostgresNode>> {
        let node_id = self.nodes.len() as u32 + 1;

        let node = Arc::new(PostgresNode {
            name: format!("pg{}", node_id),
            address: SocketAddr::new("127.0.0.1".parse().unwrap(), self.get_port()),
            env: self.env.clone(),
            pageserver: Arc::clone(&self.pageserver),
            is_test,
+            timelineid,
        });
-        node.init_vanilla()?;

+        node.init_from_page_server()?;
        self.nodes.insert(node.name.clone(), Arc::clone(&node));

        Ok(node)
    }

-    pub fn new_test_node(&mut self) -> Arc<PostgresNode> {
-        let addr = self.pageserver.address().clone();
-        let node = self.new_vanilla_node(true).unwrap();
+    pub fn new_test_node(&mut self, timelineid: ZTimelineId) -> Arc<PostgresNode> {
+        let node = self.new_from_page_server(true, timelineid);
+        assert!(node.is_ok());
+        let node = node.unwrap();

-        // Configure that node to take pages from pageserver
+        // Configure the node to stream WAL directly to the pageserver
        node.append_conf(
            "postgresql.conf",
            format!(
-                "page_server_connstring = 'host={} port={}'\n",
-                addr.ip(),
-                addr.port()
+                "callmemaybe_connstring = '{}'\n", // FIXME escaping
+                node.connstr()
            )
            .as_str(),
        );
@@ -112,9 +116,9 @@ impl ComputeControlPlane {
        node
    }

-    pub fn new_test_master_node(&mut self) -> Arc<PostgresNode> {
-        let node = self.new_vanilla_node(true).unwrap();
-        println!("Create vanilla node at {:?}", node.address);
+    pub fn new_test_master_node(&mut self, timelineid: ZTimelineId) -> Arc<PostgresNode> {
+        let node = self.new_from_page_server(true, timelineid).unwrap();

        node.append_conf(
            "postgresql.conf",
            "synchronous_standby_names = 'safekeeper_proxy'\n",
@@ -123,17 +127,15 @@ impl ComputeControlPlane {
        node
    }

-    pub fn new_node(&mut self) -> Result<Arc<PostgresNode>> {
-        let addr = self.pageserver.address().clone();
-        let node = self.new_vanilla_node(false)?;
+    pub fn new_node(&mut self, timelineid: ZTimelineId) -> Result<Arc<PostgresNode>> {
+        let node = self.new_from_page_server(false, timelineid).unwrap();

-        // Configure that node to take pages from pageserver
+        // Configure the node to stream WAL directly to the pageserver
        node.append_conf(
            "postgresql.conf",
            format!(
-                "page_server_connstring = 'host={} port={}'\n",
-                addr.ip(),
-                addr.port()
+                "callmemaybe_connstring = '{}'\n", // FIXME escaping
+                node.connstr()
            )
            .as_str(),
        );
@@ -150,6 +152,7 @@ pub struct PostgresNode {
    pub env: LocalEnv,
    pageserver: Arc<PageServerNode>,
    is_test: bool,
+    timelineid: ZTimelineId,
}

impl PostgresNode {
@@ -159,11 +162,10 @@ impl PostgresNode {
        pageserver: &Arc<PageServerNode>,
    ) -> Result<PostgresNode> {
        if !entry.file_type()?.is_dir() {
-            let err_msg = format!(
+            anyhow::bail!(
                "PostgresNode::from_dir_entry failed: '{}' is not a directory",
-                entry.path().to_str().unwrap()
+                entry.path().display()
            );
-            return Err(err_msg.into());
        }

        lazy_static! {
@@ -176,11 +178,10 @@ impl PostgresNode {

        // find out tcp port in config file
        let cfg_path = entry.path().join("postgresql.conf");
-        let config = fs::read_to_string(cfg_path.clone()).map_err(|e| {
+        let config = fs::read_to_string(cfg_path.clone()).with_context(|| {
            format!(
-                "failed to read config file in {}: {}",
-                cfg_path.to_str().unwrap(),
-                e
+                "failed to read config file in {}",
+                cfg_path.to_str().unwrap()
            )
        })?;

@@ -190,14 +191,20 @@ impl PostgresNode {
        );
        let port: u16 = CONF_PORT_RE
            .captures(config.as_str())
-            .ok_or(err_msg.clone() + " 1")?
+            .ok_or(anyhow::Error::msg(err_msg.clone() + " 1"))?
            .iter()
            .last()
-            .ok_or(err_msg.clone() + " 2")?
-            .ok_or(err_msg.clone() + " 3")?
+            .ok_or(anyhow::Error::msg(err_msg.clone() + " 2"))?
+            .ok_or(anyhow::Error::msg(err_msg.clone() + " 3"))?
            .as_str()
            .parse()
-            .map_err(|e| format!("{}: {}", err_msg, e))?;
+            .with_context(|| err_msg)?;

+        // FIXME: What timeline is this server on? Would have to parse the postgresql.conf
+        // file for that, too. It's currently not needed for anything, but it would be
+        // nice to list the timeline in "zenith pg list"
+        let timelineid_buf = [0u8; 16];
+        let timelineid = ZTimelineId::from(timelineid_buf);

        // ok now
        Ok(PostgresNode {
@@ -206,65 +213,107 @@ impl PostgresNode {
            env: env.clone(),
            pageserver: Arc::clone(pageserver),
            is_test: false,
+            timelineid,
        })
    }

-    fn init_vanilla(&self) -> Result<()> {
+    // Connect to a page server, get base backup, and untar it to initialize a
+    // new data directory
+    pub fn init_from_page_server(&self) -> Result<()> {
+        let pgdata = self.pgdata();

        println!(
-            "Creating new postgres: path={} port={}",
-            self.pgdata().to_str().unwrap(),
+            "Extracting base backup to create postgres instance: path={} port={}",
+            pgdata.display(),
            self.address.port()
        );

        // initialize data directory

        if self.is_test {
-            fs::remove_dir_all(self.pgdata().to_str().unwrap()).ok();
+            fs::remove_dir_all(&pgdata).ok();
        }

-        fs::create_dir_all(self.pgdata().to_str().unwrap())?;
-
-        let initdb_path = self.env.pg_bin_dir().join("initdb");
-        let initdb = Command::new(initdb_path)
-            .args(&["-D", self.pgdata().to_str().unwrap()])
-            .arg("-N")
-            .arg("-A trust")
-            .arg("--no-instructions")
-            .env_clear()
-            .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .stdout(Stdio::null())
-            .status()?;
-
-        if !initdb.success() {
-            return Err("initdb failed".into());
-        }
+        let sql = format!("basebackup {}", self.timelineid);
+        let mut client = self
+            .pageserver
+            .page_server_psql_client()
+            .with_context(|| "connecting to page erver failed")?;
+
+        fs::create_dir_all(&pgdata)
+            .with_context(|| format!("could not create data directory {}", pgdata.display()))?;
+        fs::set_permissions(pgdata.as_path(), fs::Permissions::from_mode(0o700)).with_context(
+            || {
+                format!(
+                    "could not set permissions in data directory {}",
+                    pgdata.display()
+                )
+            },
+        )?;
+
+        // FIXME: The compute node should be able to stream the WAL it needs from the WAL safekeepers or archive.
+        // But that's not implemented yet. For now, 'pg_wal' is included in the base backup tarball that
+        // we receive from the Page Server, so we don't need to create the empty 'pg_wal' directory here.
+        //fs::create_dir_all(pgdata.join("pg_wal"))?;
+
+        let mut copyreader = client
+            .copy_out(sql.as_str())
+            .with_context(|| "page server 'basebackup' command failed")?;
+
+        // FIXME: Currently, we slurp the whole tarball into memory, and then extract it,
+        // but we really should do this:
+        //let mut ar = tar::Archive::new(copyreader);
+        let mut buf = vec![];
+        copyreader
+            .read_to_end(&mut buf)
+            .with_context(|| "reading base backup from page server failed")?;
+        let mut ar = tar::Archive::new(buf.as_slice());
+        ar.unpack(&pgdata)
+            .with_context(|| "extracting page backup failed")?;

        // listen for selected port
        self.append_conf(
            "postgresql.conf",
-            format!(
+            &format!(
                "max_wal_senders = 10\n\
                 max_replication_slots = 10\n\
                 hot_standby = on\n\
                 shared_buffers = 1MB\n\
                 fsync = off\n\
                 max_connections = 100\n\
                 wal_sender_timeout = 0\n\
                 wal_level = replica\n\
                 listen_addresses = '{address}'\n\
                 port = {port}\n",
                address = self.address.ip(),
                port = self.address.port()
-            )
-            .as_str(),
+            ),
        );

+        // Never clean up old WAL. TODO: We should use a replication
+        // slot or something proper, to prevent the compute node
+        // from removing WAL that hasn't been streamed to the safekeepr or
+        // page server yet. But this will do for now.
+        self.append_conf("postgresql.conf", &format!("wal_keep_size='10TB'\n"));
+
+        // Connect it to the page server.
+
+        // Configure that node to take pages from pageserver
+        self.append_conf(
+            "postgresql.conf",
+            &format!(
+                "page_server_connstring = 'host={} port={}'\n\
+                 zenith_timeline='{}'\n",
+                self.pageserver.address().ip(),
+                self.pageserver.address().port(),
+                self.timelineid
+            ),
+        );

        println!("Database initialized");
        Ok(())
    }

-    pub fn pgdata(&self) -> PathBuf {
-        self.env.compute_dir().join(self.name.clone())
+    fn pgdata(&self) -> PathBuf {
+        self.env.repo_path.join("pgdatadirs").join(&self.name)
    }

    pub fn status(&self) -> &str {
@@ -291,6 +340,7 @@ impl PostgresNode {

    fn pg_ctl(&self, args: &[&str]) -> Result<()> {
        let pg_ctl_path = self.env.pg_bin_dir().join("pg_ctl");
+
        let pg_ctl = Command::new(pg_ctl_path)
            .args(
                [
@@ -306,19 +356,15 @@ impl PostgresNode {
            )
            .env_clear()
            .env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
-            .status()?;
-
+            .status()
+            .with_context(|| "pg_ctl failed")?;
        if !pg_ctl.success() {
-            Err("pg_ctl failed".into())
-        } else {
-            Ok(())
+            anyhow::bail!("pg_ctl failed");
        }
+        Ok(())
    }

    pub fn start(&self) -> Result<()> {
        let _res = self
            .pageserver
            .page_server_psql(format!("callmemaybe {}", self.connstr()).as_str());
        println!("Starting postgres node at '{}'", self.connstr());
        self.pg_ctl(&["start"])
    }
@@ -378,39 +424,21 @@ impl PostgresNode {
        Client::connect(connstring.as_str(), NoTls).unwrap()
    }

-    /* Create stub controlfile and respective xlog to start computenode */
-    pub fn setup_controlfile(&self) {
-        let filepath = format!("{}/global/pg_control", self.pgdata().to_str().unwrap());
-
-        {
-            File::create(filepath).unwrap();
-        }
-
-        let pg_resetwal_path = self.env.pg_bin_dir().join("pg_resetwal");
-
-        let pg_resetwal = Command::new(pg_resetwal_path)
-            .args(&["-D", self.pgdata().to_str().unwrap()])
-            .arg("-f")
-            // TODO probably we will have to modify pg_resetwal
-            // .arg("--compute-node")
-            .status()
-            .expect("failed to execute pg_resetwal");
-
-        if !pg_resetwal.success() {
-            panic!("pg_resetwal failed");
-        }
-    }
-
-    pub fn start_proxy(&self, wal_acceptors: String) -> WalProposerNode {
+    pub fn start_proxy(&self, wal_acceptors: &str) -> WalProposerNode {
        let proxy_path = self.env.pg_bin_dir().join("safekeeper_proxy");
        match Command::new(proxy_path.as_path())
-            .args(&["-s", &wal_acceptors])
+            .args(&["--ztimelineid", &self.timelineid.to_string()])
+            .args(&["-s", wal_acceptors])
            .args(&["-h", &self.address.ip().to_string()])
            .args(&["-p", &self.address.port().to_string()])
            .arg("-v")
-            .stderr(OpenOptions::new()
-                .append(true)
-                .open(self.env.data_dir.join("safepkeeper_proxy.log")).unwrap())
+            .stderr(
+                OpenOptions::new()
+                    .create(true)
+                    .append(true)
+                    .open(self.pgdata().join("safekeeper_proxy.log"))
+                    .unwrap(),
+            )
            .spawn()
        {
            Ok(child) => WalProposerNode { pid: child.id() },
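Aside: the FIXME in init_from_page_server above notes that the whole base
backup tarball is slurped into memory before extraction. A minimal sketch of
the streaming alternative hinted at by the commented-out line, assuming the
blocking postgres client is used (its CopyOutReader implements std::io::Read,
which is all tar::Archive::new needs). extract_base_backup is a hypothetical
helper, not part of this commit:

use std::path::Path;

use anyhow::Context;

// Stream the 'basebackup' COPY output straight into the tar extractor,
// instead of buffering the whole tarball in a Vec first.
fn extract_base_backup(
    client: &mut postgres::Client,
    timelineid: &str,
    pgdata: &Path,
) -> anyhow::Result<()> {
    let copyreader = client
        .copy_out(format!("basebackup {}", timelineid).as_str())
        .with_context(|| "page server 'basebackup' command failed")?;

    // CopyOutReader implements Read, so the archive unpacks incrementally.
    let mut ar = tar::Archive::new(copyreader);
    ar.unpack(pgdata)
        .with_context(|| "extracting base backup failed")?;
    Ok(())
}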
@@ -4,14 +4,19 @@
// Now it also provides init method which acts like a stub for proper installation
// script which will use local paths.
//
+use anyhow::Context;
+use bytes::Bytes;
+use rand::Rng;
use std::env;
-use std::error;
use std::fs;
use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};

+use anyhow::Result;
use serde_derive::{Deserialize, Serialize};

-type Result<T> = std::result::Result<T, Box<dyn error::Error>>;
+use pageserver::ZTimelineId;
+use walkeeper::xlog_utils;

//
// This data structure represents deserialized zenith config, which should be
@@ -21,11 +26,14 @@ type Result<T> = std::result::Result<T, Box<dyn error::Error>>;
//
#[derive(Serialize, Deserialize, Clone)]
pub struct LocalEnv {
-    // Here page server and compute nodes will create and store their data.
-    pub data_dir: PathBuf,
+    // Path to the Repository. Here page server and compute nodes will create and store their data.
+    pub repo_path: PathBuf,

-    // Path to postgres distribution. It expected that "bin", "include",
-    // "lib", "share" from postgres distribution will be there. If at some point
+    // System identifier, from the PostgreSQL control file
+    pub systemid: u64,
+
+    // Path to postgres distribution. It's expected that "bin", "include",
+    // "lib", "share" from postgres distribution are there. If at some point
    // in time we will be able to run against vanilla postgres we may split that
    // to four separate paths and match OS-specific installation layout.
    pub pg_distrib_dir: PathBuf,
@@ -42,53 +50,37 @@ impl LocalEnv {
    pub fn pg_lib_dir(&self) -> PathBuf {
        self.pg_distrib_dir.join("lib")
    }
}

-    // pageserver
-    pub fn pageserver_data_dir(&self) -> PathBuf {
-        self.data_dir.join("pageserver")
-    }
-    pub fn pageserver_log(&self) -> PathBuf {
-        self.pageserver_data_dir().join("pageserver.log")
-    }
-    pub fn pageserver_pidfile(&self) -> PathBuf {
-        self.pageserver_data_dir().join("pageserver.pid")
-    }
-
-    // compute nodes
-    pub fn compute_dir(&self) -> PathBuf {
-        self.data_dir.join("compute")
+fn zenith_repo_dir() -> PathBuf {
+    // Find repository path
+    match std::env::var_os("ZENITH_REPO_DIR") {
+        Some(val) => PathBuf::from(val.to_str().unwrap()),
+        None => ".zenith".into(),
    }
}

//
-// Issues in rust-lang repo has several discussions about proper library to check
-// home directory in a cross-platform way. Seems that current consensus is around
-// home crate and cargo uses it.
+// Initialize a new Zenith repository
//
-fn get_home() -> Result<PathBuf> {
-    home::home_dir().ok_or("can not determine home directory path".into())
-}
-
pub fn init() -> Result<()> {
-    let home_dir = get_home()?;
-
-    // check if config already exists
-    let cfg_path = home_dir.join(".zenith");
-    if cfg_path.exists() {
-        let err_msg = format!(
+    let repo_path = zenith_repo_dir();
+    if repo_path.exists() {
+        anyhow::bail!(
            "{} already exists. Perhaps already initialized?",
-            cfg_path.to_str().unwrap()
+            repo_path.to_str().unwrap()
        );
-        return Err(err_msg.into());
    }

    // Now we can run init only from crate directory, so check that current dir is our crate.
    // Use 'pageserver/Cargo.toml' existence as evidendce.
    let cargo_path = env::current_dir()?;
    if !cargo_path.join("pageserver/Cargo.toml").exists() {
-        let err_msg = "Current dirrectory does not look like a zenith repo. \
-            Please, run 'init' from zenith repo root.";
-        return Err(err_msg.into());
+        anyhow::bail!(
+            "Current dirrectory does not look like a zenith repo. \
+            Please, run 'init' from zenith repo root."
+        );
    }

    // ok, now check that expected binaries are present
@@ -97,81 +89,172 @@ pub fn init() -> Result<()> {
    let pg_distrib_dir = cargo_path.join("tmp_install");
    let pg_path = pg_distrib_dir.join("bin/postgres");
    if !pg_path.exists() {
-        let err_msg = format!(
+        anyhow::bail!(
            "Can't find postres binary at {}. \
            Perhaps './pgbuild.sh' is needed to build it first.",
            pg_path.to_str().unwrap()
        );
-        return Err(err_msg.into());
    }

    // check pageserver
    let zenith_distrib_dir = cargo_path.join("target/debug/");
    let pageserver_path = zenith_distrib_dir.join("pageserver");
    if !pageserver_path.exists() {
-        let err_msg = format!(
+        anyhow::bail!(
            "Can't find pageserver binary at {}. Please build it.",
            pageserver_path.to_str().unwrap()
        );
-        return Err(err_msg.into());
    }

    // ok, we are good to go

-    // create dirs
-    let data_dir = cargo_path.join("tmp_check_cli");
-
-    for &dir in &["compute", "pageserver"] {
-        fs::create_dir_all(data_dir.join(dir)).map_err(|e| {
-            format!(
-                "Failed to create directory in '{}': {}",
-                data_dir.to_str().unwrap(),
-                e
-            )
-        })?;
-    }
-
-    // write config
-    let conf = LocalEnv {
-        data_dir,
+    let mut conf = LocalEnv {
+        repo_path: repo_path.clone(),
        pg_distrib_dir,
        zenith_distrib_dir,
+        systemid: 0,
    };
-    let toml = toml::to_string(&conf)?;
-    fs::write(cfg_path, toml)?;
+    init_repo(&mut conf)?;

    Ok(())
}

+pub fn init_repo(local_env: &mut LocalEnv) -> Result<()> {
+    let repopath = &local_env.repo_path;
+    fs::create_dir(&repopath)
+        .with_context(|| format!("could not create directory {}", repopath.display()))?;
+    fs::create_dir(repopath.join("pgdatadirs"))?;
+    fs::create_dir(repopath.join("timelines"))?;
+    fs::create_dir(repopath.join("refs"))?;
+    fs::create_dir(repopath.join("refs").join("branches"))?;
+    fs::create_dir(repopath.join("refs").join("tags"))?;
+    println!("created directory structure in {}", repopath.display());
+
+    // Create initial timeline
+    let tli = create_timeline(&local_env, None)?;
+    let timelinedir = repopath.join("timelines").join(tli.to_string());
+    println!("created initial timeline {}", timelinedir.display());
+
+    // Run initdb
+    //
+    // FIXME: we create it temporarily in "tmp" directory, and move it into
+    // the repository. Use "tempdir()" or something? Or just create it directly
+    // in the repo?
+    let initdb_path = local_env.pg_bin_dir().join("initdb");
+    let _initdb = Command::new(initdb_path)
+        .args(&["-D", "tmp"])
+        .arg("--no-instructions")
+        .env_clear()
+        .env("LD_LIBRARY_PATH", local_env.pg_lib_dir().to_str().unwrap())
+        .stdout(Stdio::null())
+        .status()
+        .with_context(|| "failed to execute initdb")?;
+    println!("initdb succeeded");
+
+    // Read control file to extract the LSN and system id
+    let controlfile =
+        postgres_ffi::decode_pg_control(Bytes::from(fs::read("tmp/global/pg_control")?))?;
+    let systemid = controlfile.system_identifier;
+    let lsn = controlfile.checkPoint;
+    let lsnstr = format!("{:016X}", lsn);
+
+    // Move the initial WAL file
+    fs::rename(
+        "tmp/pg_wal/000000010000000000000001",
+        timelinedir
+            .join("wal")
+            .join("000000010000000000000001.partial"),
+    )?;
+    println!("moved initial WAL file");
+
+    // Remove pg_wal
+    fs::remove_dir_all("tmp/pg_wal")?;
+    println!("removed tmp/pg_wal");
+
+    force_crash_recovery(&PathBuf::from("tmp"))?;
+    println!("updated pg_control");
+
+    let target = timelinedir.join("snapshots").join(&lsnstr);
+    fs::rename("tmp", &target)?;
+    println!("moved 'tmp' to {}", target.display());
+
+    // Create 'main' branch to refer to the initial timeline
+    let data = tli.to_string();
+    fs::write(repopath.join("refs").join("branches").join("main"), data)?;
+    println!("created main branch");
+
+    // Also update the system id in the LocalEnv
+    local_env.systemid = systemid;
+
+    // write config
+    let toml = toml::to_string(&local_env)?;
+    fs::write(repopath.join("config"), toml)?;
+
+    println!(
+        "new zenith repository was created in {}",
+        repopath.display()
+    );
+
+    Ok(())
+}
// If control file says the cluster was shut down cleanly, modify it, to mark
|
||||
// it as crashed. That forces crash recovery when you start the cluster.
|
||||
//
|
||||
// FIXME:
|
||||
// We currently do this to the initial snapshot in "zenith init". It would
|
||||
// be more natural to do this when the snapshot is restored instead, but we
|
||||
// currently don't have any code to create new snapshots, so it doesn't matter
|
||||
// Or better yet, use a less hacky way of putting the cluster into recovery.
|
||||
// Perhaps create a backup label file in the data directory when it's restored.
|
||||
fn force_crash_recovery(datadir: &Path) -> Result<()> {
|
||||
// Read in the control file
|
||||
let controlfilepath = datadir.to_path_buf().join("global").join("pg_control");
|
||||
let mut controlfile =
|
||||
postgres_ffi::decode_pg_control(Bytes::from(fs::read(controlfilepath.as_path())?))?;
|
||||
|
||||
controlfile.state = postgres_ffi::DBState_DB_IN_PRODUCTION;
|
||||
|
||||
fs::write(
|
||||
controlfilepath.as_path(),
|
||||
postgres_ffi::encode_pg_control(controlfile),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// check that config file is present
|
||||
pub fn load_config() -> Result<LocalEnv> {
|
||||
// home
|
||||
let home_dir = get_home()?;
|
||||
|
||||
// check file exists
|
||||
let cfg_path = home_dir.join(".zenith");
|
||||
if !cfg_path.exists() {
|
||||
let err_msg = format!(
|
||||
pub fn load_config(repopath: &Path) -> Result<LocalEnv> {
|
||||
if !repopath.exists() {
|
||||
anyhow::bail!(
|
||||
"Zenith config is not found in {}. You need to run 'zenith init' first",
|
||||
cfg_path.to_str().unwrap()
|
||||
repopath.to_str().unwrap()
|
||||
);
|
||||
return Err(err_msg.into());
|
||||
}
|
||||
|
||||
// load and parse file
|
||||
let config = fs::read_to_string(cfg_path)?;
|
||||
let config = fs::read_to_string(repopath.join("config"))?;
|
||||
toml::from_str(config.as_str()).map_err(|e| e.into())
|
||||
}
|
||||
|
||||
// local env for tests
|
||||
pub fn test_env() -> LocalEnv {
|
||||
let data_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_check");
|
||||
fs::create_dir_all(data_dir.clone()).unwrap();
|
||||
LocalEnv {
|
||||
data_dir,
|
||||
pub fn test_env(testname: &str) -> LocalEnv {
|
||||
fs::create_dir_all("../tmp_check").expect("could not create directory ../tmp_check");
|
||||
|
||||
let repo_path = Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("../tmp_check/")
|
||||
.join(testname);
|
||||
|
||||
// Remove remnants of old test repo
|
||||
let _ = fs::remove_dir_all(&repo_path);
|
||||
|
||||
let mut local_env = LocalEnv {
|
||||
repo_path,
|
||||
pg_distrib_dir: Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_install"),
|
||||
zenith_distrib_dir: cargo_bin_dir(),
|
||||
}
|
||||
systemid: 0,
|
||||
};
|
||||
init_repo(&mut local_env).expect("could not initialize zenith repository");
|
||||
return local_env;
|
||||
}
|
||||
|
||||
// Find the directory where the binaries were put (i.e. target/debug/)
|
||||
@@ -185,3 +268,122 @@ pub fn cargo_bin_dir() -> PathBuf {
|
||||
|
||||
return pathbuf;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct PointInTime {
|
||||
pub timelineid: ZTimelineId,
|
||||
pub lsn: u64,
|
||||
}
|
||||
|
||||
fn create_timeline(local_env: &LocalEnv, ancestor: Option<PointInTime>) -> Result<ZTimelineId> {
|
||||
let repopath = &local_env.repo_path;
|
||||
|
||||
// Create initial timeline
|
||||
let mut tli_buf = [0u8; 16];
|
||||
rand::thread_rng().fill(&mut tli_buf);
|
||||
let timelineid = ZTimelineId::from(tli_buf);
|
||||
|
||||
let timelinedir = repopath.join("timelines").join(timelineid.to_string());
|
||||
|
||||
fs::create_dir(&timelinedir)?;
|
||||
fs::create_dir(&timelinedir.join("snapshots"))?;
|
||||
fs::create_dir(&timelinedir.join("wal"))?;
|
||||
|
||||
if let Some(ancestor) = ancestor {
|
||||
let data = format!(
|
||||
"{}@{:X}/{:X}",
|
||||
ancestor.timelineid,
|
||||
ancestor.lsn >> 32,
|
||||
ancestor.lsn & 0xffffffff
|
||||
);
|
||||
fs::write(timelinedir.join("ancestor"), data)?;
|
||||
}
|
||||
|
||||
Ok(timelineid)
|
||||
}
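// Editor's sketch (illustrative, not part of this commit): the "ancestor"
// file written above records the branch point in the usual PostgreSQL
// split-LSN notation, "<timeline hex>@<hi>/<lo>". For example:
//
//     let lsn: u64 = (1u64 << 32) | 0x15D3DD8;
//     assert_eq!(format!("{:X}/{:X}", lsn >> 32, lsn & 0xffffffff), "1/15D3DD8");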

// Parse an LSN in the format used in filenames
//
// For example: 00000000015D3DD8
//
fn parse_lsn(s: &str) -> std::result::Result<u64, std::num::ParseIntError> {
u64::from_str_radix(s, 16)
}
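// Editor's sketch (illustrative, not part of this commit): parse_lsn
// round-trips with the "{:016X}" formatting that init_repo uses for
// snapshot directory names:
//
//     assert_eq!(parse_lsn("00000000015D3DD8"), Ok(0x15D3DD8));
//     assert_eq!(format!("{:016X}", 0x15D3DD8u64), "00000000015D3DD8");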

// Create a new branch in the repository (for the "zenith branch" subcommand)
pub fn create_branch(
local_env: &LocalEnv,
branchname: &str,
startpoint: PointInTime,
) -> Result<()> {
let repopath = &local_env.repo_path;

// create a new timeline for it
let newtli = create_timeline(local_env, Some(startpoint))?;
let newtimelinedir = repopath.join("timelines").join(newtli.to_string());

let data = newtli.to_string();
fs::write(
repopath.join("refs").join("branches").join(branchname),
data,
)?;

// Copy the latest snapshot (TODO: before the startpoint) and all WAL
// TODO: be smarter and avoid the copying...
let (_maxsnapshot, oldsnapshotdir) = find_latest_snapshot(local_env, startpoint.timelineid)?;
let copy_opts = fs_extra::dir::CopyOptions::new();
fs_extra::dir::copy(oldsnapshotdir, newtimelinedir.join("snapshots"), &copy_opts)?;

let oldtimelinedir = repopath
.join("timelines")
.join(startpoint.timelineid.to_string());
let mut copy_opts = fs_extra::dir::CopyOptions::new();
copy_opts.content_only = true;
fs_extra::dir::copy(
oldtimelinedir.join("wal"),
newtimelinedir.join("wal"),
&copy_opts,
)?;

Ok(())
}
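// Editor's sketch (illustrative, not part of this commit): after
// create_branch, the branch ref is a plain file holding the new timeline's
// hex ID; get_branch_timeline() further down resolves it the same way:
//
//     create_branch(&local_env, "experimental", startpoint)?;
//     let tli = fs::read_to_string(repopath.join("refs/branches/experimental"))?;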

// Find the end of valid WAL in a wal directory
pub fn find_end_of_wal(local_env: &LocalEnv, timeline: ZTimelineId) -> Result<u64> {
let repopath = &local_env.repo_path;
let waldir = repopath
.join("timelines")
.join(timeline.to_string())
.join("wal");

let (lsn, _tli) = xlog_utils::find_end_of_wal(&waldir, 16 * 1024 * 1024, true);

return Ok(lsn);
}
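// Editor's sketch (illustrative, not part of this commit): the returned LSN
// is what the tests below use as a branch start point:
//
//     let startpoint = find_end_of_wal(&local_env, maintli)?;
//     create_branch(&local_env, "experimental",
//                   PointInTime { timelineid: maintli, lsn: startpoint })?;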

// Find the latest snapshot for a timeline
fn find_latest_snapshot(local_env: &LocalEnv, timeline: ZTimelineId) -> Result<(u64, PathBuf)> {
let repopath = &local_env.repo_path;

let snapshotsdir = repopath
.join("timelines")
.join(timeline.to_string())
.join("snapshots");
let paths = fs::read_dir(&snapshotsdir)?;
let mut maxsnapshot: u64 = 0;
let mut snapshotdir: Option<PathBuf> = None;
for path in paths {
let path = path?;
let filename = path.file_name().to_str().unwrap().to_owned();
if let Ok(lsn) = parse_lsn(&filename) {
maxsnapshot = std::cmp::max(lsn, maxsnapshot);
snapshotdir = Some(path.path());
}
}
if maxsnapshot == 0 {
// TODO: check ancestor timeline
anyhow::bail!("no snapshot found in {}", snapshotsdir.display());
}

Ok((maxsnapshot, snapshotdir.unwrap()))
}
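// Editor's sketch (illustrative, not part of this commit): snapshot
// directories are named by the 16-digit hex LSN written in init_repo, so a
// timeline directory might look like:
//
//     timelines/<tli>/snapshots/00000000015D3DD8/   <- picked as latest here
//     timelines/<tli>/wal/...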

@@ -1,10 +1,11 @@
use std::error;
use anyhow::Result;
use std::fs;
use std::io;
use std::net::SocketAddr;
use std::net::TcpStream;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
@@ -12,9 +13,9 @@ use std::time::Duration;

use postgres::{Client, NoTls};

use crate::local_env::{self, LocalEnv};

type Result<T> = std::result::Result<T, Box<dyn error::Error>>;
use crate::compute::PostgresNode;
use crate::local_env::LocalEnv;
use pageserver::ZTimelineId;

//
// Collection of several example deployments useful for tests.
@@ -26,63 +27,70 @@ pub struct TestStorageControlPlane {
pub wal_acceptors: Vec<WalAcceptorNode>,
pub pageserver: Arc<PageServerNode>,
pub test_done: AtomicBool,
pub repopath: PathBuf,
}

impl TestStorageControlPlane {
// Peek into the repository, to grab the timeline ID of given branch
pub fn get_branch_timeline(&self, branchname: &str) -> ZTimelineId {
let branchpath = self.repopath.join("refs/branches/".to_owned() + branchname);

ZTimelineId::from_str(&(fs::read_to_string(&branchpath).unwrap())).unwrap()
}

// postgres <-> page_server
pub fn one_page_server(pgdata_base_path: String) -> TestStorageControlPlane {
let env = local_env::test_env();
//
// Initialize a new repository and configure a page server to run in it
//
pub fn one_page_server(local_env: &LocalEnv) -> TestStorageControlPlane {
let repopath = local_env.repo_path.clone();

let pserver = Arc::new(PageServerNode {
env: env.clone(),
env: local_env.clone(),
kill_on_exit: true,
listen_address: None,
});
pserver.init();

if pgdata_base_path.is_empty() {
pserver.start().unwrap();
} else {
pserver.start_fromdatadir(pgdata_base_path).unwrap();
}
pserver.start().unwrap();

TestStorageControlPlane {
wal_acceptors: Vec::new(),
pageserver: pserver,
test_done: AtomicBool::new(false),
repopath: repopath,
}
}

pub fn one_page_server_no_start() -> TestStorageControlPlane {
let env = local_env::test_env();
pub fn one_page_server_no_start(local_env: &LocalEnv) -> TestStorageControlPlane {
let repopath = local_env.repo_path.clone();

let pserver = Arc::new(PageServerNode {
env,
env: local_env.clone(),
kill_on_exit: true,
listen_address: None,
});
pserver.init();

TestStorageControlPlane {
wal_acceptors: Vec::new(),
pageserver: pserver,
test_done: AtomicBool::new(false),
repopath: repopath,
}
}

// postgres <-> {wal_acceptor1, wal_acceptor2, ...}
pub fn fault_tolerant(redundancy: usize) -> TestStorageControlPlane {
let env = local_env::test_env();
pub fn fault_tolerant(local_env: &LocalEnv, redundancy: usize) -> TestStorageControlPlane {
let repopath = local_env.repo_path.clone();

let mut cplane = TestStorageControlPlane {
wal_acceptors: Vec::new(),
pageserver: Arc::new(PageServerNode {
env: env.clone(),
env: local_env.clone(),
kill_on_exit: true,
listen_address: None,
}),
test_done: AtomicBool::new(false),
repopath: repopath,
};
cplane.pageserver.init();
cplane.pageserver.start().unwrap();

const WAL_ACCEPTOR_PORT: usize = 54321;
@@ -92,8 +100,8 @@ impl TestStorageControlPlane {
listen: format!("127.0.0.1:{}", WAL_ACCEPTOR_PORT + i)
.parse()
.unwrap(),
data_dir: env.data_dir.join(format!("wal_acceptor_{}", i)),
env: env.clone(),
data_dir: local_env.repo_path.join(format!("wal_acceptor_{}", i)),
env: local_env.clone(),
};
wal_acceptor.init();
wal_acceptor.start();
@@ -155,58 +163,53 @@ impl PageServerNode {
}
}

pub fn init(&self) {
fs::create_dir_all(self.env.pageserver_data_dir()).unwrap();
pub fn repo_path(&self) -> PathBuf {
self.env.repo_path.clone()
}

pub fn pid_file(&self) -> PathBuf {
self.env.repo_path.join("pageserver.pid")
}

pub fn start(&self) -> Result<()> {
println!("Starting pageserver at '{}'", self.address());
println!(
"Starting pageserver at '{}' in {}",
self.address(),
self.repo_path().display()
);

let status = Command::new(self.env.zenith_distrib_dir.join("pageserver")) // XXX -> method
.args(&["-D", self.env.pageserver_data_dir().to_str().unwrap()])
.args(&["-l", self.address().to_string().as_str()])
let mut cmd = Command::new(self.env.zenith_distrib_dir.join("pageserver"));
cmd.args(&["-l", self.address().to_string().as_str()])
.arg("-d")
.env_clear()
.env("RUST_BACKTRACE", "1")
.env("ZENITH_REPO_DIR", self.repo_path())
.env("PATH", self.env.pg_bin_dir().to_str().unwrap()) // needs postgres-wal-redo binary
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.status()?;
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap());

if !status.success() {
return Err(Box::<dyn error::Error>::from(format!(
if !cmd.status()?.success() {
anyhow::bail!(
"Pageserver failed to start. See '{}' for details.",
self.env.pageserver_log().to_str().unwrap()
)));
} else {
return Ok(());
self.repo_path().join("pageserver.log").display()
);
}
}

pub fn start_fromdatadir(&self, pgdata_base_path: String) -> Result<()> {
println!("Starting pageserver at '{}'", self.address());

let status = Command::new(self.env.zenith_distrib_dir.join("pageserver")) // XXX -> method
.args(&["-D", self.env.pageserver_data_dir().to_str().unwrap()])
.args(&["-l", self.address().to_string().as_str()])
.arg("-d")
.args(&["--restore-from", "local"])
.env_clear()
.env("PATH", self.env.pg_bin_dir().to_str().unwrap()) // needs postgres-wal-redo binary
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
.env("PGDATA_BASE_PATH", pgdata_base_path)
.status()?;

if !status.success() {
return Err(Box::<dyn error::Error>::from(format!(
"Pageserver failed to start. See '{}' for details.",
self.env.pageserver_log().to_str().unwrap()
)));
} else {
return Ok(());
// It takes a while for the page server to start up. Wait until it is
// open for business.
for retries in 1..15 {
let client = self.page_server_psql_client();
if client.is_ok() {
break;
} else {
println!("page server not responding yet, retrying ({})...", retries);
thread::sleep(Duration::from_secs(1));
}
}
Ok(())
}

pub fn stop(&self) -> Result<()> {
let pidfile = self.env.pageserver_pidfile();
let pidfile = self.pid_file();
let pid = read_pidfile(&pidfile)?;

let status = Command::new("kill")
@@ -216,10 +219,7 @@ impl PageServerNode {
.expect("failed to execute kill");

if !status.success() {
return Err(Box::<dyn error::Error>::from(format!(
"Failed to kill pageserver with pid {}",
pid
)));
anyhow::bail!("Failed to kill pageserver with pid {}", pid);
}

// wait for pageserver to stop
@@ -234,10 +234,7 @@ impl PageServerNode {

// ok, we failed to stop pageserver, let's panic
if !status.success() {
return Err(Box::<dyn error::Error>::from(format!(
"Failed to stop pageserver with pid {}",
pid
)));
anyhow::bail!("Failed to stop pageserver with pid {}", pid);
} else {
return Ok(());
}
@@ -256,6 +253,19 @@ impl PageServerNode {
println!("Pageserver query: '{}'", sql);
client.simple_query(sql).unwrap()
}

pub fn page_server_psql_client(
&self,
) -> std::result::Result<postgres::Client, postgres::Error> {
let connstring = format!(
"host={} port={} dbname={} user={}",
self.address().ip(),
self.address().port(),
"no_db",
"no_user",
);
Client::connect(connstring.as_str(), NoTls)
}
}
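// Editor's note (illustrative, not part of this commit): with the values
// above, page_server_psql_client builds a libpq-style connection string such
// as "host=127.0.0.1 port=5430 dbname=no_db user=no_user" (127.0.0.1:5430
// being the page server's documented default listen address); "no_db" and
// "no_user" appear to be placeholders that the page server protocol ignores.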

impl Drop for PageServerNode {
@@ -295,6 +305,12 @@ impl WalAcceptorNode {
let status = Command::new(self.env.zenith_distrib_dir.join("wal_acceptor"))
.args(&["-D", self.data_dir.to_str().unwrap()])
.args(&["-l", self.listen.to_string().as_str()])
.args(&["--systemid", &self.env.systemid.to_string()])
// Tell page server it can receive WAL from this WAL safekeeper
// FIXME: If there are multiple safekeepers, they will all inform
// the page server. Only the last "notification" will stay in effect.
// So it's pretty random which safekeeper the page server will connect to
.args(&["--pageserver", "127.0.0.1:64000"])
.arg("-d")
.arg("-n")
.status()

@@ -12,4 +12,6 @@ rand = "0.8.3"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }

pageserver = { path = "../pageserver" }
walkeeper = { path = "../walkeeper" }
control_plane = { path = "../control_plane" }

@@ -1,7 +1,11 @@
// test node resettlement to an empty datadir

// TODO
/*
#[test]
fn test_resettlement() {}

// test seq scan of everything after restart
#[test]
fn test_cold_seqscan() {}
*/

@@ -1,5 +1,8 @@
// TODO
/*
#[test]
fn test_actions() {}

#[test]
fn test_regress() {}
*/

@@ -1,23 +1,24 @@
// mod control_plane;
use control_plane::compute::ComputeControlPlane;
use control_plane::local_env;
use control_plane::local_env::PointInTime;
use control_plane::storage::TestStorageControlPlane;

use std::thread::sleep;
use std::time::Duration;

// XXX: force all redo at the end
// -- restart + seqscan won't read deleted stuff
// -- pageserver api endpoint to check all rels

// Handcrafted cases with wal records that are (were) problematic for redo.
/*
#[test]
fn test_redo_cases() {
let local_env = local_env::test_env("test_redo_cases");

// Start pageserver that reads WAL directly from that postgres
let storage_cplane = TestStorageControlPlane::one_page_server(String::new());
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);

// start postgres
let node = compute_cplane.new_test_node();
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_node(maintli);
node.start().unwrap();

// check basic work with table
@@ -47,16 +48,19 @@ fn test_redo_cases() {
println!("sum = {}", count);
assert_eq!(count, 5000050000);
}

*/
// Runs pg_regress on a compute node
#[test]
fn test_regress() {
let local_env = local_env::test_env("test_regress");

// Start pageserver that reads WAL directly from that postgres
let storage_cplane = TestStorageControlPlane::one_page_server(String::new());
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);

// start postgres
let node = compute_cplane.new_test_node();
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_node(maintli);
node.start().unwrap();

node.pg_regress();
@@ -76,16 +80,33 @@ fn pgbench() {
node.pg_bench(10, 100);
}

// Run two postgres instances on one pageserver
// Run two postgres instances on one pageserver, on different timelines
#[test]
fn test_pageserver_multitenancy() {
// Start pageserver that reads WAL directly from that postgres
let storage_cplane = TestStorageControlPlane::one_page_server(String::new());
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);
fn test_pageserver_two_timelines() {
let local_env = local_env::test_env("test_pageserver_two_timelines");

// Allocate postgres instance, but don't start
let node1 = compute_cplane.new_test_node();
let node2 = compute_cplane.new_test_node();
// Start pageserver that reads WAL directly from that postgres
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);

let maintli = storage_cplane.get_branch_timeline("main");

// Create new branch at the end of 'main'
let startpoint = local_env::find_end_of_wal(&local_env, maintli).unwrap();
local_env::create_branch(
&local_env,
"experimental",
PointInTime {
timelineid: maintli,
lsn: startpoint,
},
)
.unwrap();
let experimentaltli = storage_cplane.get_branch_timeline("experimental");

// Launch postgres instances on both branches
let node1 = compute_cplane.new_test_node(maintli);
let node2 = compute_cplane.new_test_node(experimentaltli);
node1.start().unwrap();
node2.start().unwrap();

@@ -123,36 +144,3 @@ fn test_pageserver_multitenancy() {
println!("sum = {}", count);
assert_eq!(count, 15000150000);
}

#[test]
fn test_upload_pageserver_local() {
// Init pageserver that reads WAL directly from that postgres
// Don't start yet

let storage_cplane = TestStorageControlPlane::one_page_server_no_start();
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);

// init postgres node
let node = compute_cplane.new_test_node();

// upload data to pageserver & start it
&storage_cplane
.pageserver
.start_fromdatadir(node.pgdata().to_str().unwrap().to_string())
.unwrap();

sleep(Duration::from_secs(10));

// start postgres node
node.start().unwrap();

// check basic work with table
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
node.safe_psql(
"postgres",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
);
}

@@ -1,6 +1,9 @@
// Restart acceptors one by one while compute is under the load.
use control_plane::compute::ComputeControlPlane;
use control_plane::local_env;
use control_plane::local_env::PointInTime;
use control_plane::storage::TestStorageControlPlane;
use pageserver::ZTimelineId;

use rand::Rng;
use std::sync::Arc;
@@ -9,18 +12,20 @@ use std::{thread, time};

#[test]
fn test_acceptors_normal_work() {
// Start pageserver that reads WAL directly from that postgres
let local_env = local_env::test_env("test_acceptors_normal_work");

const REDUNDANCY: usize = 3;
let storage_cplane = TestStorageControlPlane::fault_tolerant(REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();

// start postgres
let node = compute_cplane.new_test_master_node();
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_master_node(maintli);
node.start().unwrap();

// start proxy
let _proxy = node.start_proxy(wal_acceptors);
let _proxy = node.start_proxy(&wal_acceptors);

// check basic work with table
node.safe_psql(
@@ -41,71 +46,97 @@ fn test_acceptors_normal_work() {
// check wal files equality
}

// Run page server and multiple safekeepers, and multiple compute nodes running
// against different timelines.
#[test]
fn test_multitenancy() {
// Start pageserver that reads WAL directly from that postgres
fn test_many_timelines() {
// Initialize a new repository, and set up WAL safekeepers and page server.
const REDUNDANCY: usize = 3;
const N_NODES: usize = 5;
let storage_cplane = TestStorageControlPlane::fault_tolerant(REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);
const N_TIMELINES: usize = 5;
let local_env = local_env::test_env("test_many_timelines");
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();

// start postgres
let mut nodes = Vec::new();
let mut proxies = Vec::new();
for _ in 0..N_NODES {
let node = compute_cplane.new_test_master_node();
nodes.push(node);
nodes.last().unwrap().start().unwrap();
proxies.push(nodes.last().unwrap().start_proxy(wal_acceptors.clone()));
}
// Create branches
let mut timelines: Vec<ZTimelineId> = Vec::new();
let maintli = storage_cplane.get_branch_timeline("main"); // main branch
timelines.push(maintli);
let startpoint = local_env::find_end_of_wal(&local_env, maintli).unwrap();
for i in 1..N_TIMELINES {
// additional branches
let branchname = format!("experimental{}", i);
local_env::create_branch(
&local_env,
&branchname,
PointInTime {
timelineid: maintli,
lsn: startpoint,
},
)
.unwrap();
let tli = storage_cplane.get_branch_timeline(&branchname);
timelines.push(tli);
}

// start postgres on each timeline
let mut nodes = Vec::new();
for tli in timelines {
let node = compute_cplane.new_test_node(tli);
nodes.push(node.clone());
node.start().unwrap();
node.start_proxy(&wal_acceptors);
}

// create schema
for node in &nodes {
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
}
for node in &nodes {
node.safe_psql(
"postgres",
"CREATE TABLE t(key int primary key, value text)",
);
}

// Populate data
for node in &nodes {
node.safe_psql(
"postgres",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
);
}
// Populate data
for node in &nodes {
node.safe_psql(
"postgres",
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
);
}

// Check data
for node in &nodes {
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 5000050000);
}
// Check data
for node in &nodes {
let count: i64 = node
.safe_psql("postgres", "SELECT sum(key) FROM t")
.first()
.unwrap()
.get(0);
println!("sum = {}", count);
assert_eq!(count, 5000050000);
}
}

// Majority is always alive
#[test]
fn test_acceptors_restarts() {
let local_env = local_env::test_env("test_acceptors_restarts");

// Start pageserver that reads WAL directly from that postgres
const REDUNDANCY: usize = 3;
const FAULT_PROBABILITY: f32 = 0.01;

let storage_cplane = TestStorageControlPlane::fault_tolerant(REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
let mut rng = rand::thread_rng();

// start postgres
let node = compute_cplane.new_test_master_node();
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_master_node(maintli);
node.start().unwrap();

// start proxy
let _proxy = node.start_proxy(wal_acceptors);
let _proxy = node.start_proxy(&wal_acceptors);
let mut failed_node: Option<usize> = None;

// check basic work with table
@@ -150,20 +181,23 @@ fn start_acceptor(cplane: &Arc<TestStorageControlPlane>, no: usize) {
// them again and check that nothing was lost. Repeat.
// N_CRASHES env var
#[test]
fn test_acceptors_unavalability() {
fn test_acceptors_unavailability() {
let local_env = local_env::test_env("test_acceptors_unavailability");

// Start pageserver that reads WAL directly from that postgres
const REDUNDANCY: usize = 2;

let storage_cplane = TestStorageControlPlane::fault_tolerant(REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();

// start postgres
let node = compute_cplane.new_test_master_node();
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_master_node(maintli);
node.start().unwrap();

// start proxy
let _proxy = node.start_proxy(wal_acceptors);
let _proxy = node.start_proxy(&wal_acceptors);

// check basic work with table
node.safe_psql(
@@ -226,19 +260,24 @@ fn simulate_failures(cplane: Arc<TestStorageControlPlane>) {
// Race condition test
#[test]
fn test_race_conditions() {
let local_env = local_env::test_env("test_race_conditions");

// Start pageserver that reads WAL directly from that postgres
const REDUNDANCY: usize = 3;

let storage_cplane = Arc::new(TestStorageControlPlane::fault_tolerant(REDUNDANCY));
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane.pageserver);
let storage_cplane = Arc::new(TestStorageControlPlane::fault_tolerant(
&local_env, REDUNDANCY,
));
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();

// start postgres
let node = compute_cplane.new_test_master_node();
let maintli = storage_cplane.get_branch_timeline("main");
let node = compute_cplane.new_test_master_node(maintli);
node.start().unwrap();

// start proxy
let _proxy = node.start_proxy(wal_acceptors);
let _proxy = node.start_proxy(&wal_acceptors);

// check basic work with table
node.safe_psql(

@@ -14,6 +14,7 @@ regex = "1.4.5"
bytes = "1.0.1"
byteorder = "1.4.3"
fs2 = "0.4.3"
fs_extra = "1.2.0"
futures = "0.3.13"
lazy_static = "1.4.0"
slog-stdlog = "4.1.0"
@@ -38,3 +39,7 @@ anyhow = "1.0"
crc32c = "0.6.0"
walkdir = "2"
thiserror = "1.0"
hex = "0.4.3"
tar = "0.4.33"

postgres_ffi = { path = "../postgres_ffi" }

202 pageserver/src/basebackup.rs Normal file
@@ -0,0 +1,202 @@
use log::*;
use regex::Regex;
use std::fmt;
use std::io::Write;
use tar::Builder;
use walkdir::WalkDir;

use crate::ZTimelineId;

pub fn send_snapshot_tarball(
write: &mut dyn Write,
timelineid: ZTimelineId,
snapshotlsn: u64,
) -> Result<(), std::io::Error> {
let mut ar = Builder::new(write);

let snappath = format!("timelines/{}/snapshots/{:016X}", timelineid, snapshotlsn);
let walpath = format!("timelines/{}/wal", timelineid);

debug!("sending tarball of snapshot in {}", snappath);
//ar.append_dir_all("", &snappath)?;

for entry in WalkDir::new(&snappath) {
let entry = entry?;
let fullpath = entry.path();
let relpath = entry.path().strip_prefix(&snappath).unwrap();

if relpath.to_str().unwrap() == "" {
continue;
}

if entry.file_type().is_dir() {
trace!(
"sending dir {} as {}",
fullpath.display(),
relpath.display()
);
ar.append_dir(relpath, fullpath)?;
} else if entry.file_type().is_symlink() {
error!("ignoring symlink in snapshot dir");
} else if entry.file_type().is_file() {
// Shared catalogs are exempt
if relpath.starts_with("global/") {
trace!("sending shared catalog {}", relpath.display());
ar.append_path_with_name(fullpath, relpath)?;
} else if !is_rel_file_path(relpath.to_str().unwrap()) {
trace!("sending {}", relpath.display());
ar.append_path_with_name(fullpath, relpath)?;
} else {
trace!("not sending {}", relpath.display());
// FIXME: send all files for now
ar.append_path_with_name(fullpath, relpath)?;
}
} else {
error!("unknown file type: {}", fullpath.display());
}
}

// FIXME: also send all the WAL
for entry in std::fs::read_dir(&walpath)? {
let entry = entry?;
let fullpath = &entry.path();
let relpath = fullpath.strip_prefix(&walpath).unwrap();

if !entry.path().is_file() {
continue;
}

let archive_fname = relpath.to_str().unwrap().clone();
let archive_fname = archive_fname
.strip_suffix(".partial")
.unwrap_or(&archive_fname);
let archive_path = "pg_wal/".to_owned() + archive_fname;
ar.append_path_with_name(fullpath, archive_path)?;
}

ar.finish()?;
debug!("all tarred up!");
Ok(())
}
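// Editor's note (illustrative, not part of this commit): stripping the
// ".partial" suffix above means the WAL segment created by "zenith init" is
// restored under the name postgres expects, e.g.
//
//     timelines/<tli>/wal/000000010000000000000001.partial
//         -> pg_wal/000000010000000000000001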

// formats:
// <oid>
// <oid>_<fork name>
// <oid>.<segment number>
// <oid>_<fork name>.<segment number>

#[derive(Debug)]
struct FilePathError {
msg: String,
}

impl FilePathError {
fn new(msg: &str) -> FilePathError {
FilePathError {
msg: msg.to_string(),
}
}
}

impl From<core::num::ParseIntError> for FilePathError {
fn from(e: core::num::ParseIntError) -> Self {
return FilePathError {
msg: format!("invalid filename: {}", e),
};
}
}

impl fmt::Display for FilePathError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "invalid filename")
}
}

fn forkname_to_forknum(forkname: Option<&str>) -> Result<u32, FilePathError> {
match forkname {
// "main" is not in filenames, it's implicit if the fork name is not present
None => Ok(0),
Some("fsm") => Ok(1),
Some("vm") => Ok(2),
Some("init") => Ok(3),
Some(_) => Err(FilePathError::new("invalid forkname")),
}
}

fn parse_filename(fname: &str) -> Result<(u32, u32, u32), FilePathError> {
let re = Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();

let caps = re
.captures(fname)
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;

let relnode_str = caps.name("relnode").unwrap().as_str();
let relnode = u32::from_str_radix(relnode_str, 10)?;

let forkname_match = caps.name("forkname");
let forkname = if forkname_match.is_none() {
None
} else {
Some(forkname_match.unwrap().as_str())
};
let forknum = forkname_to_forknum(forkname)?;

let segno_match = caps.name("segno");
let segno = if segno_match.is_none() {
0
} else {
u32::from_str_radix(segno_match.unwrap().as_str(), 10)?
};

return Ok((relnode, forknum, segno));
}
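// Editor's sketch (illustrative, not part of this commit): examples of the
// accepted formats listed above, as (relnode, forknum, segno) triples:
//
//     assert_eq!(parse_filename("1234").unwrap(), (1234, 0, 0));
//     assert_eq!(parse_filename("1234_fsm").unwrap(), (1234, 1, 0));
//     assert_eq!(parse_filename("1234_vm.2").unwrap(), (1234, 2, 2));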

fn parse_rel_file_path(path: &str) -> Result<(), FilePathError> {
/*
* Relation data files can be in one of the following directories:
*
* global/
*    shared relations
*
* base/<db oid>/
*    regular relations, default tablespace
*
* pg_tblspc/<tblspc oid>/<tblspc version>/
*    within a non-default tablespace (the name of the directory
*    depends on version)
*
* And the relation data files themselves have a filename like:
*
* <oid>.<segment number>
*/
if let Some(fname) = path.strip_prefix("global/") {
let (_relnode, _forknum, _segno) = parse_filename(fname)?;

return Ok(());
} else if let Some(dbpath) = path.strip_prefix("base/") {
let mut s = dbpath.split("/");
let dbnode_str = s
.next()
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
let _dbnode = u32::from_str_radix(dbnode_str, 10)?;
let fname = s
.next()
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
if s.next().is_some() {
return Err(FilePathError::new("invalid relation data file name"));
};

let (_relnode, _forknum, _segno) = parse_filename(fname)?;

return Ok(());
} else if let Some(_) = path.strip_prefix("pg_tblspc/") {
// TODO
return Err(FilePathError::new("tablespaces not supported"));
} else {
return Err(FilePathError::new("invalid relation data file name"));
}
}

fn is_rel_file_path(path: &str) -> bool {
return parse_rel_file_path(path).is_ok();
}
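// Editor's sketch (illustrative, not part of this commit): per the rules
// above,
//
//     assert!(is_rel_file_path("global/1260"));          // shared catalog
//     assert!(is_rel_file_path("base/16384/16385_vm"));  // regular relation
//     assert!(!is_rel_file_path("pg_tblspc/16390/x"));   // not supported yet
//     assert!(!is_rel_file_path("PG_VERSION"));          // not a rel file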
@@ -1,43 +0,0 @@
use anyhow::Result;
use clap::{App, AppSettings};

pub mod pg;
pub mod snapshot;
pub mod storage;
mod subcommand;

fn main() -> Result<()> {
let cli_commands = subcommand::ClapCommands {
commands: vec![
Box::new(pg::PgCmd {
clap_cmd: clap::SubCommand::with_name("pg"),
}),
Box::new(storage::StorageCmd {
clap_cmd: clap::SubCommand::with_name("storage"),
}),
Box::new(snapshot::SnapshotCmd {
clap_cmd: clap::SubCommand::with_name("snapshot"),
}),
],
};

let matches = App::new("zenith")
.about("Zenith CLI")
.version("1.0")
.setting(AppSettings::SubcommandRequiredElseHelp)
.subcommands(cli_commands.generate())
.get_matches();

if let Some(subcommand) = matches.subcommand_name() {
println!("'git {}' was used", subcommand);
}

match matches.subcommand() {
("pg", Some(sub_args)) => cli_commands.commands[0].run(sub_args.clone())?,
("storage", Some(sub_args)) => cli_commands.commands[1].run(sub_args.clone())?,
("snapshot", Some(sub_args)) => cli_commands.commands[2].run(sub_args.clone())?,
("", None) => println!("No subcommand"),
_ => unreachable!(),
}
Ok(())
}
@@ -1,105 +0,0 @@
use anyhow::Result;
use clap::{App, AppSettings, Arg};

use crate::subcommand;

pub struct PgCmd<'a> {
pub clap_cmd: clap::App<'a, 'a>,
}

impl subcommand::SubCommand for PgCmd<'_> {
fn gen_clap_command(&self) -> clap::App {
let c = self.clap_cmd.clone();
c.about("Operations with zenith compute nodes")
.setting(AppSettings::SubcommandRequiredElseHelp)
.subcommand(App::new("list").about("List existing compute nodes"))
.subcommand(
App::new("create")
.about(
"Create (init) new data directory using given storage and start postgres",
)
.arg(
Arg::with_name("name")
.short("n")
.long("name")
.takes_value(true)
.help("Name of the compute node"),
)
.arg(
Arg::with_name("storage")
.short("s")
.long("storage")
.takes_value(true)
.help("Name of the storage node to use"),
)
//TODO should it be just name of uploaded snapshot or some path?
.arg(
Arg::with_name("snapshot")
.long("snapshot")
.takes_value(true)
.help("Name of the snapshot to use"),
)
.arg(
Arg::with_name("nostart")
.long("no-start")
.takes_value(false)
.help("Don't start postgres on the created node"),
),
)
.subcommand(
App::new("destroy")
.about("Stop postgres and destroy node's data directory")
.arg(
Arg::with_name("name")
.short("n")
.long("name")
.takes_value(true)
.help("Name of the compute node"),
),
)
.subcommand(
App::new("start")
.about("Start postgres on the given node")
.arg(
Arg::with_name("name")
.short("n")
.long("name")
.takes_value(true)
.help("Name of the compute node"),
)
.arg(
Arg::with_name("replica")
.long("replica")
.takes_value(false)
.help("Start the compute node as replica"),
),
)
.subcommand(
App::new("stop")
.about("Stop postgres on the given node")
.arg(
Arg::with_name("name")
.short("n")
.long("name")
.takes_value(true)
.help("Name of the compute node"),
),
)
.subcommand(
App::new("show")
.about("Show info about the given node")
.arg(
Arg::with_name("name")
.short("n")
.long("name")
.takes_value(true)
.help("Name of the compute node"),
),
)
}

fn run(&self, args: clap::ArgMatches) -> Result<()> {
println!("Run PgCmd with args {:?}", args);
Ok(())
}
}
@@ -1,27 +0,0 @@
use anyhow::Result;
use clap::{App, AppSettings, Arg};

use crate::subcommand;

pub struct SnapshotCmd<'a> {
pub clap_cmd: clap::App<'a, 'a>,
}

impl subcommand::SubCommand for SnapshotCmd<'_> {
fn gen_clap_command(&self) -> clap::App {
let c = self.clap_cmd.clone();
c.about("Operations with zenith snapshots")
.setting(AppSettings::SubcommandRequiredElseHelp)
.subcommand(App::new("list"))
.subcommand(App::new("create").arg(Arg::with_name("pgdata").required(true)))
.subcommand(App::new("destroy"))
.subcommand(App::new("start"))
.subcommand(App::new("stop"))
.subcommand(App::new("show"))
}

fn run(&self, args: clap::ArgMatches) -> Result<()> {
println!("Run SnapshotCmd with args {:?}", args);
Ok(())
}
}
@@ -1,25 +0,0 @@
use anyhow::Result;
use clap::{App, AppSettings};

use crate::subcommand;

pub struct StorageCmd<'a> {
pub clap_cmd: clap::App<'a, 'a>,
}

impl subcommand::SubCommand for StorageCmd<'_> {
fn gen_clap_command(&self) -> clap::App {
let c = self.clap_cmd.clone();
c.about("Operations with zenith storage nodes")
.setting(AppSettings::SubcommandRequiredElseHelp)
.subcommand(App::new("list"))
.subcommand(App::new("attach"))
.subcommand(App::new("detach"))
.subcommand(App::new("show"))
}

fn run(&self, args: clap::ArgMatches) -> Result<()> {
println!("Run StorageCmd with args {:?}", args);
Ok(())
}
}
@@ -1,29 +0,0 @@
use anyhow::Result;

/// All subcommands need to implement this interface.
pub trait SubCommand {
/// Generates the cli-config that Clap requires for the subcommand.
fn gen_clap_command(&self) -> clap::App;

/// Runs the body of the subcommand.
fn run(&self, args: clap::ArgMatches) -> Result<()>;
}

/// A struct which holds a vector of heap-allocated `Box`es of trait objects all of which must
/// implement the `SubCommand` trait, but other than that, can be of any type.
pub struct ClapCommands {
pub commands: Vec<Box<dyn SubCommand>>,
}

impl ClapCommands {
/// Generates a vector of `clap::Apps` that can be passed into clap's `.subcommands()` method in
/// order to generate the full CLI.
pub fn generate(&self) -> Vec<clap::App> {
let mut v: Vec<clap::App> = Vec::new();

for command in self.commands.iter() {
v.push(command.gen_clap_command());
}
v
}
}
@@ -4,6 +4,7 @@

use log::*;
use std::fs;
use std::fs::{File, OpenOptions};
use std::io;
use std::path::PathBuf;
use std::process::exit;
@@ -17,59 +18,50 @@ use daemonize::Daemonize;
use slog::Drain;

use pageserver::page_service;
use pageserver::restore_datadir;
use pageserver::restore_s3;
use pageserver::tui;
use pageserver::walreceiver;
//use pageserver::walreceiver;
use pageserver::PageServerConf;

fn zenith_repo_dir() -> String {
// Find repository path
match std::env::var_os("ZENITH_REPO_DIR") {
Some(val) => String::from(val.to_str().unwrap()),
None => ".zenith".into(),
}
}
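// Editor's note (illustrative, not part of this commit): ZENITH_REPO_DIR is
// how the control plane points the daemon at a repository (see
// PageServerNode::start above), with ".zenith" as the fallback, e.g.:
//
//     ZENITH_REPO_DIR=/path/to/repo pageserver -l 127.0.0.1:5430 -d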

fn main() -> Result<()> {
let arg_matches = App::new("Zenith page server")
.about("Materializes WAL stream to pages and serves them to the postgres")
.arg(Arg::with_name("datadir")
.short("D")
.long("dir")
.takes_value(true)
.help("Path to the page server data directory"))
.arg(Arg::with_name("wal_producer")
.short("w")
.long("wal-producer")
.takes_value(true)
.help("connect to the WAL sender (postgres or wal_acceptor) on connstr (default: 'host=127.0.0.1 port=65432 user=zenith')"))
.arg(Arg::with_name("listen")
.short("l")
.long("listen")
.takes_value(true)
.help("listen for incoming page requests on ip:port (default: 127.0.0.1:5430)"))
.arg(Arg::with_name("interactive")
.short("i")
.long("interactive")
.takes_value(false)
.help("Interactive mode"))
.arg(Arg::with_name("daemonize")
.short("d")
.long("daemonize")
.takes_value(false)
.help("Run in the background"))
.arg(Arg::with_name("restore_from")
.long("restore-from")
.takes_value(true)
.help("Upload data from s3 or datadir"))
.arg(
Arg::with_name("listen")
.short("l")
.long("listen")
.takes_value(true)
.help("listen for incoming page requests on ip:port (default: 127.0.0.1:5430)"),
)
.arg(
Arg::with_name("interactive")
.short("i")
.long("interactive")
.takes_value(false)
.help("Interactive mode"),
)
.arg(
Arg::with_name("daemonize")
.short("d")
.long("daemonize")
.takes_value(false)
.help("Run in the background"),
)
.get_matches();

let mut conf = PageServerConf {
data_dir: PathBuf::from("./"),
daemonize: false,
interactive: false,
wal_producer_connstr: None,
listen_addr: "127.0.0.1:5430".parse().unwrap(),
restore_from: String::new(),
};

if let Some(dir) = arg_matches.value_of("datadir") {
conf.data_dir = PathBuf::from(dir);
}

if arg_matches.is_present("daemonize") {
conf.daemonize = true;
}
@@ -83,14 +75,6 @@ fn main() -> Result<()> {
exit(1);
}

if let Some(restore_from) = arg_matches.value_of("restore_from") {
conf.restore_from = String::from(restore_from);
}

if let Some(addr) = arg_matches.value_of("wal_producer") {
conf.wal_producer_connstr = Some(String::from(addr));
}

if let Some(addr) = arg_matches.value_of("listen") {
conf.listen_addr = addr.parse()?;
}
@@ -125,19 +109,25 @@ fn start_pageserver(conf: &PageServerConf) -> Result<()> {
if conf.daemonize {
info!("daemonizing...");

// There shouldn't be any logging to stdin/stdout. Redirect it to the main log so
let repodir = PathBuf::from(zenith_repo_dir());

// There shouldn't be any logging to stdin/stdout. Redirect it to the main log so
// that we will see any accidental manual fprintf's or backtraces.
let log_filename = conf.data_dir.join("pageserver.log");
let log_filename = repodir.join("pageserver.log");
let stdout = OpenOptions::new()
.create(true)
.append(true)
.open(&log_filename)
.with_context(|| format!("failed to open {:?}", log_filename))?;
let stderr = stdout.try_clone()?;
.with_context(|| format!("failed to open {:?}", &log_filename))?;
let stderr = OpenOptions::new()
.create(true)
.append(true)
.open(&log_filename)
.with_context(|| format!("failed to open {:?}", &log_filename))?;

let daemonize = Daemonize::new()
.pid_file(conf.data_dir.join("pageserver.pid"))
.working_directory(conf.data_dir.clone())
.pid_file(repodir.clone().join("pageserver.pid"))
.working_directory(repodir)
.stdout(stdout)
.stderr(stderr);

@@ -145,25 +135,20 @@ fn start_pageserver(conf: &PageServerConf) -> Result<()> {
Ok(_) => info!("Success, daemonized"),
Err(e) => error!("Error, {}", e),
}
} else {
// change into the repository directory. In daemon mode, Daemonize
// does this for us.
let repodir = zenith_repo_dir();
std::env::set_current_dir(&repodir)?;
info!("Changed current directory to repository in {}", &repodir);
}

let mut threads = Vec::new();

info!("starting... {}", conf.restore_from);

// Before opening up for connections, restore the latest base backup from S3.
// (We don't persist anything to local disk at the moment, so we need to do
// this at every startup)
if conf.restore_from.eq("s3") {
info!("restore-from s3...");
restore_s3::restore_main(&conf);
} else if conf.restore_from.eq("local") {
info!("restore-from local...");
restore_datadir::restore_main(&conf);
}
// TODO: Check that it looks like a valid repository before going further

// Create directory for wal-redo datadirs
match fs::create_dir(conf.data_dir.join("wal-redo")) {
match fs::create_dir("wal-redo") {
Ok(_) => {}
Err(e) => match e.kind() {
io::ErrorKind::AlreadyExists => {}
@@ -173,25 +158,6 @@ fn start_pageserver(conf: &PageServerConf) -> Result<()> {
},
}

// Launch the WAL receiver thread if pageserver was started with --wal-producer
// option. It will try to connect to the WAL safekeeper, and stream the WAL. If
// the connection is lost, it will reconnect on its own. We just fire and forget
// it here.
//
// All other wal receivers are started on demand by "callmemaybe" command
// sent to pageserver.
if let Some(wal_producer) = &conf.wal_producer_connstr {
let conf_copy = conf.clone();
let wal_producer = wal_producer.clone();
let walreceiver_thread = thread::Builder::new()
.name("static WAL receiver thread".into())
.spawn(move || {
walreceiver::thread_main(&conf_copy, &wal_producer);
})
.unwrap();
threads.push(walreceiver_thread);
}

// GetPage@LSN requests are served by another thread. (It uses async I/O,
// but the code in page_service sets up its own thread pool for that)
let conf_copy = conf.clone();
@@ -220,20 +186,19 @@ fn init_logging(conf: &PageServerConf) -> Result<slog_scope::GlobalLoggerGuard,
if conf.interactive {
Ok(tui::init_logging())
} else if conf.daemonize {
let log = conf.data_dir.join("pageserver.log");
let log = zenith_repo_dir() + "/pageserver.log";
let log_file = OpenOptions::new()
.create(true)
.append(true)
.open(&log).map_err(|err| {
// We failed to initialize logging, so we can't log this message with error!
eprintln!("Could not create log file {:?}: {}", log, err);
err
})?;

// We failed to initialize logging, so we can't log this message with error!
eprintln!("Could not create log file {:?}: {}", log, err);
err
})?;
let decorator = slog_term::PlainSyncDecorator::new(log_file);
let drain = slog_term::CompactFormat::new(decorator).build();
let drain = slog::Filter::new(drain, |record: &slog::Record| {
if record.level().is_at_least(slog::Level::Info) {
if record.level().is_at_least(slog::Level::Debug) {
return true;
}
return false;

@@ -1,11 +1,12 @@
use std::fmt;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::str::FromStr;

pub mod basebackup;
pub mod page_cache;
pub mod page_service;
pub mod pg_constants;
pub mod restore_datadir;
pub mod restore_s3;
pub mod restore_local_repo;
pub mod tui;
pub mod tui_event;
mod tui_logger;
@@ -15,10 +16,45 @@ pub mod walredo;

#[derive(Debug, Clone)]
pub struct PageServerConf {
pub data_dir: PathBuf,
pub daemonize: bool,
pub interactive: bool,
pub wal_producer_connstr: Option<String>,
pub listen_addr: SocketAddr,
pub restore_from: String,
}

// Zenith Timeline ID is a 16-byte random ID, printed as 32 hex digits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ZTimelineId([u8; 16]);

impl FromStr for ZTimelineId {
type Err = hex::FromHexError;

fn from_str(s: &str) -> Result<ZTimelineId, Self::Err> {
let timelineid = hex::decode(s)?;

let mut buf: [u8; 16] = [0u8; 16];
buf.copy_from_slice(timelineid.as_slice());
Ok(ZTimelineId(buf))
}
}

impl ZTimelineId {
pub fn from(b: [u8; 16]) -> ZTimelineId {
ZTimelineId(b)
}

pub fn get_from_buf(buf: &mut dyn bytes::Buf) -> ZTimelineId {
let mut arr = [0u8; 16];
buf.copy_to_slice(&mut arr);
ZTimelineId::from(arr)
}

pub fn as_arr(&self) -> [u8; 16] {
self.0
}
}

impl fmt::Display for ZTimelineId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&hex::encode(self.0))
}
}
|
||||
|
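Illustrative sketch, not part of the diff: round-tripping the ZTimelineId defined above through its hex form, assuming the hex crate and the FromStr/Display impls in this hunk.

use std::str::FromStr;

fn main() {
    // 16 bytes = 32 hex digits; other lengths would panic in from_str,
    // because copy_from_slice requires an exact length match.
    let s = "11223344556677881122334455667788";
    let id = ZTimelineId::from_str(s).unwrap();
    assert_eq!(id.to_string(), s);
}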
@@ -6,6 +6,8 @@
// per-entry mutex.
//

use crate::restore_local_repo::restore_timeline;
use crate::ZTimelineId;
use crate::{walredo, PageServerConf};
use anyhow::bail;
use bytes::{Buf, BufMut, Bytes, BytesMut};
@@ -99,34 +101,57 @@ struct PageCacheShared {
}

lazy_static! {
pub static ref PAGECACHES: Mutex<HashMap<u64, Arc<PageCache>>> = Mutex::new(HashMap::new());
pub static ref PAGECACHES: Mutex<HashMap<ZTimelineId, Arc<PageCache>>> =
Mutex::new(HashMap::new());
}

pub fn get_pagecache(conf: &PageServerConf, sys_id: u64) -> Arc<PageCache> {
// Get Page Cache for given timeline. It is assumed to already exist.
pub fn get_pagecache(_conf: &PageServerConf, timelineid: ZTimelineId) -> Option<Arc<PageCache>> {
let pcaches = PAGECACHES.lock().unwrap();

match pcaches.get(&timelineid) {
Some(pcache) => Some(pcache.clone()),
None => None,
}
}

pub fn get_or_restore_pagecache(
conf: &PageServerConf,
timelineid: ZTimelineId,
) -> anyhow::Result<Arc<PageCache>> {
let mut pcaches = PAGECACHES.lock().unwrap();

if !pcaches.contains_key(&sys_id) {
pcaches.insert(sys_id, Arc::new(init_page_cache(&conf, sys_id)));
match pcaches.get(&timelineid) {
Some(pcache) => Ok(pcache.clone()),
None => {
let pcache = init_page_cache(&conf, timelineid);

// Initialize the WAL redo thread
//
// For now, the join handle is not saved anywhere and we won't restart the thread
// if it dies. We may later stop these threads after some inactivity period
// and restart them on demand.
let conf = conf.clone();
let _walredo_thread = thread::Builder::new()
.name("WAL redo thread".into())
.spawn(move || {
walredo::wal_redo_main(&conf, sys_id);
})
.unwrap();
restore_timeline(conf, &pcache, timelineid)?;

let result = Arc::new(pcache);

pcaches.insert(timelineid, result.clone());

// Initialize the WAL redo thread
//
// For now, the join handle is not saved anywhere and we won't restart the thread
// if it dies. We may later stop these threads after some inactivity period
// and restart them on demand.
let conf_copy = conf.clone();
let _walredo_thread = thread::Builder::new()
.name("WAL redo thread".into())
.spawn(move || {
walredo::wal_redo_main(&conf_copy, timelineid);
})
.unwrap();

return Ok(result);
}
}

pcaches.get(&sys_id).unwrap().clone()
}

fn open_rocksdb(conf: &PageServerConf, sys_id: u64) -> DB {
let path = conf.data_dir.join(sys_id.to_string());
fn open_rocksdb(conf: &PageServerConf, timelineid: u64) -> DB {
let path = conf.data_dir.join(timelineid.to_string());
let mut opts = Options::default();
opts.create_if_missing(true);
opts.set_use_fsync(true);
@@ -134,12 +159,12 @@ fn open_rocksdb(conf: &PageServerConf, sys_id: u64) -> DB {
DB::open(&opts, &path).unwrap()
}

fn init_page_cache(conf: &PageServerConf, sys_id: u64) -> PageCache {
fn init_page_cache(conf: &PageServerConf, timelineid: u64) -> PageCache {
// Initialize the channel between the page cache and the WAL applicator
let (s, r) = unbounded();

PageCache {
db: open_rocksdb(&conf, sys_id),
db: open_rocksdb(&conf, timelineid),
shared: Mutex::new(PageCacheShared {
first_valid_lsn: 0,
last_valid_lsn: 0,
@@ -520,7 +545,8 @@ impl PageCache {
// Adds a WAL record to the page cache
//
pub fn put_wal_record(&self, tag: BufferTag, rec: WALRecord) {
let key = CacheKey { tag, lsn: rec.lsn };
let lsn = rec.lsn;
let key = CacheKey { tag, lsn };

let content = CacheEntryContent {
page_image: None,
@@ -533,8 +559,8 @@ impl PageCache {
let mut val_buf = BytesMut::new();
content.pack(&mut val_buf);

trace!("put_wal_record lsn: {}", key.lsn);
let _res = self.db.put(&key_buf[..], &val_buf[..]);
//trace!("put_wal_record lsn: {}", lsn);

self.num_entries.fetch_add(1, Ordering::Relaxed);
self.num_wal_records.fetch_add(1, Ordering::Relaxed);
@@ -599,17 +625,19 @@ impl PageCache {
let mut shared = self.shared.lock().unwrap();

// Can't move backwards.
//assert!(lsn >= shared.last_valid_lsn);
if lsn > shared.last_valid_lsn {
let oldlsn = shared.last_valid_lsn;
if lsn >= oldlsn {
shared.last_valid_lsn = lsn;
self.valid_lsn_condvar.notify_all();

self.last_valid_lsn.store(lsn, Ordering::Relaxed);
} else {
trace!(
"lsn={}, shared.last_valid_lsn={}",
lsn,
shared.last_valid_lsn
warn!(
"attempted to move last valid LSN backwards (was {:X}/{:X}, new {:X}/{:X})",
oldlsn >> 32,
oldlsn & 0xffffffff,
lsn >> 32,
lsn & 0xffffffff
);
}
}
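Not part of the diff: the warn! above prints a 64-bit LSN split into the usual Postgres high/low halves. The same formatting as a standalone sketch:

fn format_lsn(lsn: u64) -> String {
    // split the LSN into its two 32-bit halves, as Postgres does
    format!("{:X}/{:X}", lsn >> 32, lsn & 0xffff_ffff)
}

fn main() {
    assert_eq!(format_lsn(0x0000_0001_6B37_2D28), "1/6B372D28");
}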
@@ -7,29 +7,43 @@
// *status* -- show actual info about this pageserver,
// *pagestream* -- enter mode where smgr and pageserver talk with their
// custom protocol.
// *callmemaybe $url* -- ask pageserver to start walreceiver on $url
// *callmemaybe <zenith timelineid> $url* -- ask pageserver to start walreceiver on $url
//

use byteorder::{BigEndian, ByteOrder};
use bytes::{Buf, Bytes, BytesMut};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use log::*;
use regex::Regex;
use std::io;
use std::str::FromStr;
use std::sync::Arc;
use std::thread;
use tokio::io::{AsyncReadExt, AsyncWriteExt, BufWriter};
use tokio::net::{TcpListener, TcpStream};
use tokio::runtime;
use tokio::runtime::Runtime;
use tokio::sync::mpsc;
use tokio::task;

use crate::basebackup;
use crate::page_cache;
use crate::restore_local_repo;
use crate::walreceiver;
use crate::PageServerConf;
use crate::ZTimelineId;

type Result<T> = std::result::Result<T, io::Error>;

#[derive(Debug)]
enum FeMessage {
StartupMessage(FeStartupMessage),
Query(FeQueryMessage),
Query(FeQueryMessage), // Simple query
Parse(FeParseMessage), // Extended query protocol
Describe(FeDescribeMessage),
Bind(FeBindMessage),
Execute(FeExecuteMessage),
Close(FeCloseMessage),
Sync,
Terminate,

//
@@ -45,6 +59,11 @@ enum BeMessage {
AuthenticationOk,
ReadyForQuery,
RowDescription,
ParseComplete,
ParameterDescription,
NoData,
BindComplete,
CloseComplete,
DataRow,
CommandComplete,
ControlFile,
@@ -141,6 +160,176 @@ struct FeQueryMessage {
body: Bytes,
}

// We only support the simple case of Parse on unnamed prepared statement and
// no params
#[derive(Debug)]
struct FeParseMessage {
query_string: Bytes,
}

fn read_null_terminated(buf: &mut Bytes) -> Result<Bytes> {
let mut result = BytesMut::new();

loop {
if !buf.has_remaining() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"no null-terminator in string",
));
}

let byte = buf.get_u8();

if byte == 0 {
break;
}
result.put_u8(byte);
}
return Ok(result.freeze());
}
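Illustrative only, not from the diff: the read_null_terminated helper above peels C-style strings off a protocol buffer, e.g.:

fn main() -> std::io::Result<()> {
    use bytes::Bytes;
    let mut buf = Bytes::from_static(b"stmt\0SELECT 1\0");
    assert_eq!(&read_null_terminated(&mut buf)?[..], b"stmt");
    assert_eq!(&read_null_terminated(&mut buf)?[..], b"SELECT 1");
    Ok(())
}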
impl FeParseMessage {
pub fn parse(body: Bytes) -> Result<FeMessage> {
let mut buf = body.clone();
let _pstmt_name = read_null_terminated(&mut buf)?;
let query_string = read_null_terminated(&mut buf)?;
let nparams = buf.get_i16();

// FIXME: the rust-postgres driver uses a named prepared statement
// for copy_out(). We're not prepared to handle that correctly. For
// now, just ignore the statement name, assuming that the client never
// uses more than one prepared statement at a time.
/*
if pstmt_name.len() != 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"named prepared statements not implemented in Parse",
));
}
*/

if nparams != 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"query params not implemented",
));
}

Ok(FeMessage::Parse(FeParseMessage { query_string }))
}
}

#[derive(Debug)]
struct FeDescribeMessage {
kind: u8, // 'S' to describe a prepared statement; or 'P' to describe a portal.
// we only support unnamed prepared stmt or portal
}

impl FeDescribeMessage {
pub fn parse(body: Bytes) -> Result<FeMessage> {
let mut buf = body.clone();
let kind = buf.get_u8();
let _pstmt_name = read_null_terminated(&mut buf)?;

// FIXME: see FeParseMessage::parse
/*
if pstmt_name.len() != 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"named prepared statements not implemented in Describe",
));
}
*/

if kind != b'S' {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"only prepared statement Describe is implemented",
));
}

Ok(FeMessage::Describe(FeDescribeMessage { kind }))
}
}

// we only support unnamed prepared stmt or portal
#[derive(Debug)]
struct FeExecuteMessage {
/// max # of rows
maxrows: i32,
}

impl FeExecuteMessage {
pub fn parse(body: Bytes) -> Result<FeMessage> {
let mut buf = body.clone();
let portal_name = read_null_terminated(&mut buf)?;
let maxrows = buf.get_i32();

if portal_name.len() != 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"named portals not implemented",
));
}

if maxrows != 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"row limit in Execute message not supported",
));
}

Ok(FeMessage::Execute(FeExecuteMessage { maxrows }))
}
}

// we only support unnamed prepared stmt and portal
#[derive(Debug)]
struct FeBindMessage {}

impl FeBindMessage {
pub fn parse(body: Bytes) -> Result<FeMessage> {
let mut buf = body.clone();
let portal_name = read_null_terminated(&mut buf)?;
let _pstmt_name = read_null_terminated(&mut buf)?;

if portal_name.len() != 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"named portals not implemented",
));
}

// FIXME: see FeParseMessage::parse
/*
if pstmt_name.len() != 0 {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"named prepared statements not implemented",
));
}
*/

Ok(FeMessage::Bind(FeBindMessage {}))
}
}

// we only support unnamed prepared stmt and portal
#[derive(Debug)]
struct FeCloseMessage {}

impl FeCloseMessage {
pub fn parse(body: Bytes) -> Result<FeMessage> {
let mut buf = body.clone();
let _kind = buf.get_u8();
let _pstmt_or_portal_name = read_null_terminated(&mut buf)?;

// FIXME: we do nothing with Close

Ok(FeMessage::Close(FeCloseMessage {}))
}
}

impl FeMessage {
pub fn parse(buf: &mut BytesMut) -> Result<Option<FeMessage>> {
if buf.len() < 5 {
@@ -169,10 +358,16 @@ impl FeMessage {
let mut body = buf.split_to(total_len);
body.advance(5);

let mut body = body.freeze();

match tag {
b'Q' => Ok(Some(FeMessage::Query(FeQueryMessage {
body: body.freeze(),
}))),
b'Q' => Ok(Some(FeMessage::Query(FeQueryMessage { body: body }))),
b'P' => Ok(Some(FeParseMessage::parse(body)?)),
b'D' => Ok(Some(FeDescribeMessage::parse(body)?)),
b'E' => Ok(Some(FeExecuteMessage::parse(body)?)),
b'B' => Ok(Some(FeBindMessage::parse(body)?)),
b'C' => Ok(Some(FeCloseMessage::parse(body)?)),
b'S' => Ok(Some(FeMessage::Sync)),
b'X' => Ok(Some(FeMessage::Terminate)),
b'd' => {
let smgr_tag = body.get_u8();
@@ -210,24 +405,35 @@ impl FeMessage {
pub fn thread_main(conf: &PageServerConf) {
// Create a new thread pool
//
// FIXME: keep it single-threaded for now, make it easier to debug with gdb,
// and we're not concerned with performance yet.
//let runtime = runtime::Runtime::new().unwrap();
let runtime = runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
// FIXME: It would be nice to keep this single-threaded for debugging purposes,
// but that currently leads to a deadlock: if a GetPage@LSN request arrives
// for an LSN that hasn't been received yet, the thread gets stuck waiting for
// the WAL to arrive. If the WAL receiver hasn't been launched yet, i.e.
// we haven't received a "callmemaybe" request yet to tell us where to get the
// WAL, we will not have a thread available to process the "callmemaybe"
// request when it does arrive. Using a thread pool alleviates the problem so
// that it doesn't happen in the tests anymore, but in principle it could still
// happen if we receive enough GetPage@LSN requests to consume all of the
// available threads.
//let runtime = runtime::Builder::new_current_thread().enable_all().build().unwrap();
let runtime = runtime::Runtime::new().unwrap();
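// [Illustrative, not in the diff] If the thread-exhaustion scenario described
// above needs tighter control, the pool could also be built explicitly with a
// bounded worker count (the count here is a made-up example):
//
//     let runtime = tokio::runtime::Builder::new_multi_thread()
//         .worker_threads(8)
//         .enable_all()
//         .build()
//         .unwrap();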
info!("Starting page server on {}", conf.listen_addr);

runtime.block_on(async {
let runtime_ref = Arc::new(runtime);

runtime_ref.clone().block_on(async {
let listener = TcpListener::bind(conf.listen_addr).await.unwrap();

loop {
let (socket, peer_addr) = listener.accept().await.unwrap();
debug!("accepted connection from {}", peer_addr);
<<<<<<< HEAD
socket.set_nodelay(true).unwrap();
let mut conn_handler = Connection::new(conf.clone(), socket);
=======
let mut conn_handler = Connection::new(conf.clone(), socket, &runtime_ref);
>>>>>>> main

task::spawn(async move {
if let Err(err) = conn_handler.run().await {
@@ -244,15 +450,17 @@ struct Connection {
buffer: BytesMut,
init_done: bool,
conf: PageServerConf,
runtime: Arc<Runtime>,
}

impl Connection {
pub fn new(conf: PageServerConf, socket: TcpStream) -> Connection {
pub fn new(conf: PageServerConf, socket: TcpStream, runtime: &Arc<Runtime>) -> Connection {
Connection {
stream: BufWriter::new(socket),
buffer: BytesMut::with_capacity(10 * 1024),
init_done: false,
conf,
runtime: Arc::clone(runtime),
}
}

@@ -300,6 +508,33 @@ impl Connection {
self.stream.write_u8(b'I').await?;
}

BeMessage::ParseComplete => {
self.stream.write_u8(b'1').await?;
self.stream.write_i32(4).await?;
}

BeMessage::BindComplete => {
self.stream.write_u8(b'2').await?;
self.stream.write_i32(4).await?;
}

BeMessage::CloseComplete => {
self.stream.write_u8(b'3').await?;
self.stream.write_i32(4).await?;
}

BeMessage::NoData => {
self.stream.write_u8(b'n').await?;
self.stream.write_i32(4).await?;
}

BeMessage::ParameterDescription => {
self.stream.write_u8(b't').await?;
self.stream.write_i32(6).await?;
// we don't support params, so always 0
self.stream.write_i16(0).await?;
}
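Not from the diff: each of the parameterless replies above has the same wire shape, a one-byte type tag followed by an i32 length that counts only itself. A generic sketch of that framing:

use tokio::io::{AsyncWrite, AsyncWriteExt};

async fn write_empty_msg<W: AsyncWrite + Unpin>(w: &mut W, tag: u8) -> std::io::Result<()> {
    w.write_u8(tag).await?; // message type, e.g. b'1' for ParseComplete
    w.write_i32(4).await?;  // length field includes itself, nothing else
    Ok(())
}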
BeMessage::RowDescription => {
// XXX
let mut b = Bytes::from("data\0");
@@ -389,8 +624,11 @@ impl Connection {
}

async fn run(&mut self) -> Result<()> {
let mut unnamed_query_string = Bytes::new();
loop {
match self.read_message().await? {
let msg = self.read_message().await?;
info!("got message {:?}", msg);
match msg {
Some(FeMessage::StartupMessage(m)) => {
trace!("got message {:?}", m);

@@ -410,7 +648,28 @@ impl Connection {
}
}
Some(FeMessage::Query(m)) => {
self.process_query(&m).await?;
self.process_query(m.body).await?;
}
Some(FeMessage::Parse(m)) => {
unnamed_query_string = m.query_string;
self.write_message(&BeMessage::ParseComplete).await?;
}
Some(FeMessage::Describe(_)) => {
self.write_message_noflush(&BeMessage::ParameterDescription)
.await?;
self.write_message(&BeMessage::NoData).await?;
}
Some(FeMessage::Bind(_)) => {
self.write_message(&BeMessage::BindComplete).await?;
}
Some(FeMessage::Close(_)) => {
self.write_message(&BeMessage::CloseComplete).await?;
}
Some(FeMessage::Execute(_)) => {
self.process_query(unnamed_query_string.clone()).await?;
}
Some(FeMessage::Sync) => {
self.write_message(&BeMessage::ReadyForQuery).await?;
}
Some(FeMessage::Terminate) => {
break;
@@ -419,7 +678,8 @@ impl Connection {
info!("connection closed");
break;
}
_ => {
x => {
error!("unexpected message type : {:?}", x);
return Err(io::Error::new(io::ErrorKind::Other, "unexpected message"));
}
}
@@ -428,41 +688,62 @@ impl Connection {
Ok(())
}
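Not part of the diff: the match arms above implement just enough of the extended query protocol for a client exchange like the following (client message on the left, pageserver reply on the right):

// Parse("basebackup <timelineid>")  -> ParseComplete
// Describe('S')                     -> ParameterDescription, NoData
// Bind                              -> BindComplete
// Execute                           -> query output (e.g. the CopyOut stream)
// Sync                              -> ReadyForQuery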
async fn process_query(&mut self, q: &FeQueryMessage) -> Result<()> {
trace!("got query {:?}", q.body);
async fn process_query(&mut self, query_string: Bytes) -> Result<()> {
debug!("process query {:?}", query_string);

if q.body.starts_with(b"controlfile") {
// remove null terminator, if any
let mut query_string = query_string.clone();
if query_string.last() == Some(&0) {
query_string.truncate(query_string.len() - 1);
}

if query_string.starts_with(b"controlfile") {
self.handle_controlfile().await
} else if q.body.starts_with(b"pagestream ") {
let (_l, r) = q.body.split_at("pagestream ".len());
let mut r = r.to_vec();
r.pop();
let sysid = String::from_utf8(r).unwrap().trim().to_string();
let sysid: u64 = sysid.parse().unwrap(); // XXX
} else if query_string.starts_with(b"pagestream ") {
let (_l, r) = query_string.split_at("pagestream ".len());
let timelineid_str = String::from_utf8(r.to_vec()).unwrap();
let timelineid = ZTimelineId::from_str(&timelineid_str).unwrap();

self.handle_pagerequests(sysid).await
} else if q.body.starts_with(b"callmemaybe ") {
let (_l, r) = q.body.split_at("callmemaybe ".len());
let mut r = r.to_vec();
r.pop();
let connstr = String::from_utf8(r).unwrap().trim().to_string();
self.handle_pagerequests(timelineid).await
} else if query_string.starts_with(b"basebackup ") {
let (_l, r) = query_string.split_at("basebackup ".len());
let r = r.to_vec();
let timelineid_str = String::from(String::from_utf8(r).unwrap().trim_end());
info!("got basebackup command: \"{}\"", timelineid_str);
let timelineid = ZTimelineId::from_str(&timelineid_str).unwrap();

let conf_copy = self.conf.clone();
let _walreceiver_thread = thread::Builder::new()
.name("WAL receiver thread".into())
.spawn(move || {
walreceiver::thread_main(&conf_copy, &connstr);
})
.unwrap();

// generic ack:
self.write_message_noflush(&BeMessage::RowDescription)
.await?;
self.write_message_noflush(&BeMessage::DataRow).await?;
// Check that the timeline exists
self.handle_basebackup_request(timelineid).await?;
self.write_message_noflush(&BeMessage::CommandComplete)
.await?;
self.write_message(&BeMessage::ReadyForQuery).await
} else if q.body.starts_with(b"status") {
} else if query_string.starts_with(b"callmemaybe ") {
let query_str = String::from_utf8(query_string.to_vec())
.unwrap()
.to_string();

// callmemaybe <zenith timelineid as hex string> <connstr>
let re = Regex::new(r"^callmemaybe ([[:xdigit:]]+) (.*)$").unwrap();
let caps = re.captures(&query_str);
let caps = caps.unwrap();

let timelineid = ZTimelineId::from_str(caps.get(1).unwrap().as_str().clone()).unwrap();
let connstr: String = String::from(caps.get(2).unwrap().as_str());

// Check that the timeline exists
let pcache = page_cache::get_or_restore_pagecache(&self.conf, timelineid);
if pcache.is_err() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("client requested callmemaybe on timeline {} which does not exist in page server", timelineid)));
}

walreceiver::launch_wal_receiver(&self.conf, timelineid, &connstr);

self.write_message_noflush(&BeMessage::CommandComplete)
.await?;
self.write_message(&BeMessage::ReadyForQuery).await
} else if query_string.starts_with(b"status") {
self.write_message_noflush(&BeMessage::RowDescription)
.await?;
self.write_message_noflush(&BeMessage::DataRow).await?;
@@ -488,7 +769,16 @@ impl Connection {
self.write_message(&BeMessage::ReadyForQuery).await
}
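A standalone sketch, not in the diff, of how the callmemaybe regex above splits the command into a hex timeline ID and a libpq-style connection string:

use regex::Regex;

fn main() {
    let re = Regex::new(r"^callmemaybe ([[:xdigit:]]+) (.*)$").unwrap();
    let caps = re
        .captures("callmemaybe 11223344556677881122334455667788 host=127.0.0.1 port=65432")
        .unwrap();
    assert_eq!(caps.get(1).unwrap().as_str().len(), 32); // timeline id, 32 hex digits
    assert_eq!(caps.get(2).unwrap().as_str(), "host=127.0.0.1 port=65432");
}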
async fn handle_pagerequests(&mut self, sysid: u64) -> Result<()> {
async fn handle_pagerequests(&mut self, timelineid: ZTimelineId) -> Result<()> {
// Check that the timeline exists
let pcache = page_cache::get_or_restore_pagecache(&self.conf, timelineid);
if pcache.is_err() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("client requested pagestream on timeline {} which does not exist in page server", timelineid)));
}
let pcache = pcache.unwrap();

/* switch client to COPYBOTH */
self.stream.write_u8(b'W').await?;
self.stream.write_i32(4 + 1 + 2).await?;
@@ -496,15 +786,21 @@ impl Connection {
self.stream.write_i16(0).await?; /* numAttributes */
self.stream.flush().await?;

let pcache = page_cache::get_pagecache(&self.conf, sysid);

loop {
let message = self.read_message().await?;
<<<<<<< HEAD
/*
if let Some(m) = &message {
trace!("query({}): {:?}", sysid, m);
};
*/
=======

if let Some(m) = &message {
info!("query({:?}): {:?}", timelineid, m);
};

>>>>>>> main
if message.is_none() {
// connection was closed
return Ok(());
@@ -573,8 +869,140 @@ impl Connection {

self.write_message(&msg).await?
}
<<<<<<< HEAD
=======
Some(FeMessage::ZenithCreateRequest(req)) => {
let tag = page_cache::RelTag {
spcnode: req.spcnode,
dbnode: req.dbnode,
relnode: req.relnode,
forknum: req.forknum,
};

pcache.relsize_inc(&tag, 0);

self.write_message(&BeMessage::ZenithStatusResponse(ZenithStatusResponse {
ok: true,
n_blocks: 0,
}))
.await?
}
Some(FeMessage::ZenithExtendRequest(req)) => {
let tag = page_cache::RelTag {
spcnode: req.spcnode,
dbnode: req.dbnode,
relnode: req.relnode,
forknum: req.forknum,
};

pcache.relsize_inc(&tag, req.blkno + 1);

self.write_message(&BeMessage::ZenithStatusResponse(ZenithStatusResponse {
ok: true,
n_blocks: 0,
}))
.await?
}
>>>>>>> main
_ => {}
}
}
}

async fn handle_basebackup_request(&mut self, timelineid: ZTimelineId) -> Result<()> {
// check that the timeline exists
let pcache = page_cache::get_or_restore_pagecache(&self.conf, timelineid);
if pcache.is_err() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("client requested basebackup on timeline {} which does not exist in page server", timelineid)));
}

/* switch client to COPYOUT */
let stream = &mut self.stream;
stream.write_u8(b'H').await?;
stream.write_i32(4 + 1 + 2).await?;
stream.write_u8(0).await?; /* copy_is_binary */
stream.write_i16(0).await?; /* numAttributes */
stream.flush().await?;
info!("sent CopyOut");

/* Send a tarball of the latest snapshot on the timeline */

// find latest snapshot
let snapshotlsn = restore_local_repo::find_latest_snapshot(&self.conf, timelineid).unwrap();

// Stream it
let (s, mut r) = mpsc::channel(5);

let f_tar = task::spawn_blocking(move || {
basebackup::send_snapshot_tarball(&mut CopyDataSink(s), timelineid, snapshotlsn)?;
Ok(())
});
let f_tar2 = async {
let joinres = f_tar.await;

if joinres.is_err() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
joinres.unwrap_err(),
));
}
return joinres.unwrap();
};

let f_pump = async move {
loop {
let buf = r.recv().await;
if buf.is_none() {
break;
}
let mut buf = buf.unwrap();

// CopyData
stream.write_u8(b'd').await?;
stream.write_u32((4 + buf.len()) as u32).await?;
stream.write_all(&mut buf).await?;
trace!("CopyData sent for {} bytes!", buf.len());

// FIXME: flush isn't really required, but makes it easier
// to view in wireshark
stream.flush().await?;
}
Ok(())
};

tokio::try_join!(f_tar2, f_pump)?;

// CopyDone
self.stream.write_u8(b'c').await?;
self.stream.write_u32(4).await?;
self.stream.flush().await?;
debug!("CopyDone sent!");

// FIXME: I'm getting an error from the tokio copyout driver without this.
// I think it happens when the CommandComplete, CloseComplete and ReadyForQuery
// are sent in the same TCP packet as the CopyDone. I don't understand why.
thread::sleep(std::time::Duration::from_secs(1));

Ok(())
}
}

struct CopyDataSink(mpsc::Sender<Bytes>);

impl std::io::Write for CopyDataSink {
fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
let buf = Bytes::copy_from_slice(data);

if let Err(e) = self.0.blocking_send(buf) {
return Err(io::Error::new(io::ErrorKind::Other, e));
}

Ok(data.len())
}
fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
// no-op
Ok(())
}
}
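CopyDataSink above bridges the blocking std::io::Write side (the tar writer running under spawn_blocking) to the async socket through a bounded channel. The same pattern in isolation, illustrative only:

use bytes::Bytes;
use tokio::sync::mpsc;

fn main() {
    let rt = tokio::runtime::Runtime::new().unwrap();
    rt.block_on(async {
        let (tx, mut rx) = mpsc::channel::<Bytes>(5);
        // blocking producer runs on the spawn_blocking thread pool
        let producer = tokio::task::spawn_blocking(move || {
            use std::io::Write;
            let mut sink = CopyDataSink(tx);
            sink.write_all(b"hello").unwrap();
            // dropping the sink closes the channel and ends the consumer loop
        });
        // async consumer drains the channel (here it would write CopyData frames)
        while let Some(chunk) = rx.recv().await {
            assert_eq!(&chunk[..], b"hello");
        }
        producer.await.unwrap();
    });
}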
@@ -1,333 +0,0 @@
//
// Restore chunks from S3
//
// This runs once at Page Server startup. It loads all the "base images" from
// S3 into the in-memory page cache. It also initializes the "last valid LSN"
// in the page cache to the LSN of the base image, so that when the WAL receiver
// is started, it starts streaming from that LSN.
//

use bytes::{Buf, BytesMut};
use log::*;
use regex::Regex;
use std::env;
use std::fmt;

use tokio::runtime;

use futures::future;

use crate::{page_cache, pg_constants, PageServerConf};
use std::fs;
use walkdir::WalkDir;

pub fn restore_main(conf: &PageServerConf) {
// Create a new thread pool
let runtime = runtime::Runtime::new().unwrap();

runtime.block_on(async {
let result = restore_chunk(conf).await;

match result {
Ok(_) => {
return;
}
Err(err) => {
error!("error: {}", err);
return;
}
}
});
}

async fn restore_chunk(conf: &PageServerConf) -> Result<(), FilePathError> {
let pgdata_base_path = env::var("PGDATA_BASE_PATH").unwrap();
info!("Restoring from local dir...");

let sys_id: u64 = 42;
let control_lsn = 0; //TODO get it from sysid
let mut slurp_futures: Vec<_> = Vec::new();

for e in WalkDir::new(pgdata_base_path.clone()) {
let entry = e.unwrap();

if !entry.path().is_dir() {
let path = entry.path().to_str().unwrap();

let relpath = path
.strip_prefix(&format!("{}/", pgdata_base_path))
.unwrap();
info!(
"Restoring file {} relpath {}",
entry.path().display(),
relpath
);

let parsed = parse_rel_file_path(&relpath);

match parsed {
Ok(mut p) => {
p.lsn = control_lsn;

let f = slurp_base_file(conf, sys_id, path.to_string(), p);

slurp_futures.push(f);
}
Err(e) => {
warn!("unrecognized file: {} ({})", relpath, e);
}
};
}
}

let pcache = page_cache::get_pagecache(conf, sys_id);
pcache.init_valid_lsn(control_lsn);

info!("{} files to restore...", slurp_futures.len());

future::join_all(slurp_futures).await;
info!("restored!");
Ok(())
}

#[derive(Debug)]
struct FilePathError {
msg: String,
}

impl FilePathError {
fn new(msg: &str) -> FilePathError {
FilePathError {
msg: msg.to_string(),
}
}
}

impl From<core::num::ParseIntError> for FilePathError {
fn from(e: core::num::ParseIntError) -> Self {
return FilePathError {
msg: format!("invalid filename: {}", e),
};
}
}

impl fmt::Display for FilePathError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "invalid filename")
}
}

fn forkname_to_forknum(forkname: Option<&str>) -> Result<u32, FilePathError> {
match forkname {
// "main" is not in filenames, it's implicit if the fork name is not present
None => Ok(0),
Some("fsm") => Ok(1),
Some("vm") => Ok(2),
Some("init") => Ok(3),
Some(_) => Err(FilePathError::new("invalid forkname")),
}
}

#[derive(Debug)]
struct ParsedBaseImageFileName {
pub spcnode: u32,
pub dbnode: u32,
pub relnode: u32,
pub forknum: u32,
pub segno: u32,

pub lsn: u64,
}

// formats:
// <oid>
// <oid>_<fork name>
// <oid>.<segment number>
// <oid>_<fork name>.<segment number>
fn parse_filename(fname: &str) -> Result<(u32, u32, u32, u64), FilePathError> {
let re = Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();

let caps = re
.captures(fname)
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;

let relnode_str = caps.name("relnode").unwrap().as_str();
let relnode = u32::from_str_radix(relnode_str, 10)?;

let forkname_match = caps.name("forkname");
let forkname = if forkname_match.is_none() {
None
} else {
Some(forkname_match.unwrap().as_str())
};
let forknum = forkname_to_forknum(forkname)?;

let segno_match = caps.name("segno");
let segno = if segno_match.is_none() {
0
} else {
u32::from_str_radix(segno_match.unwrap().as_str(), 10)?
};
return Ok((relnode, forknum, segno, 0));
}
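Illustrative only: the filename formats that the (now-removed) parse_filename above accepted, following its format comment:

fn main() {
    assert_eq!(parse_filename("16384").unwrap(), (16384, 0, 0, 0));
    assert_eq!(parse_filename("16384_vm").unwrap(), (16384, 2, 0, 0)); // "vm" fork = 2
    assert_eq!(parse_filename("16384.1").unwrap(), (16384, 0, 1, 0)); // segment 1
}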
fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathError> {
/*
* Relation data files can be in one of the following directories:
*
* global/
* shared relations
*
* base/<db oid>/
* regular relations, default tablespace
*
* pg_tblspc/<tblspc oid>/<tblspc version>/
* within a non-default tablespace (the name of the directory
* depends on version)
*
* And the relation data files themselves have a filename like:
*
* <oid>.<segment number>
*/
if let Some(fname) = path.strip_prefix("global/") {
if fname.contains("pg_control") {
return Ok(ParsedBaseImageFileName {
spcnode: pg_constants::GLOBALTABLESPACE_OID,
dbnode: 0,
relnode: 0,
forknum: pg_constants::PG_CONTROLFILE_FORKNUM,
segno: 0,
lsn: 0,
});
}

if fname.contains("pg_filenode") {
return Ok(ParsedBaseImageFileName {
spcnode: pg_constants::GLOBALTABLESPACE_OID,
dbnode: 0,
relnode: 0,
forknum: pg_constants::PG_FILENODEMAP_FORKNUM,
segno: 0,
lsn: 0,
});
}

let (relnode, forknum, segno, lsn) = parse_filename(fname)?;

return Ok(ParsedBaseImageFileName {
spcnode: pg_constants::GLOBALTABLESPACE_OID,
dbnode: 0,
relnode,
forknum,
segno,
lsn,
});
} else if let Some(dbpath) = path.strip_prefix("base/") {
let mut s = dbpath.split("/");
let dbnode_str = s
.next()
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
let dbnode = u32::from_str_radix(dbnode_str, 10)?;
let fname = s
.next()
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
if s.next().is_some() {
return Err(FilePathError::new("invalid relation data file name"));
};

if fname.contains("pg_filenode") {
return Ok(ParsedBaseImageFileName {
spcnode: pg_constants::DEFAULTTABLESPACE_OID,
dbnode: dbnode,
relnode: 0,
forknum: pg_constants::PG_FILENODEMAP_FORKNUM,
segno: 0,
lsn: 0,
});
}

let (relnode, forknum, segno, lsn) = parse_filename(fname)?;

return Ok(ParsedBaseImageFileName {
spcnode: pg_constants::DEFAULTTABLESPACE_OID,
dbnode,
relnode,
forknum,
segno,
lsn,
});
} else if let Some(fname) = path.strip_prefix("pg_xact/") {
return Ok(ParsedBaseImageFileName {
spcnode: 0,
dbnode: 0,
relnode: 0,
forknum: pg_constants::PG_XACT_FORKNUM,
segno: u32::from_str_radix(fname, 10).unwrap(),
lsn: 0,
});
} else if let Some(fname) = path.strip_prefix("pg_multixact/members/") {
return Ok(ParsedBaseImageFileName {
spcnode: 0,
dbnode: 0,
relnode: 0,
forknum: pg_constants::PG_MXACT_MEMBERS_FORKNUM,
segno: u32::from_str_radix(fname, 10).unwrap(),
lsn: 0,
});
} else if let Some(fname) = path.strip_prefix("pg_multixact/offsets/") {
return Ok(ParsedBaseImageFileName {
spcnode: 0,
dbnode: 0,
relnode: 0,
forknum: pg_constants::PG_MXACT_OFFSETS_FORKNUM,
segno: u32::from_str_radix(fname, 10).unwrap(),
lsn: 0,
});
} else if let Some(_) = path.strip_prefix("pg_tblspc/") {
// TODO
return Err(FilePathError::new("tablespaces not supported"));
} else {
return Err(FilePathError::new("invalid relation data file name"));
}
}

async fn slurp_base_file(
conf: &PageServerConf,
sys_id: u64,
file_path: String,
parsed: ParsedBaseImageFileName,
) {
info!("slurp_base_file local path {}", file_path);

let mut data = fs::read(file_path).unwrap();

// pg_filenode.map has non-standard size - 512 bytes
// enlarge it to treat as a regular page
if parsed.forknum == pg_constants::PG_FILENODEMAP_FORKNUM {
data.resize(8192, 0);
}

let data_bytes: &[u8] = &data;
let mut bytes = BytesMut::from(data_bytes).freeze();

// FIXME: use constants (BLCKSZ)
let mut blknum: u32 = parsed.segno * (1024 * 1024 * 1024 / 8192);

let pcache = page_cache::get_pagecache(conf, sys_id);

while bytes.remaining() >= 8192 {
let tag = page_cache::BufferTag {
rel: page_cache::RelTag {
spcnode: parsed.spcnode,
dbnode: parsed.dbnode,
relnode: parsed.relnode,
forknum: parsed.forknum as u8,
},
blknum: blknum,
};

pcache.put_page_image(tag, parsed.lsn, bytes.copy_to_bytes(8192));

blknum += 1;
}
}
490
pageserver/src/restore_local_repo.rs
Normal file
@@ -0,0 +1,490 @@
//
// Restore chunks from local Zenith repository
//
// This runs once at Page Server startup. It loads all the "snapshots" and all
// WAL from all timelines from the local zenith repository into the in-memory page
// cache.
//
// This also initializes the "last valid LSN" in the page cache to the last LSN
// seen in the WAL, so that when the WAL receiver is started, it starts
// streaming from that LSN.
//

use log::*;
use regex::Regex;
use std::fmt;

use std::cmp::max;
use std::error::Error;
use std::fs;
use std::fs::File;
use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
use std::path::{Path, PathBuf};

use anyhow::Result;
use bytes::Bytes;

use crate::page_cache;
use crate::page_cache::BufferTag;
use crate::page_cache::PageCache;
use crate::waldecoder::WalStreamDecoder;
use crate::PageServerConf;
use crate::ZTimelineId;

// From pg_tablespace_d.h
//
// FIXME: we'll probably need these elsewhere too, move to some common location
const DEFAULTTABLESPACE_OID: u32 = 1663;
const GLOBALTABLESPACE_OID: u32 = 1664;

//
// Load it all into the page cache.
//
pub fn restore_timeline(
conf: &PageServerConf,
pcache: &PageCache,
timeline: ZTimelineId,
) -> Result<()> {
let timelinepath = PathBuf::from("timelines").join(timeline.to_string());

if !timelinepath.exists() {
anyhow::bail!("timeline {} does not exist in the page server's repository", timeline);
}

// Scan .zenith/timelines/<timeline>/snapshots
let snapshotspath = PathBuf::from("timelines")
.join(timeline.to_string())
.join("snapshots");

let mut last_snapshot_lsn: u64 = 0;

for direntry in fs::read_dir(&snapshotspath).unwrap() {
let direntry = direntry?;
let filename = direntry.file_name().to_str().unwrap().to_owned();

let lsn = u64::from_str_radix(&filename, 16)?;
last_snapshot_lsn = max(lsn, last_snapshot_lsn);

restore_snapshot(conf, pcache, timeline, &filename)?;
info!("restored snapshot at {}", filename);
}

if last_snapshot_lsn == 0 {
error!(
"could not find valid snapshot in {}",
snapshotspath.display()
);
// TODO return error?
}
pcache.init_valid_lsn(last_snapshot_lsn);

restore_wal(conf, pcache, timeline, last_snapshot_lsn)?;

Ok(())
}

pub fn find_latest_snapshot(_conf: &PageServerConf, timeline: ZTimelineId) -> Result<u64> {
let snapshotspath = format!("timelines/{}/snapshots", timeline);

let mut last_snapshot_lsn = 0;
for direntry in fs::read_dir(&snapshotspath).unwrap() {
let filename = direntry.unwrap().file_name().to_str().unwrap().to_owned();

let lsn = u64::from_str_radix(&filename, 16)?;
last_snapshot_lsn = max(lsn, last_snapshot_lsn);
}

if last_snapshot_lsn == 0 {
error!("could not find valid snapshot in {}", &snapshotspath);
// TODO return error?
}
Ok(last_snapshot_lsn)
}

fn restore_snapshot(
conf: &PageServerConf,
pcache: &PageCache,
timeline: ZTimelineId,
snapshot: &str,
) -> Result<()> {
let snapshotpath = PathBuf::from("timelines")
.join(timeline.to_string())
.join("snapshots")
.join(snapshot);

// Scan 'global'
for direntry in fs::read_dir(snapshotpath.join("global"))? {
let direntry = direntry?;
match direntry.file_name().to_str() {
None => continue,

// These special files appear in the snapshot, but are not needed by the page server
Some("pg_control") => continue,
Some("pg_filenode.map") => continue,

// Load any relation files into the page server
_ => restore_relfile(
conf,
pcache,
timeline,
snapshot,
GLOBALTABLESPACE_OID,
0,
&direntry.path(),
)?,
}
}

// Scan 'base'. It contains database dirs, the database OID is the filename.
// E.g. 'base/12345', where 12345 is the database OID.
for direntry in fs::read_dir(snapshotpath.join("base"))? {
let direntry = direntry?;

let dboid = u32::from_str_radix(direntry.file_name().to_str().unwrap(), 10)?;

for direntry in fs::read_dir(direntry.path())? {
let direntry = direntry?;
match direntry.file_name().to_str() {
None => continue,

// These special files appear in the snapshot, but are not needed by the page server
Some("PG_VERSION") => continue,
Some("pg_filenode.map") => continue,

// Load any relation files into the page server
_ => restore_relfile(
conf,
pcache,
timeline,
snapshot,
DEFAULTTABLESPACE_OID,
dboid,
&direntry.path(),
)?,
}
}
}

// TODO: Scan pg_tblspc

Ok(())
}

fn restore_relfile(
_conf: &PageServerConf,
pcache: &PageCache,
_timeline: ZTimelineId,
snapshot: &str,
spcoid: u32,
dboid: u32,
path: &Path,
) -> Result<()> {
let lsn = u64::from_str_radix(snapshot, 16)?;

// Does it look like a relation file?

let p = parse_relfilename(path.file_name().unwrap().to_str().unwrap());
if p.is_err() {
let e = p.unwrap_err();
warn!("unrecognized file in snapshot: {:?} ({})", path, e);
return Err(e)?;
}
let (relnode, forknum, segno) = p.unwrap();

let mut file = File::open(path)?;
let mut buf: [u8; 8192] = [0u8; 8192];

// FIXME: use constants (BLCKSZ)
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / 8192);
loop {
let r = file.read_exact(&mut buf);
match r {
Ok(_) => {
let tag = page_cache::BufferTag {
spcnode: spcoid,
dbnode: dboid,
relnode: relnode,
forknum: forknum as u8,
blknum: blknum,
};
pcache.put_page_image(tag, lsn, Bytes::copy_from_slice(&buf));
/*
if oldest_lsn == 0 || p.lsn < oldest_lsn {
oldest_lsn = p.lsn;
}
*/
}

// TODO: UnexpectedEof is expected
Err(e) => match e.kind() {
std::io::ErrorKind::UnexpectedEof => {
// reached EOF. That's expected.
// FIXME: maybe check that we read the full length of the file?
break;
}
_ => {
error!("error reading file: {:?} ({})", path, e);
break;
}
},
};
blknum += 1;
}

let tag = page_cache::RelTag {
spcnode: spcoid,
dbnode: dboid,
relnode: relnode,
forknum: forknum as u8,
};
pcache.relsize_inc(&tag, blknum);

Ok(())
}

// Scan WAL on a timeline, starting from the given LSN, and load all the records
// into the page cache.
fn restore_wal(
_conf: &PageServerConf,
pcache: &PageCache,
timeline: ZTimelineId,
startpoint: u64,
) -> Result<()> {
let walpath = format!("timelines/{}/wal", timeline);

let mut waldecoder = WalStreamDecoder::new(u64::from(startpoint));

let mut segno = XLByteToSeg(startpoint, 16 * 1024 * 1024);
let mut offset = XLogSegmentOffset(startpoint, 16 * 1024 * 1024);
let mut last_lsn = 0;
loop {
// FIXME: assume postgresql tli 1 for now
let filename = XLogFileName(1, segno, 16 * 1024 * 1024);
let mut path = walpath.clone() + "/" + &filename;

// It could also exist as a .partial file
if !PathBuf::from(&path).exists() {
path = path + ".partial";
}

// Slurp the WAL file
let open_result = File::open(&path);
if let Err(e) = open_result {
if e.kind() == std::io::ErrorKind::NotFound {
break;
}
return Err(e)?;
}
let mut file = open_result.unwrap();

if offset > 0 {
file.seek(SeekFrom::Start(offset as u64))?;
}

let mut buf = Vec::new();
let nread = file.read_to_end(&mut buf)?;
if nread != 16 * 1024 * 1024 - offset as usize {
// Maybe allow this for .partial files?
error!("read only {} bytes from WAL file", nread);
}
waldecoder.feed_bytes(&buf);

let mut nrecords = 0;
loop {
let rec = waldecoder.poll_decode();
if rec.is_err() {
// Assume that an error means we've reached the end of
// a partial WAL record. So that's ok.
break;
}
if let Some((lsn, recdata)) = rec.unwrap() {
let decoded = crate::waldecoder::decode_wal_record(recdata.clone());

// Put the WAL record to the page cache. We make a separate copy of
// it for every block it modifies. (The actual WAL record is kept in
// a Bytes, which uses a reference counter for the underlying buffer,
// so having multiple copies of it doesn't cost that much)
for blk in decoded.blocks.iter() {
let tag = BufferTag {
spcnode: blk.rnode_spcnode,
dbnode: blk.rnode_dbnode,
relnode: blk.rnode_relnode,
forknum: blk.forknum as u8,
blknum: blk.blkno,
};

let rec = page_cache::WALRecord {
lsn: lsn,
will_init: blk.will_init || blk.apply_image,
rec: recdata.clone(),
};

pcache.put_wal_record(tag, rec);
}

// Now that this record has been handled, let the page cache know that
// it is up-to-date to this LSN
pcache.advance_last_valid_lsn(lsn);
last_lsn = lsn;
} else {
break;
}
nrecords += 1;
}

info!("restored {} records from WAL file {}", nrecords, filename);

segno += 1;
offset = 0;
}
info!(
"reached end of WAL at {:X}/{:X}",
last_lsn >> 32,
last_lsn & 0xffffffff
);

Ok(())
}
// FIXME: copied from xlog_utils.rs
pub const XLOG_FNAME_LEN: usize = 24;
pub type XLogRecPtr = u64;
pub type XLogSegNo = u64;
pub type TimeLineID = u32;

#[allow(non_snake_case)]
pub fn XLogSegmentOffset(xlogptr: XLogRecPtr, wal_segsz_bytes: usize) -> u32 {
return (xlogptr as u32) & (wal_segsz_bytes as u32 - 1);
}

#[allow(non_snake_case)]
pub fn XLByteToSeg(xlogptr: XLogRecPtr, wal_segsz_bytes: usize) -> XLogSegNo {
return xlogptr / wal_segsz_bytes as u64;
}

#[allow(non_snake_case)]
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
return format!(
"{:>08X}{:>08X}{:>08X}",
tli,
logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
);
}

#[allow(non_snake_case)]
pub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {
return (0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo;
}

#[allow(non_snake_case)]
pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) {
let tli = u32::from_str_radix(&fname[0..8], 16).unwrap();
let log = u32::from_str_radix(&fname[8..16], 16).unwrap() as XLogSegNo;
let seg = u32::from_str_radix(&fname[16..24], 16).unwrap() as XLogSegNo;
return (log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli);
}

#[allow(non_snake_case)]
pub fn IsXLogFileName(fname: &str) -> bool {
return fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit());
}

#[allow(non_snake_case)]
pub fn IsPartialXLogFileName(fname: &str) -> bool {
if let Some(basefname) = fname.strip_suffix(".partial") {
IsXLogFileName(basefname)
} else {
false
}
}
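Not in the diff: a worked example of the WAL-segment helpers above, mapping an LSN to its segment file name for the 16 MiB segment size used throughout this file:

fn main() {
    let lsn: u64 = 0x0000_0001_6B37_2D28;
    let seg = XLByteToSeg(lsn, 16 * 1024 * 1024); // segment 0x16B
    let name = XLogFileName(1, seg, 16 * 1024 * 1024);
    assert_eq!(name, "00000001000000010000006B");
    assert!(IsXLogFileName(&name));
    assert!(IsPartialXLogFileName(&(name + ".partial")));
}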
#[derive(Debug, Clone)]
struct FilePathError {
msg: String,
}

impl Error for FilePathError {
fn description(&self) -> &str {
&self.msg
}
}
impl FilePathError {
fn new(msg: &str) -> FilePathError {
FilePathError {
msg: msg.to_string(),
}
}
}

impl From<core::num::ParseIntError> for FilePathError {
fn from(e: core::num::ParseIntError) -> Self {
return FilePathError {
msg: format!("invalid filename: {}", e),
};
}
}

impl fmt::Display for FilePathError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "invalid filename")
}
}

fn forkname_to_forknum(forkname: Option<&str>) -> Result<u32, FilePathError> {
match forkname {
// "main" is not in filenames, it's implicit if the fork name is not present
None => Ok(0),
Some("fsm") => Ok(1),
Some("vm") => Ok(2),
Some("init") => Ok(3),
Some(_) => Err(FilePathError::new("invalid forkname")),
}
}

#[derive(Debug)]
struct ParsedBaseImageFileName {
pub spcnode: u32,
pub dbnode: u32,
pub relnode: u32,
pub forknum: u32,
pub segno: u32,

pub lsn: u64,
}

// formats:
// <oid>
// <oid>_<fork name>
// <oid>.<segment number>
// <oid>_<fork name>.<segment number>

fn parse_relfilename(fname: &str) -> Result<(u32, u32, u32), FilePathError> {
let re = Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();

let caps = re
.captures(fname)
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;

let relnode_str = caps.name("relnode").unwrap().as_str();
let relnode = u32::from_str_radix(relnode_str, 10)?;

let forkname_match = caps.name("forkname");
let forkname = if forkname_match.is_none() {
None
} else {
Some(forkname_match.unwrap().as_str())
};
let forknum = forkname_to_forknum(forkname)?;

let segno_match = caps.name("segno");
let segno = if segno_match.is_none() {
0
} else {
u32::from_str_radix(segno_match.unwrap().as_str(), 10)?
};

return Ok((relnode, forknum, segno));
}
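Illustrative only: the relation file names parse_relfilename above accepts, per its format comment:

fn main() {
    assert_eq!(parse_relfilename("16384").unwrap(), (16384, 0, 0));
    assert_eq!(parse_relfilename("16384_fsm").unwrap(), (16384, 1, 0)); // "fsm" fork = 1
    assert_eq!(parse_relfilename("16384.2").unwrap(), (16384, 0, 2)); // segment 2
}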
@@ -1,14 +1,7 @@
//#![allow(non_upper_case_globals)]
//#![allow(non_camel_case_types)]
//#![allow(non_snake_case)]
//#![allow(dead_code)]
//include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

use bytes::{Buf, BufMut, Bytes, BytesMut};

use std::cmp::min;

use log::*;
use std::cmp::min;
use thiserror::Error;

const XLOG_BLCKSZ: u32 = 8192;

@@ -19,7 +12,7 @@ const WAL_SEGMENT_SIZE: u64 = 16 * 1024 * 1024;

#[repr(C)]
#[derive(Debug)]
struct XLogPageHeaderData {
pub struct XLogPageHeaderData {
xlp_magic: u16, /* magic value for correctness checks */
xlp_info: u16, /* flag bits, see below */
xlp_tli: u32, /* TimeLineID of first record on page */
@@ -33,7 +26,7 @@ const SizeOfXLogShortPHD: usize = 2 + 2 + 4 + 8 + 4 + 4;

#[repr(C)]
#[derive(Debug)]
struct XLogLongPageHeaderData {
pub struct XLogLongPageHeaderData {
std: XLogPageHeaderData, /* standard header fields */
xlp_sysid: u64, /* system identifier from pg_control */
xlp_seg_size: u32, /* just as a cross-check */
@@ -57,6 +50,13 @@ pub struct WalStreamDecoder {
recordbuf: BytesMut,
}

#[derive(Error, Debug, Clone)]
#[error("{msg} at {lsn}")]
pub struct WalDecodeError {
msg: String,
lsn: u64,
}

//
// WalRecordStream is a Stream that returns a stream of WAL records
// FIXME: This isn't a proper rust stream
@@ -79,40 +79,56 @@ impl WalStreamDecoder {
self.inputbuf.extend_from_slice(buf);
}

// Returns a tuple:
// (end LSN, record)
pub fn poll_decode(&mut self) -> Option<(u64, Bytes)> {
/// Attempt to decode another WAL record from the input that has been fed to the
/// decoder so far.
///
/// Returns one of the following:
/// Ok((u64, Bytes)): a tuple containing the LSN of the next record, and the record itself
/// Ok(None): there is not enough data in the input buffer. Feed more by calling the `feed_bytes` function
/// Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
///
pub fn poll_decode(&mut self) -> Result<Option<(u64, Bytes)>, WalDecodeError> {
loop {
// parse and verify page boundaries as we go
if self.lsn % WAL_SEGMENT_SIZE == 0 {
// parse long header

if self.inputbuf.remaining() < SizeOfXLogLongPHD {
return None;
return Ok(None);
}

self.decode_XLogLongPageHeaderData();
let hdr = self.decode_XLogLongPageHeaderData();
if hdr.std.xlp_pageaddr != self.lsn {
return Err(WalDecodeError {
msg: "invalid xlog segment header".into(),
lsn: self.lsn,
});
}
// TODO: verify the remaining fields in the header

self.lsn += SizeOfXLogLongPHD as u64;

// TODO: verify the fields in the header

continue;
} else if self.lsn % (XLOG_BLCKSZ as u64) == 0 {
// parse page header

if self.inputbuf.remaining() < SizeOfXLogShortPHD {
return None;
return Ok(None);
}

self.decode_XLogPageHeaderData();
let hdr = self.decode_XLogPageHeaderData();
if hdr.xlp_pageaddr != self.lsn {
return Err(WalDecodeError {
msg: "invalid xlog page header".into(),
lsn: self.lsn,
});
}
// TODO: verify the remaining fields in the header

self.lsn += SizeOfXLogShortPHD as u64;

// TODO: verify the fields in the header

continue;
} else if self.padlen > 0 {
if self.inputbuf.remaining() < self.padlen as usize {
return None;
return Ok(None);
}

// skip padding
@@ -123,20 +139,17 @@ impl WalStreamDecoder {
// need to have at least the xl_tot_len field

if self.inputbuf.remaining() < 4 {
return None;
return Ok(None);
}

// read xl_tot_len FIXME: assumes little-endian
self.startlsn = self.lsn;
let xl_tot_len = self.inputbuf.get_u32_le();
if xl_tot_len < SizeOfXLogRecord {
error!(
"invalid xl_tot_len {} at {:X}/{:X}",
xl_tot_len,
self.lsn >> 32,
self.lsn & 0xffffffff
);
panic!();
return Err(WalDecodeError {
msg: format!("invalid xl_tot_len {}", xl_tot_len),
lsn: self.lsn,
});
}
self.lsn += 4;

@@ -154,7 +167,7 @@ impl WalStreamDecoder {
let n = min(self.contlen, pageleft) as usize;

if self.inputbuf.remaining() < n {
return None;
return Ok(None);
}

self.recordbuf.put(self.inputbuf.split_to(n));
@@ -182,7 +195,7 @@ impl WalStreamDecoder {
}

let result = (self.lsn, recordbuf);
return Some(result);
return Ok(Some(result));
}
continue;
}
@@ -289,7 +302,6 @@ pub struct DecodedBkpBlock {
|
||||
const SizeOfXLogRecord: u32 = 24;
|
||||
|
||||
pub struct DecodedWALRecord {
|
||||
pub lsn: u64, // LSN at the *end* of the record
|
||||
pub xl_info: u8,
|
||||
pub xl_rmid: u8,
|
||||
pub record: Bytes, // raw XLogRecord
|
||||
@@ -364,14 +376,7 @@ pub fn decode_truncate_record(decoded: &DecodedWALRecord) -> XlSmgrTruncate {
|
||||
//
|
||||
// Routines to decode a WAL record and figure out which blocks are modified
|
||||
//
|
||||
pub fn decode_wal_record(lsn: u64, record: Bytes) -> DecodedWALRecord {
|
||||
trace!(
|
||||
"decoding record with LSN {:08X}/{:08X} ({} bytes)",
|
||||
lsn >> 32,
|
||||
lsn & 0xffff_ffff,
|
||||
record.remaining()
|
||||
);
|
||||
|
||||
pub fn decode_wal_record(record: Bytes) -> DecodedWALRecord {
|
||||
let mut buf = record.clone();
|
||||
|
||||
// FIXME: assume little-endian here
|
||||
@@ -627,7 +632,6 @@ pub fn decode_wal_record(lsn: u64, record: Bytes) -> DecodedWALRecord {
|
||||
// Since we don't care about the data payloads here, we're done.
|
||||
|
||||
return DecodedWALRecord {
|
||||
lsn,
|
||||
xl_info,
|
||||
xl_rmid,
|
||||
record,
|
||||
|
||||
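
The new poll_decode contract separates "need more input" (Ok(None)) from corrupt input (Err). A minimal caller sketch — not part of this diff — assuming a WalStreamDecoder in scope; the println! reporting is purely illustrative:

fn consume_wal(decoder: &mut WalStreamDecoder, buf: &[u8]) -> Result<(), WalDecodeError> {
    // Push newly received bytes into the decoder's input buffer.
    decoder.feed_bytes(buf);
    // Drain every record that is now complete.
    while let Some((lsn, record)) = decoder.poll_decode()? {
        println!("record of {} bytes ends at {:X}/{:X}", record.len(), lsn >> 32, lsn & 0xffff_ffff);
    }
    // Ok(None) simply means: call feed_bytes() again when more WAL arrives.
    Ok(())
}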
@@ -10,22 +10,87 @@ use crate::page_cache;
use crate::page_cache::{BufferTag, RelTag};
use crate::waldecoder::*;
use crate::PageServerConf;
use crate::ZTimelineId;
use anyhow::Error;
use lazy_static::lazy_static;
use log::*;
use postgres_protocol::message::backend::ReplicationMessage;
use postgres_types::PgLsn;
use std::collections::HashMap;
use std::fs;
use std::fs::{File, OpenOptions};
use std::io::{Seek, SeekFrom, Write};
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Mutex;
use std::thread;
use tokio::runtime;
use tokio::time::{sleep, Duration};
use tokio_postgres::replication::{PgTimestamp, ReplicationStream};
use tokio_postgres::{NoTls, SimpleQueryMessage, SimpleQueryRow};
use tokio_stream::StreamExt;

//
// We keep one WAL Receiver active per timeline.
//
struct WalReceiverEntry {
wal_producer_connstr: String,
}

lazy_static! {
static ref WAL_RECEIVERS: Mutex<HashMap<ZTimelineId, WalReceiverEntry>> =
Mutex::new(HashMap::new());
}

// Launch a new WAL receiver, or tell one that's already running about a change in the connection string
pub fn launch_wal_receiver(
conf: &PageServerConf,
timelineid: ZTimelineId,
wal_producer_connstr: &str,
) {
let mut receivers = WAL_RECEIVERS.lock().unwrap();

match receivers.get_mut(&timelineid) {
Some(receiver) => {
receiver.wal_producer_connstr = wal_producer_connstr.into();
}
None => {
let receiver = WalReceiverEntry {
wal_producer_connstr: wal_producer_connstr.into(),
};
receivers.insert(timelineid, receiver);

// Also launch a new thread to handle this connection
let conf_copy = conf.clone();
let _walreceiver_thread = thread::Builder::new()
.name("WAL receiver thread".into())
.spawn(move || {
thread_main(&conf_copy, timelineid);
})
.unwrap();
}
};
}

// Look up the current WAL producer connection string in the hash table
fn get_wal_producer_connstr(timelineid: ZTimelineId) -> String {
let receivers = WAL_RECEIVERS.lock().unwrap();

receivers
.get(&timelineid)
.unwrap()
.wal_producer_connstr
.clone()
}

//
// This is the entry point for the WAL receiver thread.
//
pub fn thread_main(conf: &PageServerConf, wal_producer_connstr: &str) {
info!("WAL receiver thread started: '{}'", wal_producer_connstr);
fn thread_main(conf: &PageServerConf, timelineid: ZTimelineId) {
info!(
"WAL receiver thread started for timeline '{}'",
timelineid
);

let runtime = runtime::Builder::new_current_thread()
.enable_all()
@@ -34,7 +99,10 @@ pub fn thread_main(conf: &PageServerConf, wal_producer_connstr: &str) {

runtime.block_on(async {
loop {
let res = walreceiver_main(conf, wal_producer_connstr).await;
// Look up the current WAL producer address
let wal_producer_connstr = get_wal_producer_connstr(timelineid);

let res = walreceiver_main(conf, timelineid, &wal_producer_connstr).await;

if let Err(e) = res {
info!(
@@ -47,7 +115,11 @@ pub fn thread_main(conf: &PageServerConf, wal_producer_connstr: &str) {
});
}

async fn walreceiver_main(conf: &PageServerConf, wal_producer_connstr: &str) -> Result<(), Error> {
async fn walreceiver_main(
conf: &PageServerConf,
timelineid: ZTimelineId,
wal_producer_connstr: &str,
) -> Result<(), Error> {
// Connect to the database in replication mode.
info!("connecting to {:?}", wal_producer_connstr);
let connect_cfg = format!("{} replication=true", wal_producer_connstr);
@@ -67,7 +139,7 @@ async fn walreceiver_main(conf: &PageServerConf, wal_producer_connstr: &str) ->
let end_of_wal = u64::from(identify.xlogpos);
let mut caught_up = false;

let pcache = page_cache::get_pagecache(conf, identify.systemid);
let pcache = page_cache::get_pagecache(&conf, timelineid).unwrap();

//
// Start streaming the WAL, from where we left off previously.
@@ -95,9 +167,10 @@ async fn walreceiver_main(conf: &PageServerConf, wal_producer_connstr: &str) ->
}
}
debug!(
"starting replication from {:X}/{:X}, server is at {:X}/{:X}...",
"starting replication from {:X}/{:X} for timeline {}, server is at {:X}/{:X}...",
(startpoint >> 32),
(startpoint & 0xffffffff),
timelineid,
(end_of_wal >> 32),
(end_of_wal & 0xffffffff)
);
@@ -120,6 +193,13 @@ async fn walreceiver_main(conf: &PageServerConf, wal_producer_connstr: &str) ->
let startlsn = xlog_data.wal_start();
let endlsn = startlsn + data.len() as u64;

write_wal_file(
startlsn,
timelineid,
16 * 1024 * 1024, // FIXME
data,
)?;

trace!(
"received XLogData between {:X}/{:X} and {:X}/{:X}",
(startlsn >> 32),
@@ -131,8 +211,8 @@ async fn walreceiver_main(conf: &PageServerConf, wal_producer_connstr: &str) ->
waldecoder.feed_bytes(data);

loop {
if let Some((lsn, recdata)) = waldecoder.poll_decode() {
let decoded = decode_wal_record(startlsn, recdata.clone());
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
let decoded = decode_wal_record(recdata.clone());

// Put the WAL record to the page cache. We make a separate copy of
// it for every block it modifies. (The actual WAL record is kept in
@@ -184,7 +264,7 @@ async fn walreceiver_main(conf: &PageServerConf, wal_producer_connstr: &str) ->
}
// Now that this record has been handled, let the page cache know that
// it is up-to-date to this LSN
pcache.advance_last_valid_lsn(lsn);
pcache.advance_last_record_lsn(lsn);
} else {
break;
}
@@ -286,3 +366,152 @@ pub async fn identify_system(client: &tokio_postgres::Client) -> Result<Identify
Err(IdentifyError)?
}
}

pub const XLOG_FNAME_LEN: usize = 24;
pub const XLOG_BLCKSZ: usize = 8192;
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
pub const XLOG_PAGE_MAGIC: u16 = 0xD109;
pub const XLP_REM_LEN_OFFS: usize = 2 + 2 + 4 + 8;
pub const XLOG_SIZE_OF_XLOG_SHORT_PHD: usize = XLP_REM_LEN_OFFS + 4 + 4;
pub const XLOG_SIZE_OF_XLOG_LONG_PHD: usize = XLOG_SIZE_OF_XLOG_SHORT_PHD + 8 + 4 + 4;
pub const XLOG_RECORD_CRC_OFFS: usize = 4 + 4 + 8 + 1 + 1 + 2;
pub const XLOG_SIZE_OF_XLOG_RECORD: usize = XLOG_RECORD_CRC_OFFS + 4;
pub type XLogRecPtr = u64;
pub type TimeLineID = u32;
pub type TimestampTz = u64;
pub type XLogSegNo = u64;

#[allow(non_snake_case)]
pub fn XLogSegmentOffset(xlogptr: XLogRecPtr, wal_segsz_bytes: usize) -> u32 {
return (xlogptr as u32) & (wal_segsz_bytes as u32 - 1);
}

#[allow(non_snake_case)]
pub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {
return (0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo;
}

#[allow(non_snake_case)]
pub fn XLByteToSeg(xlogptr: XLogRecPtr, wal_segsz_bytes: usize) -> XLogSegNo {
return xlogptr / wal_segsz_bytes as u64;
}

#[allow(non_snake_case)]
pub fn XLogSegNoOffsetToRecPtr(
segno: XLogSegNo,
offset: u32,
wal_segsz_bytes: usize,
) -> XLogRecPtr {
return segno * (wal_segsz_bytes as u64) + (offset as u64);
}

#[allow(non_snake_case)]
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
return format!(
"{:>08X}{:>08X}{:>08X}",
tli,
logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
);
}

#[allow(non_snake_case)]
pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) {
let tli = u32::from_str_radix(&fname[0..8], 16).unwrap();
let log = u32::from_str_radix(&fname[8..16], 16).unwrap() as XLogSegNo;
let seg = u32::from_str_radix(&fname[16..24], 16).unwrap() as XLogSegNo;
return (log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli);
}
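
These helpers mirror PostgreSQL's xlog file-name macros. A quick round-trip sketch — not part of this diff, with values worked out from the definitions above — using the default 16 MiB segment size:

fn xlog_name_roundtrip() {
    let wal_seg_size = 16 * 1024 * 1024;
    // 0x100000000 / 16 MiB = 256 segments per "xlog id"
    assert_eq!(XLogSegmentsPerXLogId(wal_seg_size), 256);

    let lsn: XLogRecPtr = 0x0000000245000058;
    assert_eq!(XLogSegmentOffset(lsn, wal_seg_size), 0x58);

    let segno = XLByteToSeg(lsn, wal_seg_size); // 0x245
    let fname = XLogFileName(1, segno, wal_seg_size);
    assert_eq!(fname, "000000010000000200000045");
    assert_eq!(XLogFromFileName(&fname, wal_seg_size), (segno, 1));
}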
fn write_wal_file(
startpos: XLogRecPtr,
timeline: ZTimelineId,
wal_seg_size: usize,
buf: &[u8],
) -> anyhow::Result<()> {
let mut bytes_left: usize = buf.len();
let mut bytes_written: usize = 0;
let mut partial;
let mut start_pos = startpos;
const ZERO_BLOCK: &'static [u8] = &[0u8; XLOG_BLCKSZ];

let wal_dir = PathBuf::from(format!("timelines/{}/wal", timeline));

/* Extract WAL location for this block */
let mut xlogoff = XLogSegmentOffset(start_pos, wal_seg_size) as usize;

while bytes_left != 0 {
let bytes_to_write;

/*
* If crossing a WAL boundary, only write up until we reach wal
* segment size.
*/
if xlogoff + bytes_left > wal_seg_size {
bytes_to_write = wal_seg_size - xlogoff;
} else {
bytes_to_write = bytes_left;
}

/* Open file */
let segno = XLByteToSeg(start_pos, wal_seg_size);
let wal_file_name = XLogFileName(
1, // FIXME: always use Postgres timeline 1
segno,
wal_seg_size,
);
let wal_file_path = wal_dir.join(wal_file_name.clone());
let wal_file_partial_path = wal_dir.join(wal_file_name.clone() + ".partial");

{
let mut wal_file: File;
/* Try to open an already completed segment */
if let Ok(file) = OpenOptions::new().write(true).open(&wal_file_path) {
wal_file = file;
partial = false;
} else if let Ok(file) = OpenOptions::new().write(true).open(&wal_file_partial_path) {
/* Try to open an existing partial file */
wal_file = file;
partial = true;
} else {
/* Create and fill a new partial file */
partial = true;
match OpenOptions::new()
.create(true)
.write(true)
.open(&wal_file_partial_path)
{
Ok(mut file) => {
for _ in 0..(wal_seg_size / XLOG_BLCKSZ) {
file.write_all(&ZERO_BLOCK)?;
}
wal_file = file;
}
Err(e) => {
error!("Failed to open log file {:?}: {}", &wal_file_path, e);
return Err(e.into());
}
}
}
wal_file.seek(SeekFrom::Start(xlogoff as u64))?;
wal_file.write_all(&buf[bytes_written..(bytes_written + bytes_to_write)])?;

// FIXME: Flush the file
//wal_file.sync_all()?;
}
/* Write was successful, advance our position */
bytes_written += bytes_to_write;
bytes_left -= bytes_to_write;
start_pos += bytes_to_write as u64;
xlogoff += bytes_to_write;

/* Did we reach the end of a WAL segment? */
if XLogSegmentOffset(start_pos, wal_seg_size) == 0 {
xlogoff = 0;
if partial {
fs::rename(&wal_file_partial_path, &wal_file_path)?;
}
}
}
Ok(())
}
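
The write loop in write_wal_file above splits any chunk that crosses a segment boundary. A worked example of just that calculation — a sketch, not part of this diff:

fn boundary_split_example() {
    let wal_seg_size: usize = 16 * 1024 * 1024;
    let xlogoff: usize = wal_seg_size - 100; // 100 bytes before the boundary
    let bytes_left: usize = 250;

    // Write only up to the boundary first; the remaining 150 bytes
    // belong to the next segment file.
    let bytes_to_write = if xlogoff + bytes_left > wal_seg_size {
        wal_seg_size - xlogoff
    } else {
        bytes_left
    };
    assert_eq!(bytes_to_write, 100);
}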
@@ -21,10 +21,10 @@ use std::fs;
use std::fs::OpenOptions;
use std::io::prelude::*;
use std::io::Error;
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
use std::{path::PathBuf, process::Stdio};
use tokio::io::AsyncBufReadExt;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::process::{Child, ChildStdin, ChildStdout, Command};
@@ -36,6 +36,7 @@ use bytes::{BufMut, Bytes, BytesMut};
use crate::page_cache;
use crate::page_cache::CacheEntry;
use crate::page_cache::WALRecord;
use crate::ZTimelineId;
use crate::{page_cache::BufferTag, PageServerConf};

static TIMEOUT: Duration = Duration::from_secs(20);
@@ -43,8 +44,8 @@ static TIMEOUT: Duration = Duration::from_secs(20);
//
// Main entry point for the WAL applicator thread.
//
pub fn wal_redo_main(conf: &PageServerConf, sys_id: u64) {
info!("WAL redo thread started {}", sys_id);
pub fn wal_redo_main(conf: &PageServerConf, timelineid: ZTimelineId) {
info!("WAL redo thread started {}", timelineid);

// We block on waiting for requests on the walredo request channel, but
// use async I/O to communicate with the child process. Initialize the
@@ -54,15 +55,15 @@ pub fn wal_redo_main(conf: &PageServerConf, sys_id: u64) {
.build()
.unwrap();

let pcache = page_cache::get_pagecache(conf, sys_id);
let pcache = page_cache::get_pagecache(conf, timelineid).unwrap();

// Loop forever, handling requests as they come.
let walredo_channel_receiver = &pcache.walredo_receiver;
loop {
let mut process: WalRedoProcess;
let datadir = conf.data_dir.join(format!("wal-redo/{}", sys_id));
let datadir = format!("wal-redo/{}", timelineid);

info!("launching WAL redo postgres process {}", sys_id);
info!("launching WAL redo postgres process {}", timelineid);
{
let _guard = runtime.enter();
process = WalRedoProcess::launch(&datadir, &runtime).unwrap();
@@ -148,13 +149,13 @@ impl WalRedoProcess {
// Tests that run the pageserver binary set proper PG_BIN_DIR
// and PG_LIB_DIR so that WalRedo starts the right postgres. We may later
// switch to setting the same things in the pageserver config file.
fn launch(datadir: &PathBuf, runtime: &Runtime) -> Result<WalRedoProcess, Error> {
fn launch(datadir: &str, runtime: &Runtime) -> Result<WalRedoProcess, Error> {
// Create an empty data directory for wal-redo postgres, deleting the old one.
fs::remove_dir_all(datadir.to_str().unwrap()).ok();
fs::remove_dir_all(datadir).ok();
let initdb = runtime
.block_on(
Command::new("initdb")
.args(&["-D", datadir.to_str().unwrap()])
.args(&["-D", datadir])
.arg("-N")
.output(),
)
@@ -180,14 +181,11 @@ impl WalRedoProcess {
.stdin(Stdio::piped())
.stderr(Stdio::piped())
.stdout(Stdio::piped())
.env("PGDATA", datadir.to_str().unwrap())
.env("PGDATA", datadir)
.spawn()
.expect("postgres --wal-redo command failed to start");

info!(
"launched WAL redo postgres process on {}",
datadir.to_str().unwrap()
);
info!("launched WAL redo postgres process on {}", datadir);

let stdin = child.stdin.take().expect("failed to open child's stdin");
let stderr = child.stderr.take().expect("failed to open child's stderr");
19
postgres_ffi/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
[package]
name = "postgres_ffi"
version = "0.1.0"
authors = ["Heikki Linnakangas <heikki@zenith.tech>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
chrono = "0.4.19"
rand = "0.8.3"
bytes = "1.0.1"
byteorder = "1.4.3"
anyhow = "1.0"
crc32c = "0.6.0"
hex = "0.4.3"

[build-dependencies]
bindgen = "0.53.1"
42
postgres_ffi/build.rs
Normal file
@@ -0,0 +1,42 @@
extern crate bindgen;

use std::env;
use std::path::PathBuf;

fn main() {
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=pg_control_ffi.h");

// The bindgen::Builder is the main entry point
// to bindgen, and lets you build up options for
// the resulting bindings.
let bindings = bindgen::Builder::default()
// The input header we would like to generate
// bindings for.
.header("pg_control_ffi.h")
// Tell cargo to invalidate the built crate whenever any of the
// included header files changed.
.parse_callbacks(Box::new(bindgen::CargoCallbacks))
.whitelist_type("ControlFileData")
.whitelist_var("PG_CONTROL_FILE_SIZE")
.whitelist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
.whitelist_type("DBState")
// Path to the server include dir. It is in tmp_install/include/server, if you did
// "configure --prefix=<path to tmp_install>". But if you used "configure --prefix=/",
// and used DESTDIR to move it into tmp_install, then it's in
// tmp_install/include/postgres/server (that's how the pgbuild.sh script does it).
// 'pg_config --includedir-server' would perhaps be the more proper way to find it,
// but this will do for now.
.clang_arg("-I../tmp_install/include/server")
.clang_arg("-I../tmp_install/include/postgresql/server")
// Finish the builder and generate the bindings.
.generate()
// Unwrap the Result and panic on failure.
.expect("Unable to generate bindings");

// Write the bindings to the $OUT_DIR/bindings.rs file.
let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
bindings
.write_to_file(out_path.join("bindings.rs"))
.expect("Couldn't write bindings!");
}
4
postgres_ffi/pg_control_ffi.h
Normal file
@@ -0,0 +1,4 @@
#include "c.h"
#include "catalog/pg_control.h"

const uint32 PG_CONTROLFILEDATA_OFFSETOF_CRC = offsetof(ControlFileData, crc);
67
postgres_ffi/src/lib.rs
Normal file
@@ -0,0 +1,67 @@
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));

use bytes::{Buf, Bytes, BytesMut};

// sizeof(ControlFileData)
const SIZEOF_CONTROLDATA: usize = std::mem::size_of::<ControlFileData>();
const OFFSETOF_CRC: usize = PG_CONTROLFILEDATA_OFFSETOF_CRC as usize;

impl ControlFileData {
// Initialize an all-zeros ControlFileData struct
pub fn new() -> ControlFileData {
let controlfile: ControlFileData;

let b = [0u8; SIZEOF_CONTROLDATA];
controlfile =
unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };

return controlfile;
}
}

pub fn decode_pg_control(buf: Bytes) -> Result<ControlFileData, anyhow::Error> {
let mut b: [u8; SIZEOF_CONTROLDATA] = [0u8; SIZEOF_CONTROLDATA];
buf.clone().copy_to_slice(&mut b);

let controlfile: ControlFileData;

// Verify the CRC, computed over everything up to the 'crc' field
let mut data_without_crc: [u8; OFFSETOF_CRC] = [0u8; OFFSETOF_CRC];
data_without_crc.copy_from_slice(&b[0..OFFSETOF_CRC]);
let expectedcrc = crc32c::crc32c(&data_without_crc);

controlfile = unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };

if expectedcrc != controlfile.crc {
anyhow::bail!(
"invalid CRC in control file: expected {:08X}, was {:08X}",
expectedcrc,
controlfile.crc
);
}

Ok(controlfile)
}

pub fn encode_pg_control(controlfile: ControlFileData) -> Bytes {
let b: [u8; SIZEOF_CONTROLDATA];

b = unsafe { std::mem::transmute::<ControlFileData, [u8; SIZEOF_CONTROLDATA]>(controlfile) };

// Recompute the CRC
let mut data_without_crc: [u8; OFFSETOF_CRC] = [0u8; OFFSETOF_CRC];
data_without_crc.copy_from_slice(&b[0..OFFSETOF_CRC]);
let newcrc = crc32c::crc32c(&data_without_crc);

let mut buf = BytesMut::with_capacity(PG_CONTROL_FILE_SIZE as usize);

buf.extend_from_slice(&b[0..OFFSETOF_CRC]);
buf.extend_from_slice(&newcrc.to_ne_bytes());
// Fill the rest of the control file with zeros.
buf.resize(PG_CONTROL_FILE_SIZE as usize, 0);

return buf.into();
}
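
A round-trip sketch of the two functions above — not part of this diff. It assumes system_identifier is among the bindgen-generated ControlFileData fields (it is declared in PostgreSQL's pg_control.h); treat the field name as an assumption:

fn pg_control_roundtrip() {
    let mut controlfile = ControlFileData::new();
    controlfile.system_identifier = 42; // assumed bindgen-generated field

    let buf = encode_pg_control(controlfile); // CRC recomputed, zero-padded
    assert_eq!(buf.len(), PG_CONTROL_FILE_SIZE as usize);

    let decoded = decode_pg_control(buf).unwrap(); // CRC check passes
    assert_eq!(decoded.system_identifier, 42);
}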
2
vendor/postgres
vendored
Submodule vendor/postgres updated: 9f9aa9c300...b898ad7e3b
@@ -34,3 +34,6 @@ postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
anyhow = "1.0"
crc32c = "0.6.0"

# FIXME: 'pageserver' is needed for ZTimelineId. Refactor
pageserver = { path = "../pageserver" }
@@ -9,6 +9,7 @@ use std::path::PathBuf;
use std::thread;
use std::{fs::File, fs::OpenOptions};

use anyhow::Result;
use clap::{App, Arg};

use slog::Drain;
@@ -16,7 +17,7 @@ use slog::Drain;
use walkeeper::wal_service;
use walkeeper::WalAcceptorConf;

fn main() -> Result<(), io::Error> {
fn main() -> Result<()> {
let arg_matches = App::new("Zenith wal_acceptor")
.about("Store WAL stream to local file system and push it to WAL receivers")
.arg(
@@ -26,6 +27,13 @@ fn main() -> Result<(), io::Error> {
.takes_value(true)
.help("Path to the WAL acceptor data directory"),
)
.arg(
Arg::with_name("systemid")
.long("systemid")
.takes_value(true)
.required(true)
.help("PostgreSQL system id, from pg_control"),
)
.arg(
Arg::with_name("listen")
.short("l")
@@ -56,16 +64,23 @@ fn main() -> Result<(), io::Error> {
)
.get_matches();

let systemid_str = arg_matches.value_of("systemid").unwrap();
let systemid: u64 = systemid_str.parse()?;

let mut conf = WalAcceptorConf {
data_dir: PathBuf::from("./"),
systemid: systemid,
daemonize: false,
no_sync: false,
pageserver_addr: None,
listen_addr: "127.0.0.1:5454".parse().unwrap(),
listen_addr: "127.0.0.1:5454".parse()?,
};

if let Some(dir) = arg_matches.value_of("datadir") {
conf.data_dir = PathBuf::from(dir);

// change into the data directory.
std::env::set_current_dir(&conf.data_dir)?;
}

if arg_matches.is_present("no-sync") {
@@ -87,7 +102,7 @@ fn main() -> Result<(), io::Error> {
start_wal_acceptor(conf)
}

fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<(), io::Error> {
fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
// Initialize logger
let _scope_guard = init_logging(&conf)?;
let _log_guard = slog_stdlog::init().unwrap();
@@ -98,20 +113,20 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<(), io::Error> {
info!("daemonizing...");

// There shouldn't be any logging to stdin/stdout. Redirect it to the main log so
// that we will see any accidental manual fpritf's or backtraces.
// that we will see any accidental manual fprintf's or backtraces.
let stdout = OpenOptions::new()
.create(true)
.append(true)
.open(conf.data_dir.join("wal_acceptor.log"))
.open("wal_acceptor.log")
.unwrap();
let stderr = OpenOptions::new()
.create(true)
.append(true)
.open(conf.data_dir.join("wal_acceptor.log"))
.open("wal_acceptor.log")
.unwrap();

let daemonize = Daemonize::new()
.pid_file(conf.data_dir.join("wal_acceptor.pid"))
.pid_file("wal_acceptor.pid")
.working_directory(Path::new("."))
.stdout(stdout)
.stderr(stderr);
@@ -6,9 +6,12 @@ mod pq_protocol;
pub mod wal_service;
pub mod xlog_utils;

use crate::pq_protocol::SystemId;

#[derive(Debug, Clone)]
pub struct WalAcceptorConf {
pub data_dir: PathBuf,
pub systemid: SystemId,
pub daemonize: bool,
pub no_sync: bool,
pub listen_addr: SocketAddr,
@@ -1,7 +1,9 @@
use byteorder::{BigEndian, ByteOrder};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use pageserver::ZTimelineId;
use std::io;
use std::str;
use std::str::FromStr;

pub type Oid = u32;
pub type SystemId = u64;
@@ -37,7 +39,7 @@ pub enum BeMessage<'a> {
pub struct FeStartupMessage {
pub version: u32,
pub kind: StartupRequestCode,
pub system_id: SystemId,
pub timelineid: ZTimelineId,
}

#[derive(Debug)]
@@ -83,26 +85,33 @@ impl FeStartupMessage {
let params_str = str::from_utf8(&params_bytes).unwrap();
let params = params_str.split('\0');
let mut options = false;
let mut system_id: u64 = 0;
let mut timelineid: Option<ZTimelineId> = None;
for p in params {
if p == "options" {
options = true;
} else if options {
for opt in p.split(' ') {
if opt.starts_with("system.id=") {
system_id = opt[10..].parse::<u64>().unwrap();
if opt.starts_with("ztimelineid=") {
// FIXME: rethrow parsing error, don't unwrap
timelineid = Some(ZTimelineId::from_str(&opt[12..]).unwrap());
break;
}
}
break;
}
}
if timelineid.is_none() {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
"timelineid is required",
));
}

buf.advance(len as usize);
Ok(Some(FeMessage::StartupMessage(FeStartupMessage {
version,
kind,
system_id,
timelineid: timelineid.unwrap(),
})))
}
}
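
The timeline now travels in the startup packet's options parameter. A standalone sketch of the extraction logic used above — not part of this diff:

fn extract_ztimelineid(params: &str) -> Option<&str> {
    // params is the NUL-separated parameter list, e.g.
    // "user\0no_user\0options\0-c ztimelineid=bc62e7d612d0e6fe8f99a6dd2f281f9d\0"
    let mut in_options = false;
    for p in params.split('\0') {
        if p == "options" {
            in_options = true;
        } else if in_options {
            for opt in p.split(' ') {
                if let Some(id) = opt.strip_prefix("ztimelineid=") {
                    return Some(id);
                }
            }
            break;
        }
    }
    None
}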
@@ -146,20 +155,20 @@ impl<'a> BeMessage<'a> {

BeMessage::RowDescription(rows) => {
buf.put_u8(b'T');
let total_len: u32 = rows
.iter()
.fold(0, |acc, row| acc + row.name.len() as u32 + 3 * (4 + 2));
buf.put_u32(4 + 2 + total_len);

let mut body = BytesMut::new();
body.put_i16(rows.len() as i16); // # of fields
for row in rows.iter() {
buf.put_i16(row.name.len() as i16);
buf.put_slice(row.name);
buf.put_i32(0); /* table oid */
buf.put_i16(0); /* attnum */
buf.put_u32(row.typoid);
buf.put_i16(row.typlen);
buf.put_i32(-1); /* typmod */
buf.put_i16(0); /* format code */
body.put_slice(row.name);
body.put_i32(0); /* table oid */
body.put_i16(0); /* attnum */
body.put_u32(row.typoid);
body.put_i16(row.typlen);
body.put_i32(-1); /* typmod */
body.put_i16(0); /* format code */
}
buf.put_i32((4 + body.len()) as i32); // # of bytes, including len field itself
buf.put(body);
}

BeMessage::DataRow(vals) => {
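
The RowDescription fix builds the message body first and only then writes the length word, instead of hand-counting field sizes. The same pattern in isolation — a sketch, not part of this diff; the field layout follows the libpq protocol as used above:

use bytes::{BufMut, BytesMut};

fn row_description_sketch() -> BytesMut {
    let mut buf = BytesMut::new();
    let mut body = BytesMut::new();

    body.put_i16(1); // number of fields
    body.put_slice(b"lsn\0"); // field name, NUL-terminated
    body.put_i32(0); // table oid
    body.put_i16(0); // attnum
    // ... type oid, typlen, typmod, format code ...

    buf.put_u8(b'T');
    buf.put_i32((4 + body.len()) as i32); // length counts itself, not the tag byte
    buf.put(body);
    buf
}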
@@ -33,6 +33,7 @@ use tokio_postgres::{connect, Error, NoTls};
use crate::pq_protocol::*;
use crate::xlog_utils::*;
use crate::WalAcceptorConf;
use pageserver::ZTimelineId;

type FullTransactionId = u64;

@@ -64,7 +65,8 @@ struct ServerInfo {
protocol_version: u32, /* proxy-safekeeper protocol version */
pg_version: u32, /* Postgres server version */
node_id: NodeId,
system_id: SystemId, /* Postgres system identifier */
system_id: SystemId,
timeline_id: ZTimelineId, /* Zenith timelineid */
wal_end: XLogRecPtr,
timeline: TimeLineID,
wal_seg_size: u32,
@@ -146,8 +148,8 @@ struct SharedState {
* Database instance (tenant)
*/
#[derive(Debug)]
pub struct System {
id: SystemId,
pub struct Timeline {
timelineid: ZTimelineId,
mutex: Mutex<SharedState>,
cond: Notify, /* condition variable used to notify wal senders */
}
@@ -157,7 +159,7 @@ pub struct System {
*/
#[derive(Debug)]
struct Connection {
system: Option<Arc<System>>,
timeline: Option<Arc<Timeline>>,
stream: TcpStream, /* Postgres connection */
inbuf: BytesMut, /* input buffer */
outbuf: BytesMut, /* output buffer */
@@ -211,6 +213,7 @@ impl Serializer for ServerInfo {
buf.put_u32_le(self.pg_version);
self.node_id.pack(buf);
buf.put_u64_le(self.system_id);
buf.put_slice(&self.timeline_id.as_arr());
buf.put_u64_le(self.wal_end);
buf.put_u32_le(self.timeline);
buf.put_u32_le(self.wal_seg_size);
@@ -221,6 +224,7 @@ impl Serializer for ServerInfo {
pg_version: buf.get_u32_le(),
node_id: NodeId::unpack(buf),
system_id: buf.get_u64_le(),
timeline_id: ZTimelineId::get_from_buf(buf),
wal_end: buf.get_u64_le(),
timeline: buf.get_u32_le(),
wal_seg_size: buf.get_u32_le(),
@@ -278,6 +282,7 @@ impl SafeKeeperInfo {
pg_version: UNKNOWN_SERVER_VERSION, /* Postgres server version */
node_id: NodeId { term: 0, uuid: 0 },
system_id: 0, /* Postgres system identifier */
timeline_id: ZTimelineId::from([0u8; 16]),
wal_end: 0,
timeline: 0,
wal_seg_size: 0,
@@ -349,7 +354,8 @@ impl Serializer for SafeKeeperResponse {
}

lazy_static! {
pub static ref SYSTEMS: Mutex<HashMap<SystemId, Arc<System>>> = Mutex::new(HashMap::new());
pub static ref TIMELINES: Mutex<HashMap<ZTimelineId, Arc<Timeline>>> =
Mutex::new(HashMap::new());
}

pub fn thread_main(conf: WalAcceptorConf) {
@@ -366,7 +372,7 @@ pub fn thread_main(conf: WalAcceptorConf) {
info!("Starting wal acceptor on {}", conf.listen_addr);

runtime.block_on(async {
let _unused = main_loop(&conf).await;
main_loop(&conf).await.unwrap();
});
}

@@ -389,8 +395,8 @@ async fn main_loop(conf: &WalAcceptorConf) -> Result<()> {
}
}

impl System {
pub fn new(id: SystemId) -> System {
impl Timeline {
pub fn new(timelineid: ZTimelineId) -> Timeline {
let shared_state = SharedState {
commit_lsn: 0,
info: SafeKeeperInfo::new(),
@@ -401,8 +407,8 @@ impl System {
catalog_xmin: u64::MAX,
},
};
System {
id,
Timeline {
timelineid,
mutex: Mutex::new(shared_state),
cond: Notify::new(),
}
@@ -443,12 +449,23 @@ impl System {
return shared_state.hs_feedback;
}

// Load and lock control file (prevent running more than one instance of safekeeper
fn load_control_file(&self, conf: &WalAcceptorConf) {
// Load and lock control file (prevent running more than one instance of safekeeper)
fn load_control_file(&self, conf: &WalAcceptorConf) -> Result<()> {
let mut shared_state = self.mutex.lock().unwrap();

if shared_state.control_file.is_some() {
info!(
"control file for timeline {} is already open",
self.timelineid
);
return Ok(());
}

let control_file_path = conf
.data_dir
.join(self.id.to_string())
.join(self.timelineid.to_string())
.join(CONTROL_FILE_NAME);
info!("loading control file {}", control_file_path.display());
match OpenOptions::new()
.read(true)
.write(true)
@@ -460,13 +477,13 @@ impl System {
match file.try_lock_exclusive() {
Ok(()) => {}
Err(e) => {
panic!(
io_error!(
"Control file {:?} is locked by some other process: {}",
&control_file_path, e
&control_file_path,
e
);
}
}
let mut shared_state = self.mutex.lock().unwrap();
shared_state.control_file = Some(file);

const SIZE: usize = mem::size_of::<SafeKeeperInfo>();
@@ -483,12 +500,13 @@ impl System {
let my_info = SafeKeeperInfo::unpack(&mut input);

if my_info.magic != SK_MAGIC {
panic!("Invalid control file magic: {}", my_info.magic);
io_error!("Invalid control file magic: {}", my_info.magic);
}
if my_info.format_version != SK_FORMAT_VERSION {
panic!(
io_error!(
"Incompatible format version: {} vs. {}",
my_info.format_version, SK_FORMAT_VERSION
my_info.format_version,
SK_FORMAT_VERSION
);
}
shared_state.info = my_info;
@@ -501,6 +519,7 @@ impl System {
);
}
}
Ok(())
}

fn save_control_file(&self, sync: bool) -> Result<()> {
@@ -521,7 +540,7 @@ impl System {
impl Connection {
pub fn new(socket: TcpStream, conf: &WalAcceptorConf) -> Connection {
Connection {
system: None,
timeline: None,
stream: socket,
inbuf: BytesMut::with_capacity(10 * 1024),
outbuf: BytesMut::with_capacity(10 * 1024),
@@ -530,8 +549,8 @@ impl Connection {
}
}

fn system(&self) -> Arc<System> {
self.system.as_ref().unwrap().clone()
fn timeline(&self) -> Arc<Timeline> {
self.timeline.as_ref().unwrap().clone()
}

async fn run(&mut self) -> Result<()> {
@@ -563,10 +582,15 @@ impl Connection {
"no_user",
);
let callme = format!(
"callmemaybe host={} port={} replication=1 options='-c system.id={}'",
"callmemaybe {} host={} port={} options='-c ztimelineid={}'",
self.timeline().timelineid,
self.conf.listen_addr.ip(),
self.conf.listen_addr.port(),
self.system().get_info().server.system_id,
self.timeline().timelineid
);
info!(
"requesting page server to connect to us: start {} {}",
ps_connstr, callme
);
let (client, connection) = connect(&ps_connstr, NoTls).await?;

@@ -582,22 +606,14 @@ impl Connection {
Ok(())
}

fn set_system(&mut self, id: SystemId) -> Result<()> {
let mut systems = SYSTEMS.lock().unwrap();
if id == 0 {
// non-multitenant configuration: just a single instance
if let Some(system) = systems.values().next() {
self.system = Some(system.clone());
return Ok(());
}
io_error!("No active instances");
fn set_timeline(&mut self, timelineid: ZTimelineId) -> Result<()> {
let mut timelines = TIMELINES.lock().unwrap();
if !timelines.contains_key(&timelineid) {
info!("creating timeline dir {}", timelineid);
fs::create_dir_all(timelineid.to_string())?;
timelines.insert(timelineid, Arc::new(Timeline::new(timelineid)));
}
if !systems.contains_key(&id) {
let system_dir = self.conf.data_dir.join(id.to_string());
fs::create_dir_all(system_dir)?;
systems.insert(id, Arc::new(System::new(id)));
}
self.system = Some(systems.get(&id).unwrap().clone());
self.timeline = Some(timelines.get(&timelineid).unwrap().clone());
Ok(())
}
@@ -606,14 +622,16 @@ impl Connection {
// Receive information about server
let server_info = self.read_req::<ServerInfo>().await?;
info!(
"Start handshake with wal_proposer {} sysid {}",
"Start handshake with wal_proposer {} sysid {} timeline {}",
self.stream.peer_addr()?,
server_info.system_id
server_info.system_id,
server_info.timeline_id,
);
self.set_system(server_info.system_id)?;
self.system().load_control_file(&self.conf);
// FIXME: also check that the system identifier matches
self.set_timeline(server_info.timeline_id)?;
self.timeline().load_control_file(&self.conf)?;

let mut my_info = self.system().get_info();
let mut my_info = self.timeline().get_info();

/* Check protocol compatibility */
if server_info.protocol_version != SK_PROTOCOL_VERSION {
@@ -662,9 +680,9 @@ impl Connection {
);
}
my_info.server.node_id = prop.node_id;
self.system().set_info(&my_info);
self.timeline().set_info(&my_info);
/* Need to persist our vote first */
self.system().save_control_file(true)?;
self.timeline().save_control_file(true)?;

let mut flushed_restart_lsn: XLogRecPtr = 0;
let wal_seg_size = server_info.wal_seg_size as usize;
@@ -678,12 +696,13 @@ impl Connection {
// As replication in postgres is initiated by the receiver, we should use the callme mechanism
if let Err(e) = self.request_callback().await {
// Do not treat it as a fatal error; continue to work
// FIXME: we should retry after a while...
error!("Failed to send callme request to pageserver: {}", e);
}

info!(
"Start streaming from server {} address {:?}",
server_info.system_id,
"Start streaming from timeline {} address {:?}",
server_info.timeline_id,
self.stream.peer_addr()?
);

@@ -705,6 +724,15 @@ impl Connection {
let rec_size = (end_pos - start_pos) as usize;
assert!(rec_size <= MAX_SEND_SIZE);

debug!(
"received {} bytes between {:X}/{:X} and {:X}/{:X}",
rec_size,
start_pos >> 32,
start_pos & 0xffffffff,
end_pos >> 32,
end_pos & 0xffffffff
);

/* Receive message body */
self.inbuf.resize(rec_size, 0u8);
self.stream.read_exact(&mut self.inbuf[0..rec_size]).await?;
@@ -735,7 +763,7 @@ impl Connection {
* when restart_lsn delta exceeds WAL segment size.
*/
sync_control_file |= flushed_restart_lsn + (wal_seg_size as u64) < my_info.restart_lsn;
self.system().save_control_file(sync_control_file)?;
self.timeline().save_control_file(sync_control_file)?;

if sync_control_file {
flushed_restart_lsn = my_info.restart_lsn;
@@ -746,7 +774,7 @@ impl Connection {
let resp = SafeKeeperResponse {
epoch: my_info.epoch,
flush_lsn: end_pos,
hs_feedback: self.system().get_hs_feedback(),
hs_feedback: self.timeline().get_hs_feedback(),
};
self.start_sending();
resp.pack(&mut self.outbuf);
@@ -756,7 +784,7 @@ impl Connection {
* Ping wal sender that new data is available.
* FlushLSN (end_pos) can be smaller than commitLSN in case we are at catching-up safekeeper.
*/
self.system()
self.timeline()
.notify_wal_senders(min(req.commit_lsn, end_pos));
}
Ok(())
@@ -807,7 +835,7 @@ impl Connection {
}

//
// Send WAL to replica or WAL sender using standard libpq replication protocol
// Send WAL to replica or WAL receiver using standard libpq replication protocol
//
async fn send_wal(&mut self) -> Result<()> {
info!("WAL sender to {:?} is started", self.stream.peer_addr()?);
@@ -828,7 +856,7 @@ impl Connection {
BeMessage::write(&mut self.outbuf, &BeMessage::ReadyForQuery);
self.send().await?;
self.init_done = true;
self.set_system(m.system_id)?;
self.set_timeline(m.timelineid)?;
}
StartupRequestCode::Cancel => return Ok(()),
}
@@ -861,7 +889,7 @@ impl Connection {
let (start_pos, timeline) = self.find_end_of_wal(false);
let lsn = format!("{:X}/{:>08X}", (start_pos >> 32) as u32, start_pos as u32);
let tli = timeline.to_string();
let sysid = self.system().get_info().server.system_id.to_string();
let sysid = self.timeline().get_info().server.system_id.to_string();
let lsn_bytes = lsn.as_bytes();
let tli_bytes = tli.as_bytes();
let sysid_bytes = sysid.as_bytes();
@@ -893,11 +921,11 @@ impl Connection {
);
BeMessage::write(
&mut self.outbuf,
&BeMessage::DataRow(&[Some(lsn_bytes), Some(tli_bytes), Some(sysid_bytes), None]),
&BeMessage::DataRow(&[Some(sysid_bytes), Some(tli_bytes), Some(lsn_bytes), None]),
);
BeMessage::write(
&mut self.outbuf,
&BeMessage::CommandComplete(b"IDENTIFY_SYSTEM"),
&BeMessage::CommandComplete(b"IDENTIFY_SYSTEM\0"),
);
BeMessage::write(&mut self.outbuf, &BeMessage::ReadyForQuery);
self.send().await?;
@@ -917,7 +945,7 @@ impl Connection {
} else {
0
};
let wal_seg_size = self.system().get_info().server.wal_seg_size as usize;
let wal_seg_size = self.timeline().get_info().server.wal_seg_size as usize;
if wal_seg_size == 0 {
io_error!("Can not start replication before connecting to wal_proposer");
}
@@ -935,15 +963,6 @@ impl Connection {
BeMessage::write(&mut self.outbuf, &BeMessage::Copy);
self.send().await?;

/*
* Always start streaming at the beginning of a segment
*
* FIXME: It is common practice to start streaming at the beginning of
* the segment, but it should be up to the client to decide that. We
* shouldn't enforce that here.
*/
start_pos -= XLogSegmentOffset(start_pos, wal_seg_size) as u64;

let mut end_pos: XLogRecPtr;
let mut commit_lsn: XLogRecPtr;
let mut wal_file: Option<File> = None;
@@ -960,19 +979,18 @@ impl Connection {
end_pos = stop_pos;
} else {
/* normal mode */
let timeline = self.timeline();
loop {
// Rust doesn't allow grabbing an async result from inside a mutex scope
let system = self.system();
let notified = system.cond.notified();
{
let shared_state = system.mutex.lock().unwrap();
let shared_state = timeline.mutex.lock().unwrap();
commit_lsn = shared_state.commit_lsn;
if start_pos < commit_lsn {
end_pos = commit_lsn;
break;
}
}
notified.await;
timeline.cond.notified().await;
}
}
if end_pos == END_REPLICATION_MARKER {
@@ -983,7 +1001,7 @@ impl Connection {
Ok(0) => break,
Ok(_) => match self.parse_message()? {
Some(FeMessage::CopyData(m)) => self
.system()
.timeline()
.add_hs_feedback(HotStandbyFeedback::parse(&m.body)),
_ => {}
},
@@ -1004,7 +1022,7 @@ impl Connection {
let wal_file_path = self
.conf
.data_dir
.join(self.system().id.to_string())
.join(self.timeline().timelineid.to_string())
.join(wal_file_name.clone() + ".partial");
if let Ok(opened_file) = File::open(&wal_file_path) {
file = opened_file;
@@ -1012,7 +1030,7 @@ impl Connection {
let wal_file_path = self
.conf
.data_dir
.join(self.system().id.to_string())
.join(self.timeline().timelineid.to_string())
.join(wal_file_name);
match File::open(&wal_file_path) {
Ok(opened_file) => file = opened_file,
@@ -1034,6 +1052,8 @@ impl Connection {
let msg_size = LIBPQ_HDR_SIZE + XLOG_HDR_SIZE + send_size;
let data_start = LIBPQ_HDR_SIZE + XLOG_HDR_SIZE;
let data_end = data_start + send_size;

file.seek(SeekFrom::Start(xlogoff as u64))?;
file.read_exact(&mut self.outbuf[data_start..data_end])?;
self.outbuf[0] = b'd';
BigEndian::write_u32(
@@ -1048,6 +1068,12 @@ impl Connection {
self.stream.write_all(&self.outbuf[0..msg_size]).await?;
start_pos += send_size as u64;

debug!(
"Sent WAL to page server up to {:X}/{:>08X}",
(end_pos >> 32) as u32,
end_pos as u32
);

if XLogSegmentOffset(start_pos, wal_seg_size) != 0 {
wal_file = Some(file);
}
@@ -1102,12 +1128,12 @@ impl Connection {
let wal_file_path = self
.conf
.data_dir
.join(self.system().id.to_string())
.join(self.timeline().timelineid.to_string())
.join(wal_file_name.clone());
let wal_file_partial_path = self
.conf
.data_dir
.join(self.system().id.to_string())
.join(self.timeline().timelineid.to_string())
.join(wal_file_name.clone() + ".partial");

{
@@ -1170,7 +1196,7 @@ impl Connection {
fn find_end_of_wal(&self, precise: bool) -> (XLogRecPtr, TimeLineID) {
find_end_of_wal(
&self.conf.data_dir,
self.system().get_info().server.wal_seg_size as usize,
self.timeline().get_info().server.wal_seg_size as usize,
precise,
)
}
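
LSNs are logged throughout as two 32-bit halves (hi = lsn >> 32, lo = lsn & 0xffffffff). A tiny helper capturing the convention the code inlines — a sketch, not part of this diff:

fn format_lsn(lsn: u64) -> String {
    // PostgreSQL-style "{hi}/{lo}", e.g. 0x0000000245000058 -> "2/45000058"
    format!("{:X}/{:X}", lsn >> 32, lsn & 0xffff_ffff)
}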
@@ -4,7 +4,7 @@ use log::*;
use std::cmp::min;
use std::fs::{self, File};
use std::io::prelude::*;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::time::SystemTime;

pub const XLOG_FNAME_LEN: usize = 24;
@@ -89,7 +89,7 @@ pub fn get_current_timestamp() -> TimestampTz {
}

fn find_end_of_wal_segment(
data_dir: &PathBuf,
data_dir: &Path,
segno: XLogSegNo,
tli: TimeLineID,
wal_seg_size: usize,
@@ -185,7 +185,7 @@ fn find_end_of_wal_segment(
}

pub fn find_end_of_wal(
data_dir: &PathBuf,
data_dir: &Path,
wal_seg_size: usize,
precise: bool,
) -> (XLogRecPtr, TimeLineID) {
@@ -8,4 +8,10 @@ edition = "2018"

[dependencies]
clap = "2.33.0"
anyhow = "1.0"

# FIXME: 'pageserver' is needed for ZTimelineId. Refactor
pageserver = { path = "../pageserver" }
walkeeper = { path = "../walkeeper" }
control_plane = { path = "../control_plane" }
postgres_ffi = { path = "../postgres_ffi" }
@@ -1,64 +1,94 @@
|
||||
use clap::{App, Arg, ArgMatches, SubCommand};
|
||||
use std::error;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::exit;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{anyhow, bail};
|
||||
use clap::{App, Arg, ArgMatches, SubCommand};
|
||||
|
||||
use control_plane::local_env::LocalEnv;
|
||||
use control_plane::storage::PageServerNode;
|
||||
use control_plane::{compute::ComputeControlPlane, local_env, storage};
|
||||
|
||||
type Result<T> = std::result::Result<T, Box<dyn error::Error>>;
|
||||
use pageserver::ZTimelineId;
|
||||
|
||||
fn main() {
|
||||
fn zenith_repo_dir() -> PathBuf {
|
||||
// Find repository path
|
||||
match std::env::var_os("ZENITH_REPO_DIR") {
|
||||
Some(val) => PathBuf::from(val.to_str().unwrap()),
|
||||
None => ".zenith".into(),
|
||||
}
|
||||
}
|
||||
|
||||
// Main entry point for the 'zenith' CLI utility
|
||||
//
|
||||
// This utility can used to work with a local zenith repository.
|
||||
// In order to run queries in it, you need to launch the page server,
|
||||
// and a compute node against the page server
|
||||
fn main() -> Result<()> {
|
||||
let name_arg = Arg::with_name("NAME")
|
||||
.short("n")
|
||||
.index(1)
|
||||
.help("name of this postgres instance")
|
||||
.required(true);
|
||||
let matches = App::new("zenith")
|
||||
.subcommand(SubCommand::with_name("init"))
|
||||
.subcommand(SubCommand::with_name("start"))
|
||||
.subcommand(SubCommand::with_name("stop"))
|
||||
.subcommand(SubCommand::with_name("status"))
|
||||
.about("Zenith CLI")
|
||||
.subcommand(
|
||||
SubCommand::with_name("init")
|
||||
.about("Initialize a new Zenith repository in current directory"),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("branch")
|
||||
.about("Create a new branch")
|
||||
.arg(Arg::with_name("branchname").required(false).index(1))
|
||||
.arg(Arg::with_name("start-point").required(false).index(2)),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("pageserver")
|
||||
.about("Manage pageserver instance")
|
||||
.subcommand(SubCommand::with_name("status"))
|
||||
.subcommand(SubCommand::with_name("start"))
|
||||
.subcommand(SubCommand::with_name("stop")),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("pg")
|
||||
.about("Manage postgres instances")
|
||||
.subcommand(
|
||||
SubCommand::with_name("create"), // .arg(name_arg.clone()
|
||||
// .required(false)
|
||||
// .help("name of this postgres instance (will be pgN if omitted)"))
|
||||
SubCommand::with_name("create")
|
||||
// .arg(name_arg.clone()
|
||||
// .required(false)
|
||||
// .help("name of this postgres instance (will be pgN if omitted)"))
|
||||
.arg(Arg::with_name("timeline").required(false).index(1)),
|
||||
)
|
||||
.subcommand(SubCommand::with_name("list"))
|
||||
.subcommand(SubCommand::with_name("start").arg(name_arg.clone()))
|
||||
.subcommand(SubCommand::with_name("stop").arg(name_arg.clone()))
|
||||
.subcommand(SubCommand::with_name("destroy").arg(name_arg.clone())),
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("snapshot")
|
||||
.about("Manage database snapshots")
|
||||
.subcommand(SubCommand::with_name("create"))
|
||||
.subcommand(SubCommand::with_name("start"))
|
||||
.subcommand(SubCommand::with_name("stop"))
|
||||
.subcommand(SubCommand::with_name("destroy")),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
// handle init separately and exit
|
||||
if let Some("init") = matches.subcommand_name() {
|
||||
match local_env::init() {
|
||||
Ok(_) => {
|
||||
println!("Initialization complete! You may start zenith with 'zenith start' now.");
|
||||
exit(0);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error during init: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if let ("init", Some(sub_args)) = matches.subcommand() {
|
||||
run_init_cmd(sub_args.clone())?;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// all other commands would need config
|
||||
let env = match local_env::load_config() {
|
||||
|
||||
let repopath = PathBuf::from(zenith_repo_dir());
|
||||
if !repopath.exists() {
|
||||
bail!(
|
||||
"Zenith repository does not exists in {}.\n\
|
||||
Set ZENITH_REPO_DIR or initialize a new repository with 'zenith init'",
|
||||
repopath.display()
|
||||
);
|
||||
}
|
||||
// TODO: check that it looks like a zenith repository
|
||||
let env = match local_env::load_config(&repopath) {
|
||||
Ok(conf) => conf,
|
||||
Err(e) => {
|
||||
eprintln!("Error loading config from ~/.zenith: {}", e);
|
||||
eprintln!("Error loading config from {}: {}", repopath.display(), e);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
@@ -68,6 +98,9 @@ fn main() {
|
||||
panic!() /* Should not happen. Init was handled before */
|
||||
}
|
||||
|
||||
("branch", Some(sub_args)) => run_branch_cmd(&env, sub_args.clone())?,
|
||||
("pageserver", Some(sub_args)) => run_pageserver_cmd(&env, sub_args.clone())?,
|
||||
|
||||
("start", Some(_sub_m)) => {
|
||||
let pageserver = storage::PageServerNode::from_env(&env);
|
||||
|
||||
@@ -94,15 +127,53 @@ fn main() {
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_pageserver_cmd(local_env: &LocalEnv, args: ArgMatches) -> Result<()> {
|
||||
match args.subcommand() {
|
||||
("status", Some(_sub_m)) => {
|
||||
todo!();
|
||||
}
|
||||
("start", Some(_sub_m)) => {
|
||||
let psnode = PageServerNode::from_env(local_env);
|
||||
psnode.start()?;
|
||||
println!("Page server started");
|
||||
}
|
||||
("stop", Some(_sub_m)) => {
|
||||
todo!();
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Peek into the repository, to grab the timeline ID of given branch
|
||||
pub fn get_branch_timeline(repopath: &Path, branchname: &str) -> ZTimelineId {
|
||||
let branchpath = repopath.join("refs/branches/".to_owned() + branchname);
|
||||
|
||||
ZTimelineId::from_str(&(fs::read_to_string(&branchpath).unwrap())).unwrap()
|
||||
}

fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let mut cplane = ComputeControlPlane::load(env.clone())?;

    match pg_match.subcommand() {
-        ("create", Some(_sub_m)) => {
-            cplane.new_node()?;
+        ("create", Some(sub_m)) => {
+            // FIXME: cheat and resolve the timeline by peeking into the
+            // repository. In reality, when you're launching a compute node
+            // against a possibly-remote page server, we wouldn't know what
+            // branches exist in the remote repository. Or would we require
+            // that you "zenith fetch" them into a local repository first?
+            let timeline_arg = sub_m.value_of("timeline").unwrap_or("main");
+            let timeline = get_branch_timeline(&env.repo_path, timeline_arg);
+
+            println!("Initializing Postgres on timeline {}...", timeline);
+
+            cplane.new_node(timeline)?;
        }
        ("list", Some(_sub_m)) => {
            println!("NODE\tADDRESS\tSTATUS");
@@ -115,7 +186,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
            let node = cplane
                .nodes
                .get(name)
-                .ok_or(format!("postgres {} is not found", name))?;
+                .ok_or(anyhow!("postgres {} is not found", name))?;
            node.start()?;
        }
        ("stop", Some(sub_m)) => {
@@ -123,7 +194,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
            let node = cplane
                .nodes
                .get(name)
-                .ok_or(format!("postgres {} is not found", name))?;
+                .ok_or(anyhow!("postgres {} is not found", name))?;
            node.stop()?;
        }

@@ -132,3 +203,134 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {

    Ok(())
}
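
// Design note (editor's addition): the switch from `format!` to `anyhow!` above
// matters because `String` does not implement `std::error::Error`, so `?` cannot
// convert a `Result<_, String>` into the `anyhow::Result` returned here, whereas
// `anyhow!` produces an `anyhow::Error` directly.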

// "zenith init" - Initialize a new Zenith repository in the current dir
fn run_init_cmd(_args: ArgMatches) -> Result<()> {
    local_env::init()?;
    Ok(())
}

// handle "zenith branch" subcommand
fn run_branch_cmd(local_env: &LocalEnv, args: ArgMatches) -> Result<()> {
    let repopath = local_env.repo_path.to_str().unwrap();

    if let Some(branchname) = args.value_of("branchname") {
        if PathBuf::from(format!("{}/refs/branches/{}", repopath, branchname)).exists() {
            anyhow::bail!("branch {} already exists", branchname);
        }

        if let Some(startpoint_str) = args.value_of("start-point") {
            let mut startpoint = parse_point_in_time(startpoint_str)?;

            if startpoint.lsn == 0 {
                // Find end of WAL on the old timeline
                let end_of_wal = local_env::find_end_of_wal(local_env, startpoint.timelineid)?;

                println!(
                    "branching at end of WAL: {:X}/{:X}",
                    end_of_wal >> 32,
                    end_of_wal & 0xffffffff
                );

                startpoint.lsn = end_of_wal;
            }

            return local_env::create_branch(local_env, branchname, startpoint);
        } else {
            // Return an error rather than panic; we are in a Result context.
            anyhow::bail!("missing start-point");
        }
    } else {
        // No arguments, list branches
        list_branches()?;
    }
    Ok(())
}
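
// Worked example (illustrative, not in the commit): if find_end_of_wal returned
// 0x2_015D_3DD8, the message above would print "branching at end of WAL: 2/15D3DD8",
// since end_of_wal >> 32 == 0x2 and end_of_wal & 0xffffffff == 0x15D3DD8, i.e. the
// usual Postgres hi/lo LSN notation.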

fn list_branches() -> Result<()> {
    // list branches
    let paths = fs::read_dir(zenith_repo_dir().join("refs").join("branches"))?;

    for path in paths {
        println!(" {}", path?.file_name().to_str().unwrap());
    }

    Ok(())
}

//
// Parse a user-given string that represents a point-in-time.
//
// We support multiple variants:
//
// Raw timeline id in hex, meaning the end of that timeline:
//    bc62e7d612d0e6fe8f99a6dd2f281f9d
//
// A specific LSN on a timeline:
//    bc62e7d612d0e6fe8f99a6dd2f281f9d@2/15D3DD8
//
// Same, with a human-friendly branch name:
//    main
//    main@2/15D3DD8
//
// Human-friendly tag name:
//    mytag
//
fn parse_point_in_time(s: &str) -> Result<local_env::PointInTime> {
    let mut strings = s.split('@');
    let name = strings.next().unwrap();

    // The LSN halves use Postgres's hex "hi/lo" notation (e.g. "2/15D3DD8"),
    // so parse them as base-16; a plain `str::parse` would read them as decimal
    // and fail on hex digits.
    let lsn: Option<u64>;
    if let Some(lsnstr) = strings.next() {
        let mut s = lsnstr.split('/');
        let lsn_hi = u64::from_str_radix(
            s.next()
                .ok_or(anyhow!("invalid LSN in point-in-time specification"))?,
            16,
        )?;
        let lsn_lo = u64::from_str_radix(
            s.next()
                .ok_or(anyhow!("invalid LSN in point-in-time specification"))?,
            16,
        )?;
        lsn = Some(lsn_hi << 32 | lsn_lo);
    } else {
        lsn = None;
    }

    // Check if it's a tag
    if lsn.is_none() {
        let tagpath = zenith_repo_dir().join("refs").join("tags").join(name);
        if tagpath.exists() {
            let pointstr = fs::read_to_string(tagpath)?;

            return parse_point_in_time(&pointstr);
        }
    }

    // Check if it's a branch, or branch @ LSN
    let branchpath = zenith_repo_dir().join("refs").join("branches").join(name);
    if branchpath.exists() {
        let pointstr = fs::read_to_string(branchpath)?;

        let mut result = parse_point_in_time(&pointstr)?;
        result.lsn = lsn.unwrap_or(0);
        return Ok(result);
    }

    // Check if it's a timeline id, or timeline id @ LSN
    let tlipath = zenith_repo_dir().join("timelines").join(name);
    if tlipath.exists() {
        let result = local_env::PointInTime {
            timelineid: ZTimelineId::from_str(name)?,
            lsn: lsn.unwrap_or(0),
        };

        return Ok(result);
    }

    bail!("could not parse point-in-time {}", s)
}
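
// A quick check of the "@" decomposition (editor's sketch, assuming the base-16
// parsing above): "main@2/15D3DD8" splits into the name "main" plus LSN halves
// hi = 0x2 and lo = 0x15D3DD8, combined as (hi << 32) | lo.
#[cfg(test)]
mod lsn_parse_example {
    #[test]
    fn combine_lsn_halves() {
        let lsn_hi = u64::from_str_radix("2", 16).unwrap();
        let lsn_lo = u64::from_str_radix("15D3DD8", 16).unwrap();
        assert_eq!(lsn_hi << 32 | lsn_lo, 0x2_015D_3DD8);
    }
}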

7	zenith_utils/Cargo.toml	Normal file
@@ -0,0 +1,7 @@
[package]
name = "zenith_utils"
version = "0.1.0"
authors = ["Eric Seppanen <eric@zenith.tech>"]
edition = "2018"

[dependencies]

2	zenith_utils/src/lib.rs	Normal file
@@ -0,0 +1,2 @@
//! zenith_utils is intended to be a place to put code that is shared
//! between other crates in this repository.