mirror of
https://github.com/neondatabase/neon.git
synced 2026-03-18 07:40:37 +00:00
Compare commits
54 Commits
compute_no
...
issue_56
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
babd2339cc | ||
|
|
69b786040e | ||
|
|
c794f128cc | ||
|
|
220a023e51 | ||
|
|
e911427872 | ||
|
|
eb42fbadeb | ||
|
|
92e4f4b3b6 | ||
|
|
b5a5ea5831 | ||
|
|
f387769203 | ||
|
|
7f777a485e | ||
|
|
d8ab2e00cb | ||
|
|
f520ef9a64 | ||
|
|
d047a3abf7 | ||
|
|
f69db17409 | ||
|
|
3600b33f1c | ||
|
|
2c5fb6d6c8 | ||
|
|
fa5d31056b | ||
|
|
583f64768f | ||
|
|
c5d56ffe22 | ||
|
|
b451ede199 | ||
|
|
533087fd5d | ||
|
|
8879f747ee | ||
|
|
9809613c6f | ||
|
|
8d1bf152cf | ||
|
|
3725815935 | ||
|
|
b32cc6a088 | ||
|
|
3c7f810849 | ||
|
|
e03417a7c9 | ||
|
|
52d6275812 | ||
|
|
639c9e8266 | ||
|
|
35e0099ac6 | ||
|
|
4ff248515b | ||
|
|
2246b48348 | ||
|
|
e8032f26e6 | ||
|
|
d2c3ad162a | ||
|
|
b4c5cb2773 | ||
|
|
24c3e961e4 | ||
|
|
92fb7a1641 | ||
|
|
05886b33e5 | ||
|
|
d7eeaec706 | ||
|
|
1190030872 | ||
|
|
913a91c541 | ||
|
|
82dc1e82ba | ||
|
|
2e9c730dd1 | ||
|
|
6266fd102c | ||
|
|
d1d6c968d5 | ||
|
|
3c4ebc4030 | ||
|
|
46543f54a6 | ||
|
|
b07fa4c896 | ||
|
|
f35d13183e | ||
|
|
c5f379bff3 | ||
|
|
39ebec51d1 | ||
|
|
6264dc6aa3 | ||
|
|
59163cf3b3 |
6
.github/workflows/testing.yml
vendored
6
.github/workflows/testing.yml
vendored
@@ -4,6 +4,7 @@ on: [push]
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
regression-check:
|
regression-check:
|
||||||
|
timeout-minutes: 10
|
||||||
name: run regression test suite
|
name: run regression test suite
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
@@ -76,10 +77,7 @@ jobs:
|
|||||||
target
|
target
|
||||||
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
||||||
|
|
||||||
# That build is only to build dependencies and can be skipped if Cargo.lock
|
- name: Build
|
||||||
# wasn't changed. Next steps need their own build
|
|
||||||
- name: Install cargo deps
|
|
||||||
if: steps.cache_cargo.outputs.cache-hit != 'true'
|
|
||||||
run: |
|
run: |
|
||||||
cargo build
|
cargo build
|
||||||
|
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,5 @@
|
|||||||
/target
|
/target
|
||||||
/tmp_check
|
/tmp_check
|
||||||
/tmp_install
|
/tmp_install
|
||||||
|
/tmp_check_cli
|
||||||
|
.vscode
|
||||||
|
|||||||
413
Cargo.lock
generated
413
Cargo.lock
generated
@@ -162,9 +162,9 @@ checksum = "e91831deabf0d6d7ec49552e489aed63b7456a7a3c46cff62adad428110b0af0"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-trait"
|
name = "async-trait"
|
||||||
version = "0.1.48"
|
version = "0.1.49"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "36ea56748e10732c49404c153638a15ec3d6211ec5ff35d9bb20e13b93576adf"
|
checksum = "589652ce7ccb335d1e7ecb3be145425702b290dbcb7029bbeaae263fc1d87b48"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@@ -241,6 +241,30 @@ version = "0.13.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bindgen"
|
||||||
|
version = "0.53.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c72a978d268b1d70b0e963217e60fdabd9523a941457a6c42a7315d15c7e89e5"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags",
|
||||||
|
"cexpr",
|
||||||
|
"cfg-if 0.1.10",
|
||||||
|
"clang-sys",
|
||||||
|
"clap",
|
||||||
|
"env_logger",
|
||||||
|
"lazy_static",
|
||||||
|
"lazycell",
|
||||||
|
"log",
|
||||||
|
"peeking_take_while",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"regex",
|
||||||
|
"rustc-hash",
|
||||||
|
"shlex",
|
||||||
|
"which",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bitflags"
|
name = "bitflags"
|
||||||
version = "1.2.1"
|
version = "1.2.1"
|
||||||
@@ -323,6 +347,15 @@ version = "1.0.67"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
|
checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cexpr"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27"
|
||||||
|
dependencies = [
|
||||||
|
"nom",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cfg-if"
|
name = "cfg-if"
|
||||||
version = "0.1.10"
|
version = "0.1.10"
|
||||||
@@ -348,6 +381,17 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clang-sys"
|
||||||
|
version = "0.29.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fe6837df1d5cba2397b835c8530f51723267e16abbf83892e9e5af4f0e5dd10a"
|
||||||
|
dependencies = [
|
||||||
|
"glob",
|
||||||
|
"libc",
|
||||||
|
"libloading",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "2.33.3"
|
version = "2.33.3"
|
||||||
@@ -378,6 +422,28 @@ version = "0.1.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "control_plane"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"bytes",
|
||||||
|
"fs_extra",
|
||||||
|
"hex",
|
||||||
|
"lazy_static",
|
||||||
|
"pageserver",
|
||||||
|
"postgres",
|
||||||
|
"postgres_ffi",
|
||||||
|
"rand 0.8.3",
|
||||||
|
"regex",
|
||||||
|
"serde",
|
||||||
|
"serde_derive",
|
||||||
|
"tar",
|
||||||
|
"tokio-postgres",
|
||||||
|
"toml",
|
||||||
|
"walkeeper",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "core-foundation"
|
name = "core-foundation"
|
||||||
version = "0.9.1"
|
version = "0.9.1"
|
||||||
@@ -411,9 +477,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "crossbeam-channel"
|
name = "crossbeam-channel"
|
||||||
version = "0.5.0"
|
version = "0.5.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
|
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
@@ -528,6 +594,19 @@ dependencies = [
|
|||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "env_logger"
|
||||||
|
version = "0.7.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36"
|
||||||
|
dependencies = [
|
||||||
|
"atty",
|
||||||
|
"humantime",
|
||||||
|
"log",
|
||||||
|
"regex",
|
||||||
|
"termcolor",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "event-listener"
|
name = "event-listener"
|
||||||
version = "2.5.1"
|
version = "2.5.1"
|
||||||
@@ -549,6 +628,18 @@ dependencies = [
|
|||||||
"instant",
|
"instant",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "filetime"
|
||||||
|
version = "0.2.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1d34cfa13a63ae058bfa601fe9e313bbdb3746427c1459185464ce0fcf62e1e8"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if 1.0.0",
|
||||||
|
"libc",
|
||||||
|
"redox_syscall 0.2.6",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fnv"
|
name = "fnv"
|
||||||
version = "1.0.7"
|
version = "1.0.7"
|
||||||
@@ -591,10 +682,16 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures"
|
name = "fs_extra"
|
||||||
version = "0.3.13"
|
version = "1.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7f55667319111d593ba876406af7c409c0ebb44dc4be6132a783ccf163ea14c1"
|
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures"
|
||||||
|
version = "0.3.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@@ -607,9 +704,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-channel"
|
name = "futures-channel"
|
||||||
version = "0.3.13"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8c2dd2df839b57db9ab69c2c9d8f3e8c81984781937fe2807dc6dcf3b2ad2939"
|
checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-sink",
|
"futures-sink",
|
||||||
@@ -617,15 +714,15 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-core"
|
name = "futures-core"
|
||||||
version = "0.3.13"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "15496a72fabf0e62bdc3df11a59a3787429221dd0710ba8ef163d6f7a9112c94"
|
checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-executor"
|
name = "futures-executor"
|
||||||
version = "0.3.13"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "891a4b7b96d84d5940084b2a37632dd65deeae662c114ceaa2c879629c9c0ad1"
|
checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-task",
|
"futures-task",
|
||||||
@@ -634,9 +731,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-io"
|
name = "futures-io"
|
||||||
version = "0.3.13"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d71c2c65c57704c32f5241c1223167c2c3294fd34ac020c807ddbe6db287ba59"
|
checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-lite"
|
name = "futures-lite"
|
||||||
@@ -655,9 +752,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-macro"
|
name = "futures-macro"
|
||||||
version = "0.3.13"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ea405816a5139fb39af82c2beb921d52143f556038378d6db21183a5c37fbfb7"
|
checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro-hack",
|
"proc-macro-hack",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
@@ -667,21 +764,21 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-sink"
|
name = "futures-sink"
|
||||||
version = "0.3.13"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "85754d98985841b7d4f5e8e6fbfa4a4ac847916893ec511a2917ccd8525b8bb3"
|
checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-task"
|
name = "futures-task"
|
||||||
version = "0.3.13"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fa189ef211c15ee602667a6fcfe1c1fd9e07d42250d2156382820fba33c9df80"
|
checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-util"
|
name = "futures-util"
|
||||||
version = "0.3.13"
|
version = "0.3.14"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1812c7ab8aedf8d6f2701a43e1243acdbcc2b36ab26e2ad421eb99ac963d96d1"
|
checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@@ -729,6 +826,12 @@ dependencies = [
|
|||||||
"wasi 0.10.0+wasi-snapshot-preview1",
|
"wasi 0.10.0+wasi-snapshot-preview1",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "glob"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "gloo-timers"
|
name = "gloo-timers"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
@@ -797,9 +900,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "http"
|
name = "http"
|
||||||
version = "0.2.3"
|
version = "0.2.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7245cd7449cc792608c3c8a9eaf69bd4eabbabf802713748fd739c98b82f0747"
|
checksum = "527e8c9ac747e28542699a951517aa9a6945af506cd1f2e1b53a576c17b6cc11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"fnv",
|
"fnv",
|
||||||
@@ -819,9 +922,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "httparse"
|
name = "httparse"
|
||||||
version = "1.3.5"
|
version = "1.3.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "615caabe2c3160b313d52ccc905335f4ed5f10881dd63dc5699d47e90be85691"
|
checksum = "bc35c995b9d93ec174cf9a27d425c7892722101e14993cd227fdb51d70cf9589"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "httpdate"
|
name = "httpdate"
|
||||||
@@ -829,6 +932,15 @@ version = "0.3.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "494b4d60369511e7dea41cf646832512a94e542f68bb9c49e54518e0f468eb47"
|
checksum = "494b4d60369511e7dea41cf646832512a94e542f68bb9c49e54518e0f468eb47"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "humantime"
|
||||||
|
version = "1.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f"
|
||||||
|
dependencies = [
|
||||||
|
"quick-error",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hyper"
|
name = "hyper"
|
||||||
version = "0.14.5"
|
version = "0.14.5"
|
||||||
@@ -846,7 +958,7 @@ dependencies = [
|
|||||||
"httpdate",
|
"httpdate",
|
||||||
"itoa",
|
"itoa",
|
||||||
"pin-project",
|
"pin-project",
|
||||||
"socket2 0.4.0",
|
"socket2",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tower-service",
|
"tower-service",
|
||||||
"tracing",
|
"tracing",
|
||||||
@@ -900,6 +1012,7 @@ dependencies = [
|
|||||||
name = "integration_tests"
|
name = "integration_tests"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"control_plane",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"pageserver",
|
"pageserver",
|
||||||
"postgres",
|
"postgres",
|
||||||
@@ -945,10 +1058,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "lazycell"
|
||||||
version = "0.2.92"
|
version = "1.3.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714"
|
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libc"
|
||||||
|
version = "0.2.93"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "libloading"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
@@ -1062,7 +1191,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "a19900e7eee95eb2b3c2e26d12a874cc80aaf750e31be6fcbe743ead369fa45d"
|
checksum = "a19900e7eee95eb2b3c2e26d12a874cc80aaf750e31be6fcbe743ead369fa45d"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"socket2 0.4.0",
|
"socket2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nom"
|
||||||
|
version = "5.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
"version_check",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1176,12 +1315,14 @@ dependencies = [
|
|||||||
"crc32c",
|
"crc32c",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"daemonize",
|
"daemonize",
|
||||||
"fs2",
|
|
||||||
"futures",
|
"futures",
|
||||||
|
"hex",
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"log",
|
"log",
|
||||||
"postgres",
|
"postgres",
|
||||||
"postgres-protocol",
|
"postgres-protocol",
|
||||||
|
"postgres-types",
|
||||||
|
"postgres_ffi",
|
||||||
"rand 0.8.3",
|
"rand 0.8.3",
|
||||||
"regex",
|
"regex",
|
||||||
"rust-s3",
|
"rust-s3",
|
||||||
@@ -1190,11 +1331,14 @@ dependencies = [
|
|||||||
"slog-scope",
|
"slog-scope",
|
||||||
"slog-stdlog",
|
"slog-stdlog",
|
||||||
"slog-term",
|
"slog-term",
|
||||||
|
"tar",
|
||||||
"termion",
|
"termion",
|
||||||
|
"thiserror",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-postgres",
|
"tokio-postgres",
|
||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
"tui",
|
"tui",
|
||||||
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1223,11 +1367,17 @@ dependencies = [
|
|||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
"instant",
|
"instant",
|
||||||
"libc",
|
"libc",
|
||||||
"redox_syscall 0.2.5",
|
"redox_syscall 0.2.6",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "peeking_take_while"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "percent-encoding"
|
name = "percent-encoding"
|
||||||
version = "2.1.0"
|
version = "2.1.0"
|
||||||
@@ -1305,8 +1455,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres"
|
name = "postgres"
|
||||||
version = "0.19.0"
|
version = "0.19.1"
|
||||||
source = "git+https://github.com/kelvich/rust-postgres?branch=replication_rebase#f3425d991f75cb7b464a37e6b3d5d05f8bf51c02"
|
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=a0d067b66447951d1276a53fb09886539c3fa094#a0d067b66447951d1276a53fb09886539c3fa094"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
@@ -1318,8 +1468,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres-protocol"
|
name = "postgres-protocol"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
source = "git+https://github.com/kelvich/rust-postgres?branch=replication_rebase#f3425d991f75cb7b464a37e6b3d5d05f8bf51c02"
|
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=a0d067b66447951d1276a53fb09886539c3fa094#a0d067b66447951d1276a53fb09886539c3fa094"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64",
|
"base64",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@@ -1335,14 +1485,28 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "postgres-types"
|
name = "postgres-types"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
source = "git+https://github.com/kelvich/rust-postgres?branch=replication_rebase#f3425d991f75cb7b464a37e6b3d5d05f8bf51c02"
|
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=a0d067b66447951d1276a53fb09886539c3fa094#a0d067b66447951d1276a53fb09886539c3fa094"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"fallible-iterator",
|
"fallible-iterator",
|
||||||
"postgres-protocol",
|
"postgres-protocol",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "postgres_ffi"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"bindgen",
|
||||||
|
"byteorder",
|
||||||
|
"bytes",
|
||||||
|
"chrono",
|
||||||
|
"crc32c",
|
||||||
|
"hex",
|
||||||
|
"rand 0.8.3",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ppv-lite86"
|
name = "ppv-lite86"
|
||||||
version = "0.2.10"
|
version = "0.2.10"
|
||||||
@@ -1370,6 +1534,12 @@ dependencies = [
|
|||||||
"unicode-xid",
|
"unicode-xid",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quick-error"
|
||||||
|
version = "1.2.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "quote"
|
||||||
version = "1.0.9"
|
version = "1.0.9"
|
||||||
@@ -1468,9 +1638,9 @@ checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redox_syscall"
|
name = "redox_syscall"
|
||||||
version = "0.2.5"
|
version = "0.2.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9"
|
checksum = "8270314b5ccceb518e7e578952f0b72b88222d02e8f77f5ecf7abbb673539041"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags",
|
"bitflags",
|
||||||
]
|
]
|
||||||
@@ -1481,7 +1651,7 @@ version = "0.1.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f"
|
checksum = "8440d8acb4fd3d277125b4bd01a6f38aee8d814b3b5fc09b3f2b825d37d3fe8f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"redox_syscall 0.2.5",
|
"redox_syscall 0.2.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1502,7 +1672,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
|
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"getrandom 0.2.2",
|
"getrandom 0.2.2",
|
||||||
"redox_syscall 0.2.5",
|
"redox_syscall 0.2.6",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1533,9 +1703,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "reqwest"
|
name = "reqwest"
|
||||||
version = "0.11.2"
|
version = "0.11.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "bf12057f289428dbf5c591c74bf10392e4a8003f993405a902f20117019022d4"
|
checksum = "2296f2fac53979e8ccbc4a1136b25dcefd37be9ed7e4a1f6b05a6029c84ff124"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64",
|
"base64",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -1591,7 +1761,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "rust-s3"
|
name = "rust-s3"
|
||||||
version = "0.27.0-beta1"
|
version = "0.27.0-beta1"
|
||||||
source = "git+https://github.com/hlinnaka/rust-s3#7f15a24ec7daa0a5d9516da706212745f9042818"
|
source = "git+https://github.com/hlinnaka/rust-s3?rev=7f15a24ec7daa0a5d9516da706212745f9042818#7f15a24ec7daa0a5d9516da706212745f9042818"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-std",
|
"async-std",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
@@ -1619,6 +1789,12 @@ dependencies = [
|
|||||||
"url",
|
"url",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc_version"
|
name = "rustc_version"
|
||||||
version = "0.2.3"
|
version = "0.2.3"
|
||||||
@@ -1640,6 +1816,15 @@ version = "1.0.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
|
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "same-file"
|
||||||
|
version = "1.0.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "schannel"
|
name = "schannel"
|
||||||
version = "0.1.19"
|
version = "0.1.19"
|
||||||
@@ -1759,6 +1944,12 @@ dependencies = [
|
|||||||
"opaque-debug",
|
"opaque-debug",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "shlex"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "signal-hook-registry"
|
name = "signal-hook-registry"
|
||||||
version = "1.3.0"
|
version = "1.3.0"
|
||||||
@@ -1845,17 +2036,6 @@ version = "1.6.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
|
checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e"
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "socket2"
|
|
||||||
version = "0.3.19"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "122e570113d28d773067fab24266b66753f6ea915758651696b6e35e49f88d6e"
|
|
||||||
dependencies = [
|
|
||||||
"cfg-if 1.0.0",
|
|
||||||
"libc",
|
|
||||||
"winapi",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "socket2"
|
name = "socket2"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
@@ -1890,9 +2070,9 @@ checksum = "1e81da0851ada1f3e9d4312c704aa4f8806f0f9d69faaf8df2f3464b4a9437c2"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.68"
|
version = "1.0.69"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87"
|
checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@@ -1905,6 +2085,17 @@ version = "0.2.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
|
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tar"
|
||||||
|
version = "0.4.33"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c0bcfbd6a598361fda270d82469fff3d65089dc33e175c9a131f7b4cd395f228"
|
||||||
|
dependencies = [
|
||||||
|
"filetime",
|
||||||
|
"libc",
|
||||||
|
"xattr",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tempfile"
|
name = "tempfile"
|
||||||
version = "3.2.0"
|
version = "3.2.0"
|
||||||
@@ -1914,7 +2105,7 @@ dependencies = [
|
|||||||
"cfg-if 1.0.0",
|
"cfg-if 1.0.0",
|
||||||
"libc",
|
"libc",
|
||||||
"rand 0.8.3",
|
"rand 0.8.3",
|
||||||
"redox_syscall 0.2.5",
|
"redox_syscall 0.2.6",
|
||||||
"remove_dir_all",
|
"remove_dir_all",
|
||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
@@ -1930,6 +2121,15 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "termcolor"
|
||||||
|
version = "1.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "termion"
|
name = "termion"
|
||||||
version = "1.5.6"
|
version = "1.5.6"
|
||||||
@@ -1938,7 +2138,7 @@ checksum = "077185e2eac69c3f8379a4298e1e07cd36beb962290d4a51199acf0fdc10607e"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"numtoa",
|
"numtoa",
|
||||||
"redox_syscall 0.2.5",
|
"redox_syscall 0.2.6",
|
||||||
"redox_termios",
|
"redox_termios",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -2008,9 +2208,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio"
|
name = "tokio"
|
||||||
version = "1.4.0"
|
version = "1.5.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "134af885d758d645f0f0505c9a8b3f9bf8a348fd822e112ab5248138348f1722"
|
checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg",
|
"autocfg",
|
||||||
"bytes",
|
"bytes",
|
||||||
@@ -2049,8 +2249,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-postgres"
|
name = "tokio-postgres"
|
||||||
version = "0.7.0"
|
version = "0.7.1"
|
||||||
source = "git+https://github.com/kelvich/rust-postgres?branch=replication_rebase#f3425d991f75cb7b464a37e6b3d5d05f8bf51c02"
|
source = "git+https://github.com/zenithdb/rust-postgres.git?rev=a0d067b66447951d1276a53fb09886539c3fa094#a0d067b66447951d1276a53fb09886539c3fa094"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@@ -2061,10 +2261,10 @@ dependencies = [
|
|||||||
"parking_lot",
|
"parking_lot",
|
||||||
"percent-encoding",
|
"percent-encoding",
|
||||||
"phf",
|
"phf",
|
||||||
"pin-project",
|
"pin-project-lite",
|
||||||
"postgres-protocol",
|
"postgres-protocol",
|
||||||
"postgres-types",
|
"postgres-types",
|
||||||
"socket2 0.3.19",
|
"socket2",
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-util",
|
"tokio-util",
|
||||||
]
|
]
|
||||||
@@ -2082,9 +2282,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio-util"
|
name = "tokio-util"
|
||||||
version = "0.6.5"
|
version = "0.6.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5143d049e85af7fbc36f5454d990e62c2df705b3589f123b71f441b6b59f443f"
|
checksum = "940a12c99365c31ea8dd9ba04ec1be183ffe4920102bb7122c2f515437601e8e"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytes",
|
"bytes",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@@ -2094,6 +2294,15 @@ dependencies = [
|
|||||||
"tokio",
|
"tokio",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml"
|
||||||
|
version = "0.5.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tower-service"
|
name = "tower-service"
|
||||||
version = "0.3.1"
|
version = "0.3.1"
|
||||||
@@ -2147,9 +2356,9 @@ checksum = "879f6906492a7cd215bfa4cf595b600146ccfac0c79bcbd1f3000162af5e8b06"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-bidi"
|
name = "unicode-bidi"
|
||||||
version = "0.3.4"
|
version = "0.3.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
|
checksum = "eeb8be209bb1c96b7c177c7420d26e04eccacb0eeae6b980e35fcb74678107e0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"matches",
|
"matches",
|
||||||
]
|
]
|
||||||
@@ -2232,6 +2441,17 @@ version = "1.1.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca"
|
checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "walkdir"
|
||||||
|
version = "2.3.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
|
||||||
|
dependencies = [
|
||||||
|
"same-file",
|
||||||
|
"winapi",
|
||||||
|
"winapi-util",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "walkeeper"
|
name = "walkeeper"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@@ -2239,31 +2459,24 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
"bytes",
|
"bytes",
|
||||||
"chrono",
|
|
||||||
"clap",
|
"clap",
|
||||||
"crc32c",
|
"crc32c",
|
||||||
"crossbeam-channel",
|
|
||||||
"daemonize",
|
"daemonize",
|
||||||
"fs2",
|
"fs2",
|
||||||
"futures",
|
|
||||||
"lazy_static",
|
"lazy_static",
|
||||||
"log",
|
"log",
|
||||||
"pageserver",
|
"pageserver",
|
||||||
"postgres",
|
"postgres",
|
||||||
"postgres-protocol",
|
"postgres-protocol",
|
||||||
"rand 0.8.3",
|
|
||||||
"regex",
|
"regex",
|
||||||
"rust-s3",
|
|
||||||
"slog",
|
"slog",
|
||||||
"slog-async",
|
"slog-async",
|
||||||
"slog-scope",
|
"slog-scope",
|
||||||
"slog-stdlog",
|
"slog-stdlog",
|
||||||
"slog-term",
|
"slog-term",
|
||||||
"termion",
|
|
||||||
"tokio",
|
"tokio",
|
||||||
"tokio-postgres",
|
"tokio-postgres",
|
||||||
"tokio-stream",
|
"tokio-stream",
|
||||||
"tui",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2375,6 +2588,15 @@ dependencies = [
|
|||||||
"cc",
|
"cc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "which"
|
||||||
|
version = "3.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wildmatch"
|
name = "wildmatch"
|
||||||
version = "1.1.0"
|
version = "1.1.0"
|
||||||
@@ -2397,6 +2619,15 @@ version = "0.4.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-util"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi-x86_64-pc-windows-gnu"
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
@@ -2412,8 +2643,32 @@ dependencies = [
|
|||||||
"winapi",
|
"winapi",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xattr"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "244c3741f4240ef46274860397c7c74e50eb23624996930e484c16679633a54c"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xml-rs"
|
name = "xml-rs"
|
||||||
version = "0.8.3"
|
version = "0.8.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
|
checksum = "b07db065a5cf61a7e4ba64f29e67db906fb1787316516c4e6e5ff0fea1efcd8a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zenith"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"clap",
|
||||||
|
"control_plane",
|
||||||
|
"pageserver",
|
||||||
|
"postgres_ffi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zenith_utils"
|
||||||
|
version = "0.1.0"
|
||||||
|
|||||||
@@ -3,4 +3,8 @@ members = [
|
|||||||
"integration_tests",
|
"integration_tests",
|
||||||
"pageserver",
|
"pageserver",
|
||||||
"walkeeper",
|
"walkeeper",
|
||||||
|
"zenith",
|
||||||
|
"control_plane",
|
||||||
|
"postgres_ffi",
|
||||||
|
"zenith_utils",
|
||||||
]
|
]
|
||||||
|
|||||||
48
README.md
48
README.md
@@ -2,6 +2,54 @@
|
|||||||
|
|
||||||
Zenith substitutes PostgreSQL storage layer and redistributes data across a cluster of nodes
|
Zenith substitutes PostgreSQL storage layer and redistributes data across a cluster of nodes
|
||||||
|
|
||||||
|
## Running local installation
|
||||||
|
|
||||||
|
1. Build zenith and patched postgres
|
||||||
|
```sh
|
||||||
|
git clone --recursive https://github.com/libzenith/zenith.git
|
||||||
|
cd zenith
|
||||||
|
./pgbuild.sh # builds postgres and installs it to ./tmp_install
|
||||||
|
cargo build
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start pageserver and postggres on top of it (should be called from repo root):
|
||||||
|
```sh
|
||||||
|
# Create ~/.zenith with proper paths to binaries and data
|
||||||
|
# Later that would be responsibility of a package install script
|
||||||
|
>./target/debug/zenith init
|
||||||
|
|
||||||
|
# start pageserver
|
||||||
|
> ./target/debug/zenith pageserver start
|
||||||
|
Starting pageserver at '127.0.0.1:64000'
|
||||||
|
|
||||||
|
# create and configure postgres data dir
|
||||||
|
> ./target/debug/zenith pg create
|
||||||
|
Creating new postgres: path=/Users/user/code/zenith/tmp_check_cli/compute/pg1 port=55432
|
||||||
|
Database initialized
|
||||||
|
|
||||||
|
# start it
|
||||||
|
> ./target/debug/zenith pg start pg1
|
||||||
|
|
||||||
|
# look up status and connection info
|
||||||
|
> ./target/debug/zenith pg list
|
||||||
|
NODE ADDRESS STATUS
|
||||||
|
pg1 127.0.0.1:55432 running
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Now it is possible to connect to postgres and run some queries:
|
||||||
|
```
|
||||||
|
> psql -p55432 -h 127.0.0.1 postgres
|
||||||
|
postgres=# CREATE TABLE t(key int primary key, value text);
|
||||||
|
CREATE TABLE
|
||||||
|
postgres=# insert into t values(1,1);
|
||||||
|
INSERT 0 1
|
||||||
|
postgres=# select * from t;
|
||||||
|
key | value
|
||||||
|
-----+-------
|
||||||
|
1 | 1
|
||||||
|
(1 row)
|
||||||
|
```
|
||||||
|
|
||||||
## Running tests
|
## Running tests
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
|
|||||||
188
cli-v2-story.md
Normal file
188
cli-v2-story.md
Normal file
@@ -0,0 +1,188 @@
|
|||||||
|
Create a new Zenith repository in the current directory:
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli init
|
||||||
|
The files belonging to this database system will be owned by user "heikki".
|
||||||
|
This user must also own the server process.
|
||||||
|
|
||||||
|
The database cluster will be initialized with locale "en_GB.UTF-8".
|
||||||
|
The default database encoding has accordingly been set to "UTF8".
|
||||||
|
The default text search configuration will be set to "english".
|
||||||
|
|
||||||
|
Data page checksums are disabled.
|
||||||
|
|
||||||
|
creating directory tmp ... ok
|
||||||
|
creating subdirectories ... ok
|
||||||
|
selecting dynamic shared memory implementation ... posix
|
||||||
|
selecting default max_connections ... 100
|
||||||
|
selecting default shared_buffers ... 128MB
|
||||||
|
selecting default time zone ... Europe/Helsinki
|
||||||
|
creating configuration files ... ok
|
||||||
|
running bootstrap script ... ok
|
||||||
|
performing post-bootstrap initialization ... ok
|
||||||
|
syncing data to disk ... ok
|
||||||
|
|
||||||
|
initdb: warning: enabling "trust" authentication for local connections
|
||||||
|
You can change this by editing pg_hba.conf or using the option -A, or
|
||||||
|
--auth-local and --auth-host, the next time you run initdb.
|
||||||
|
new zenith repository was created in .zenith
|
||||||
|
|
||||||
|
Initially, there is only one branch:
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
|
||||||
|
main
|
||||||
|
|
||||||
|
Start a local Postgres instance on the branch:
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start main
|
||||||
|
Creating data directory from snapshot at 0/15FFB08...
|
||||||
|
waiting for server to start....2021-04-13 09:27:43.919 EEST [984664] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
|
||||||
|
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv6 address "::1", port 5432
|
||||||
|
2021-04-13 09:27:43.920 EEST [984664] LOG: listening on IPv4 address "127.0.0.1", port 5432
|
||||||
|
2021-04-13 09:27:43.927 EEST [984664] LOG: listening on Unix socket "/tmp/.s.PGSQL.5432"
|
||||||
|
2021-04-13 09:27:43.939 EEST [984665] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
|
||||||
|
2021-04-13 09:27:43.939 EEST [984665] LOG: creating missing WAL directory "pg_wal/archive_status"
|
||||||
|
2021-04-13 09:27:44.189 EEST [984665] LOG: database system was not properly shut down; automatic recovery in progress
|
||||||
|
2021-04-13 09:27:44.195 EEST [984665] LOG: invalid record length at 0/15FFB80: wanted 24, got 0
|
||||||
|
2021-04-13 09:27:44.195 EEST [984665] LOG: redo is not required
|
||||||
|
2021-04-13 09:27:44.225 EEST [984664] LOG: database system is ready to accept connections
|
||||||
|
done
|
||||||
|
server started
|
||||||
|
|
||||||
|
Run some commands against it:
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "create table foo (t text);"
|
||||||
|
CREATE TABLE
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "insert into foo values ('inserted on the main branch');"
|
||||||
|
INSERT 0 1
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ psql postgres -c "select * from foo"
|
||||||
|
t
|
||||||
|
-----------------------------
|
||||||
|
inserted on the main branch
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
Create a new branch called 'experimental'. We create it from the
|
||||||
|
current end of the 'main' branch, but you could specify a different
|
||||||
|
LSN as the start point instead.
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch experimental main
|
||||||
|
branching at end of WAL: 0/161F478
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli branch
|
||||||
|
experimental
|
||||||
|
main
|
||||||
|
|
||||||
|
Start another Postgres instance off the 'experimental' branch:
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
|
||||||
|
Creating data directory from snapshot at 0/15FFB08...
|
||||||
|
waiting for server to start....2021-04-13 09:28:41.874 EEST [984766] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
|
||||||
|
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv6 address "::1", port 5433
|
||||||
|
2021-04-13 09:28:41.875 EEST [984766] LOG: listening on IPv4 address "127.0.0.1", port 5433
|
||||||
|
2021-04-13 09:28:41.883 EEST [984766] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
|
||||||
|
2021-04-13 09:28:41.896 EEST [984767] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
|
||||||
|
2021-04-13 09:28:42.265 EEST [984767] LOG: database system was not properly shut down; automatic recovery in progress
|
||||||
|
2021-04-13 09:28:42.269 EEST [984767] LOG: redo starts at 0/15FFB80
|
||||||
|
2021-04-13 09:28:42.272 EEST [984767] LOG: invalid record length at 0/161F4B0: wanted 24, got 0
|
||||||
|
2021-04-13 09:28:42.272 EEST [984767] LOG: redo done at 0/161F478 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
|
||||||
|
2021-04-13 09:28:42.321 EEST [984766] LOG: database system is ready to accept connections
|
||||||
|
done
|
||||||
|
server started
|
||||||
|
|
||||||
|
Insert some a row on the 'experimental' branch:
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
|
||||||
|
t
|
||||||
|
-----------------------------
|
||||||
|
inserted on the main branch
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "insert into foo values ('inserted on experimental')"
|
||||||
|
INSERT 0 1
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
|
||||||
|
t
|
||||||
|
-----------------------------
|
||||||
|
inserted on the main branch
|
||||||
|
inserted on experimental
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
See that the other Postgres instance is still running on 'main' branch on port 5432:
|
||||||
|
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5432 -c "select * from foo"
|
||||||
|
t
|
||||||
|
-----------------------------
|
||||||
|
inserted on the main branch
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Everything is stored in the .zenith directory:
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/
|
||||||
|
total 12
|
||||||
|
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 datadirs
|
||||||
|
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 refs
|
||||||
|
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:28 timelines
|
||||||
|
|
||||||
|
The 'datadirs' directory contains the datadirs of the running instances:
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/
|
||||||
|
total 8
|
||||||
|
drwx------ 18 heikki heikki 4096 Apr 13 09:27 3c0c634c1674079b2c6d4edf7c91523e
|
||||||
|
drwx------ 18 heikki heikki 4096 Apr 13 09:28 697e3c103d4b1763cd6e82e4ff361d76
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ls -l .zenith/datadirs/3c0c634c1674079b2c6d4edf7c91523e/
|
||||||
|
total 124
|
||||||
|
drwxr-xr-x 5 heikki heikki 4096 Apr 13 09:27 base
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 global
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_commit_ts
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_dynshmem
|
||||||
|
-rw------- 1 heikki heikki 4760 Apr 13 09:27 pg_hba.conf
|
||||||
|
-rw------- 1 heikki heikki 1636 Apr 13 09:27 pg_ident.conf
|
||||||
|
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:32 pg_logical
|
||||||
|
drwxr-xr-x 4 heikki heikki 4096 Apr 13 09:27 pg_multixact
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_notify
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_replslot
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_serial
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_snapshots
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_stat
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:34 pg_stat_tmp
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_subtrans
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_tblspc
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_twophase
|
||||||
|
-rw------- 1 heikki heikki 3 Apr 13 09:27 PG_VERSION
|
||||||
|
lrwxrwxrwx 1 heikki heikki 52 Apr 13 09:27 pg_wal -> ../../timelines/3c0c634c1674079b2c6d4edf7c91523e/wal
|
||||||
|
drwxr-xr-x 2 heikki heikki 4096 Apr 13 09:27 pg_xact
|
||||||
|
-rw------- 1 heikki heikki 88 Apr 13 09:27 postgresql.auto.conf
|
||||||
|
-rw------- 1 heikki heikki 28688 Apr 13 09:27 postgresql.conf
|
||||||
|
-rw------- 1 heikki heikki 96 Apr 13 09:27 postmaster.opts
|
||||||
|
-rw------- 1 heikki heikki 149 Apr 13 09:27 postmaster.pid
|
||||||
|
|
||||||
|
Note how 'pg_wal' is just a symlink to the 'timelines' directory. The
|
||||||
|
datadir is ephemeral, you can delete it at any time, and it can be reconstructed
|
||||||
|
from the snapshots and WAL stored in the 'timelines' directory. So if you push/pull
|
||||||
|
the repository, the 'datadirs' are not included. (They are like git working trees)
|
||||||
|
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ killall -9 postgres
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ rm -rf .zenith/datadirs/*
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ ./target/debug/cli start experimental -- -o -p5433
|
||||||
|
Creating data directory from snapshot at 0/15FFB08...
|
||||||
|
waiting for server to start....2021-04-13 09:37:05.476 EEST [985340] LOG: starting PostgreSQL 14devel on x86_64-pc-linux-gnu, compiled by gcc (Debian 10.2.1-6) 10.2.1 20210110, 64-bit
|
||||||
|
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv6 address "::1", port 5433
|
||||||
|
2021-04-13 09:37:05.477 EEST [985340] LOG: listening on IPv4 address "127.0.0.1", port 5433
|
||||||
|
2021-04-13 09:37:05.487 EEST [985340] LOG: listening on Unix socket "/tmp/.s.PGSQL.5433"
|
||||||
|
2021-04-13 09:37:05.498 EEST [985341] LOG: database system was interrupted; last known up at 2021-04-13 09:27:33 EEST
|
||||||
|
2021-04-13 09:37:05.808 EEST [985341] LOG: database system was not properly shut down; automatic recovery in progress
|
||||||
|
2021-04-13 09:37:05.813 EEST [985341] LOG: redo starts at 0/15FFB80
|
||||||
|
2021-04-13 09:37:05.815 EEST [985341] LOG: invalid record length at 0/161F770: wanted 24, got 0
|
||||||
|
2021-04-13 09:37:05.815 EEST [985341] LOG: redo done at 0/161F738 system usage: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s
|
||||||
|
2021-04-13 09:37:05.866 EEST [985340] LOG: database system is ready to accept connections
|
||||||
|
done
|
||||||
|
server started
|
||||||
|
~/git-sandbox/zenith (cli-v2)$ psql postgres -p5433 -c "select * from foo"
|
||||||
|
t
|
||||||
|
-----------------------------
|
||||||
|
inserted on the main branch
|
||||||
|
inserted on experimental
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
1
control_plane/.gitignore
vendored
Normal file
1
control_plane/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
tmp_check/
|
||||||
27
control_plane/Cargo.toml
Normal file
27
control_plane/Cargo.toml
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
[package]
|
||||||
|
name = "control_plane"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Stas Kelvich <stas@zenith.tech>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
rand = "0.8.3"
|
||||||
|
tar = "0.4.33"
|
||||||
|
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
|
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
|
|
||||||
|
serde = ""
|
||||||
|
serde_derive = ""
|
||||||
|
toml = ""
|
||||||
|
lazy_static = ""
|
||||||
|
regex = "1"
|
||||||
|
anyhow = "1.0"
|
||||||
|
hex = "0.4.3"
|
||||||
|
bytes = "1.0.1"
|
||||||
|
fs_extra = "1.2.0"
|
||||||
|
|
||||||
|
pageserver = { path = "../pageserver" }
|
||||||
|
walkeeper = { path = "../walkeeper" }
|
||||||
|
postgres_ffi = { path = "../postgres_ffi" }
|
||||||
459
control_plane/src/compute.rs
Normal file
459
control_plane/src/compute.rs
Normal file
@@ -0,0 +1,459 @@
|
|||||||
|
use std::fs::{self, OpenOptions};
|
||||||
|
use std::io::{Read, Write};
|
||||||
|
use std::net::SocketAddr;
|
||||||
|
use std::net::TcpStream;
|
||||||
|
use std::os::unix::fs::PermissionsExt;
|
||||||
|
use std::process::Command;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
use std::{collections::BTreeMap, path::PathBuf};
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use regex::Regex;
|
||||||
|
use tar;
|
||||||
|
|
||||||
|
use postgres::{Client, NoTls};
|
||||||
|
|
||||||
|
use crate::local_env::LocalEnv;
|
||||||
|
use crate::storage::{PageServerNode, WalProposerNode};
|
||||||
|
use pageserver::ZTimelineId;
|
||||||
|
|
||||||
|
//
|
||||||
|
// ComputeControlPlane
|
||||||
|
//
|
||||||
|
pub struct ComputeControlPlane {
|
||||||
|
base_port: u16,
|
||||||
|
pageserver: Arc<PageServerNode>,
|
||||||
|
pub nodes: BTreeMap<String, Arc<PostgresNode>>,
|
||||||
|
env: LocalEnv,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ComputeControlPlane {
|
||||||
|
// Load current nodes with ports from data directories on disk
|
||||||
|
pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
|
||||||
|
// TODO: since pageserver do not have config file yet we believe here that
|
||||||
|
// it is running on default port. Change that when pageserver will have config.
|
||||||
|
let pageserver = Arc::new(PageServerNode::from_env(&env));
|
||||||
|
|
||||||
|
let pgdatadirspath = env.repo_path.join("pgdatadirs");
|
||||||
|
let nodes: Result<BTreeMap<_, _>> = fs::read_dir(&pgdatadirspath)
|
||||||
|
.with_context(|| format!("failed to list {}", pgdatadirspath.display()))?
|
||||||
|
.into_iter()
|
||||||
|
.map(|f| {
|
||||||
|
PostgresNode::from_dir_entry(f?, &env, &pageserver)
|
||||||
|
.map(|node| (node.name.clone(), Arc::new(node)))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let nodes = nodes?;
|
||||||
|
|
||||||
|
Ok(ComputeControlPlane {
|
||||||
|
base_port: 55431,
|
||||||
|
pageserver,
|
||||||
|
nodes,
|
||||||
|
env,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_port(&mut self) -> u16 {
|
||||||
|
1 + self
|
||||||
|
.nodes
|
||||||
|
.iter()
|
||||||
|
.map(|(_name, node)| node.address.port())
|
||||||
|
.max()
|
||||||
|
.unwrap_or(self.base_port)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn local(local_env: &LocalEnv, pageserver: &Arc<PageServerNode>) -> ComputeControlPlane {
|
||||||
|
ComputeControlPlane {
|
||||||
|
base_port: 65431,
|
||||||
|
pageserver: Arc::clone(pageserver),
|
||||||
|
nodes: BTreeMap::new(),
|
||||||
|
env: local_env.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Connect to a page server, get base backup, and untar it to initialize a
|
||||||
|
/// new data directory
|
||||||
|
pub fn new_from_page_server(
|
||||||
|
&mut self,
|
||||||
|
is_test: bool,
|
||||||
|
timelineid: ZTimelineId,
|
||||||
|
) -> Result<Arc<PostgresNode>> {
|
||||||
|
let node_id = self.nodes.len() as u32 + 1;
|
||||||
|
|
||||||
|
let node = Arc::new(PostgresNode {
|
||||||
|
name: format!("pg{}", node_id),
|
||||||
|
address: SocketAddr::new("127.0.0.1".parse().unwrap(), self.get_port()),
|
||||||
|
env: self.env.clone(),
|
||||||
|
pageserver: Arc::clone(&self.pageserver),
|
||||||
|
is_test,
|
||||||
|
timelineid,
|
||||||
|
});
|
||||||
|
|
||||||
|
node.init_from_page_server()?;
|
||||||
|
self.nodes.insert(node.name.clone(), Arc::clone(&node));
|
||||||
|
|
||||||
|
Ok(node)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_test_node(&mut self, timelineid: ZTimelineId) -> Arc<PostgresNode> {
|
||||||
|
let node = self.new_from_page_server(true, timelineid);
|
||||||
|
assert!(node.is_ok());
|
||||||
|
let node = node.unwrap();
|
||||||
|
|
||||||
|
// Configure the node to stream WAL directly to the pageserver
|
||||||
|
node.append_conf(
|
||||||
|
"postgresql.conf",
|
||||||
|
format!(
|
||||||
|
"callmemaybe_connstring = '{}'\n", // FIXME escaping
|
||||||
|
node.connstr()
|
||||||
|
)
|
||||||
|
.as_str(),
|
||||||
|
);
|
||||||
|
|
||||||
|
node
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_test_master_node(&mut self, timelineid: ZTimelineId) -> Arc<PostgresNode> {
|
||||||
|
let node = self.new_from_page_server(true, timelineid).unwrap();
|
||||||
|
|
||||||
|
node.append_conf(
|
||||||
|
"postgresql.conf",
|
||||||
|
"synchronous_standby_names = 'safekeeper_proxy'\n",
|
||||||
|
);
|
||||||
|
|
||||||
|
node
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_node(&mut self, timelineid: ZTimelineId) -> Result<Arc<PostgresNode>> {
|
||||||
|
let node = self.new_from_page_server(false, timelineid).unwrap();
|
||||||
|
|
||||||
|
// Configure the node to stream WAL directly to the pageserver
|
||||||
|
node.append_conf(
|
||||||
|
"postgresql.conf",
|
||||||
|
format!(
|
||||||
|
"callmemaybe_connstring = '{}'\n", // FIXME escaping
|
||||||
|
node.connstr()
|
||||||
|
)
|
||||||
|
.as_str(),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(node)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
pub struct PostgresNode {
|
||||||
|
pub address: SocketAddr,
|
||||||
|
name: String,
|
||||||
|
pub env: LocalEnv,
|
||||||
|
pageserver: Arc<PageServerNode>,
|
||||||
|
is_test: bool,
|
||||||
|
timelineid: ZTimelineId,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PostgresNode {
|
||||||
|
fn from_dir_entry(
|
||||||
|
entry: std::fs::DirEntry,
|
||||||
|
env: &LocalEnv,
|
||||||
|
pageserver: &Arc<PageServerNode>,
|
||||||
|
) -> Result<PostgresNode> {
|
||||||
|
if !entry.file_type()?.is_dir() {
|
||||||
|
anyhow::bail!(
|
||||||
|
"PostgresNode::from_dir_entry failed: '{}' is not a directory",
|
||||||
|
entry.path().display()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref CONF_PORT_RE: Regex = Regex::new(r"(?m)^\s*port\s*=\s*(\d+)\s*$").unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
// parse data directory name
|
||||||
|
let fname = entry.file_name();
|
||||||
|
let name = fname.to_str().unwrap().to_string();
|
||||||
|
|
||||||
|
// find out tcp port in config file
|
||||||
|
let cfg_path = entry.path().join("postgresql.conf");
|
||||||
|
let config = fs::read_to_string(cfg_path.clone()).with_context(|| {
|
||||||
|
format!(
|
||||||
|
"failed to read config file in {}",
|
||||||
|
cfg_path.to_str().unwrap()
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let err_msg = format!(
|
||||||
|
"failed to find port definition in config file {}",
|
||||||
|
cfg_path.to_str().unwrap()
|
||||||
|
);
|
||||||
|
let port: u16 = CONF_PORT_RE
|
||||||
|
.captures(config.as_str())
|
||||||
|
.ok_or(anyhow::Error::msg(err_msg.clone() + " 1"))?
|
||||||
|
.iter()
|
||||||
|
.last()
|
||||||
|
.ok_or(anyhow::Error::msg(err_msg.clone() + " 2"))?
|
||||||
|
.ok_or(anyhow::Error::msg(err_msg.clone() + " 3"))?
|
||||||
|
.as_str()
|
||||||
|
.parse()
|
||||||
|
.with_context(|| err_msg)?;
|
||||||
|
|
||||||
|
// FIXME: What timeline is this server on? Would have to parse the postgresql.conf
|
||||||
|
// file for that, too. It's currently not needed for anything, but it would be
|
||||||
|
// nice to list the timeline in "zenith pg list"
|
||||||
|
let timelineid_buf = [0u8; 16];
|
||||||
|
let timelineid = ZTimelineId::from(timelineid_buf);
|
||||||
|
|
||||||
|
// ok now
|
||||||
|
Ok(PostgresNode {
|
||||||
|
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
|
||||||
|
name,
|
||||||
|
env: env.clone(),
|
||||||
|
pageserver: Arc::clone(pageserver),
|
||||||
|
is_test: false,
|
||||||
|
timelineid,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Connect to a page server, get base backup, and untar it to initialize a
|
||||||
|
// new data directory
|
||||||
|
pub fn init_from_page_server(&self) -> Result<()> {
|
||||||
|
let pgdata = self.pgdata();
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Extracting base backup to create postgres instance: path={} port={}",
|
||||||
|
pgdata.display(),
|
||||||
|
self.address.port()
|
||||||
|
);
|
||||||
|
|
||||||
|
// initialize data directory
|
||||||
|
if self.is_test {
|
||||||
|
fs::remove_dir_all(&pgdata).ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
let sql = format!("basebackup {}", self.timelineid);
|
||||||
|
let mut client = self
|
||||||
|
.pageserver
|
||||||
|
.page_server_psql_client()
|
||||||
|
.with_context(|| "connecting to page server failed")?;
|
||||||
|
|
||||||
|
fs::create_dir_all(&pgdata)
|
||||||
|
.with_context(|| format!("could not create data directory {}", pgdata.display()))?;
|
||||||
|
fs::set_permissions(pgdata.as_path(), fs::Permissions::from_mode(0o700)).with_context(
|
||||||
|
|| {
|
||||||
|
format!(
|
||||||
|
"could not set permissions in data directory {}",
|
||||||
|
pgdata.display()
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
|
||||||
|
// FIXME: The compute node should be able to stream the WAL it needs from the WAL safekeepers or archive.
|
||||||
|
// But that's not implemented yet. For now, 'pg_wal' is included in the base backup tarball that
|
||||||
|
// we receive from the Page Server, so we don't need to create the empty 'pg_wal' directory here.
|
||||||
|
//fs::create_dir_all(pgdata.join("pg_wal"))?;
|
||||||
|
|
||||||
|
let mut copyreader = client
|
||||||
|
.copy_out(sql.as_str())
|
||||||
|
.with_context(|| "page server 'basebackup' command failed")?;
|
||||||
|
|
||||||
|
// FIXME: Currently, we slurp the whole tarball into memory, and then extract it,
|
||||||
|
// but we really should do this:
|
||||||
|
//let mut ar = tar::Archive::new(copyreader);
|
||||||
|
let mut buf = vec![];
|
||||||
|
copyreader
|
||||||
|
.read_to_end(&mut buf)
|
||||||
|
.with_context(|| "reading base backup from page server failed")?;
|
||||||
|
let mut ar = tar::Archive::new(buf.as_slice());
|
||||||
|
ar.unpack(&pgdata)
|
||||||
|
.with_context(|| "extracting page backup failed")?;
|
||||||
|
|
||||||
|
// listen for selected port
|
||||||
|
self.append_conf(
|
||||||
|
"postgresql.conf",
|
||||||
|
&format!(
|
||||||
|
"max_wal_senders = 10\n\
|
||||||
|
max_replication_slots = 10\n\
|
||||||
|
hot_standby = on\n\
|
||||||
|
shared_buffers = 1MB\n\
|
||||||
|
max_connections = 100\n\
|
||||||
|
wal_level = replica\n\
|
||||||
|
listen_addresses = '{address}'\n\
|
||||||
|
port = {port}\n",
|
||||||
|
address = self.address.ip(),
|
||||||
|
port = self.address.port()
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Never clean up old WAL. TODO: We should use a replication
|
||||||
|
// slot or something proper, to prevent the compute node
|
||||||
|
// from removing WAL that hasn't been streamed to the safekeepr or
|
||||||
|
// page server yet. But this will do for now.
|
||||||
|
self.append_conf("postgresql.conf", &format!("wal_keep_size='10TB'\n"));
|
||||||
|
|
||||||
|
// Connect it to the page server.
|
||||||
|
|
||||||
|
// Configure that node to take pages from pageserver
|
||||||
|
self.append_conf(
|
||||||
|
"postgresql.conf",
|
||||||
|
&format!(
|
||||||
|
"page_server_connstring = 'host={} port={}'\n\
|
||||||
|
zenith_timeline='{}'\n",
|
||||||
|
self.pageserver.address().ip(),
|
||||||
|
self.pageserver.address().port(),
|
||||||
|
self.timelineid
|
||||||
|
),
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pgdata(&self) -> PathBuf {
|
||||||
|
self.env.repo_path.join("pgdatadirs").join(&self.name)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn status(&self) -> &str {
|
||||||
|
let timeout = Duration::from_millis(300);
|
||||||
|
let has_pidfile = self.pgdata().join("postmaster.pid").exists();
|
||||||
|
let can_connect = TcpStream::connect_timeout(&self.address, timeout).is_ok();
|
||||||
|
|
||||||
|
match (has_pidfile, can_connect) {
|
||||||
|
(true, true) => "running",
|
||||||
|
(false, false) => "stopped",
|
||||||
|
(true, false) => "crashed",
|
||||||
|
(false, true) => "running, no pidfile",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn append_conf(&self, config: &str, opts: &str) {
|
||||||
|
OpenOptions::new()
|
||||||
|
.append(true)
|
||||||
|
.open(self.pgdata().join(config).to_str().unwrap())
|
||||||
|
.unwrap()
|
||||||
|
.write_all(opts.as_bytes())
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pg_ctl(&self, args: &[&str]) -> Result<()> {
|
||||||
|
let pg_ctl_path = self.env.pg_bin_dir().join("pg_ctl");
|
||||||
|
|
||||||
|
let pg_ctl = Command::new(pg_ctl_path)
|
||||||
|
.args(
|
||||||
|
[
|
||||||
|
&[
|
||||||
|
"-D",
|
||||||
|
self.pgdata().to_str().unwrap(),
|
||||||
|
"-l",
|
||||||
|
self.pgdata().join("log").to_str().unwrap(),
|
||||||
|
],
|
||||||
|
args,
|
||||||
|
]
|
||||||
|
.concat(),
|
||||||
|
)
|
||||||
|
.env_clear()
|
||||||
|
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap())
|
||||||
|
.status()
|
||||||
|
.with_context(|| "pg_ctl failed")?;
|
||||||
|
if !pg_ctl.success() {
|
||||||
|
anyhow::bail!("pg_ctl failed");
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start the node via "pg_ctl start", announcing its connection string.
pub fn start(&self) -> Result<()> {
    let connstr = self.connstr();
    println!("Starting postgres node at '{}'", connstr);
    self.pg_ctl(&["start"])
}
|
||||||
|
|
||||||
|
// Restart the node via "pg_ctl restart".
pub fn restart(&self) -> Result<()> {
    self.pg_ctl(&["restart"])
}
|
||||||
|
|
||||||
|
// Stop the node with immediate shutdown mode: no clean checkpoint is taken,
// so crash recovery will run on the next start.
pub fn stop(&self) -> Result<()> {
    self.pg_ctl(&["-m", "immediate", "stop"])
}
|
||||||
|
|
||||||
|
pub fn connstr(&self) -> String {
|
||||||
|
format!(
|
||||||
|
"host={} port={} user={}",
|
||||||
|
self.address.ip(),
|
||||||
|
self.address.port(),
|
||||||
|
self.whoami()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX: cache that in control plane
|
||||||
|
pub fn whoami(&self) -> String {
|
||||||
|
let output = Command::new("whoami")
|
||||||
|
.output()
|
||||||
|
.expect("failed to execute whoami");
|
||||||
|
|
||||||
|
if !output.status.success() {
|
||||||
|
panic!("whoami failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
String::from_utf8(output.stdout).unwrap().trim().to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn safe_psql(&self, db: &str, sql: &str) -> Vec<tokio_postgres::Row> {
|
||||||
|
let connstring = format!(
|
||||||
|
"host={} port={} dbname={} user={}",
|
||||||
|
self.address.ip(),
|
||||||
|
self.address.port(),
|
||||||
|
db,
|
||||||
|
self.whoami()
|
||||||
|
);
|
||||||
|
let mut client = Client::connect(connstring.as_str(), NoTls).unwrap();
|
||||||
|
|
||||||
|
println!("Running {}", sql);
|
||||||
|
client.query(sql, &[]).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn open_psql(&self, db: &str) -> Client {
|
||||||
|
let connstring = format!(
|
||||||
|
"host={} port={} dbname={} user={}",
|
||||||
|
self.address.ip(),
|
||||||
|
self.address.port(),
|
||||||
|
db,
|
||||||
|
self.whoami()
|
||||||
|
);
|
||||||
|
Client::connect(connstring.as_str(), NoTls).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn start_proxy(&self, wal_acceptors: &str) -> WalProposerNode {
|
||||||
|
let proxy_path = self.env.pg_bin_dir().join("safekeeper_proxy");
|
||||||
|
match Command::new(proxy_path.as_path())
|
||||||
|
.args(&["--ztimelineid", &self.timelineid.to_string()])
|
||||||
|
.args(&["-s", wal_acceptors])
|
||||||
|
.args(&["-h", &self.address.ip().to_string()])
|
||||||
|
.args(&["-p", &self.address.port().to_string()])
|
||||||
|
.arg("-v")
|
||||||
|
.stderr(
|
||||||
|
OpenOptions::new()
|
||||||
|
.create(true)
|
||||||
|
.append(true)
|
||||||
|
.open(self.pgdata().join("safekeeper_proxy.log"))
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.spawn()
|
||||||
|
{
|
||||||
|
Ok(child) => WalProposerNode { pid: child.id() },
|
||||||
|
Err(e) => panic!("Failed to launch {:?}: {}", proxy_path, e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: run a pgbench workload against this node; not implemented yet.
pub fn pg_bench() {}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for PostgresNode {
|
||||||
|
// destructor to clean up state after test is done
|
||||||
|
// XXX: we may detect failed test by setting some flag in catch_unwind()
|
||||||
|
// and checking it here. But let just clean datadirs on start.
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if self.is_test {
|
||||||
|
let _ = self.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
12
control_plane/src/lib.rs
Normal file
12
control_plane/src/lib.rs
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
//
|
||||||
|
// Local control plane.
|
||||||
|
//
|
||||||
|
// Can start, configure and stop postgres instances running as local processes.
|
||||||
|
//
|
||||||
|
// Intended to be used in integration tests and in CLI tools for
|
||||||
|
// local installations.
|
||||||
|
//
|
||||||
|
|
||||||
|
// Public submodules of the local control plane.
pub mod compute; // running local PostgreSQL compute nodes
pub mod local_env; // paths and configuration of a local installation
pub mod storage; // page server and WAL acceptor control
|
||||||
389
control_plane/src/local_env.rs
Normal file
389
control_plane/src/local_env.rs
Normal file
@@ -0,0 +1,389 @@
|
|||||||
|
//
|
||||||
|
// This module is responsible for locating and loading paths in a local setup.
|
||||||
|
//
|
||||||
|
// Now it also provides init method which acts like a stub for proper installation
|
||||||
|
// script which will use local paths.
|
||||||
|
//
|
||||||
|
use anyhow::Context;
|
||||||
|
use bytes::Bytes;
|
||||||
|
use rand::Rng;
|
||||||
|
use std::env;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::process::{Command, Stdio};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use serde_derive::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use pageserver::ZTimelineId;
|
||||||
|
use walkeeper::xlog_utils;
|
||||||
|
|
||||||
|
//
// This data structure represents deserialized zenith config, which should be
// located in ~/.zenith
//
// It is Clone because compute/storage node handles each keep their own copy,
// and (de)serialized as TOML via serde.
//
// TODO: should we also support ZENITH_CONF env var?
//
#[derive(Serialize, Deserialize, Clone)]
pub struct LocalEnv {
    // Path to the Repository. Here page server and compute nodes will create and store their data.
    pub repo_path: PathBuf,

    // System identifier, from the PostgreSQL control file
    // (filled in by init_repo from initdb's output; 0 until then)
    pub systemid: u64,

    // Path to postgres distribution. It's expected that "bin", "include",
    // "lib", "share" from postgres distribution are there. If at some point
    // in time we will be able to run against vanilla postgres we may split that
    // to four separate paths and match OS-specific installation layout.
    pub pg_distrib_dir: PathBuf,

    // Directory that contains the zenith binaries ("pageserver",
    // "wal_acceptor"), typically target/debug of this workspace.
    pub zenith_distrib_dir: PathBuf,
}
|
||||||
|
|
||||||
|
impl LocalEnv {
|
||||||
|
// postgres installation
|
||||||
|
pub fn pg_bin_dir(&self) -> PathBuf {
|
||||||
|
self.pg_distrib_dir.join("bin")
|
||||||
|
}
|
||||||
|
pub fn pg_lib_dir(&self) -> PathBuf {
|
||||||
|
self.pg_distrib_dir.join("lib")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Locate the zenith repository directory: $ZENITH_REPO_DIR if set, otherwise
// the relative path ".zenith".
//
// Fix: the previous version went through `to_str().unwrap()`, panicking on a
// non-UTF-8 value; `PathBuf::from(OsString)` handles any value directly.
fn zenith_repo_dir() -> PathBuf {
    std::env::var_os("ZENITH_REPO_DIR")
        .map(PathBuf::from)
        .unwrap_or_else(|| PathBuf::from(".zenith"))
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Initialize a new Zenith repository
|
||||||
|
//
|
||||||
|
pub fn init() -> Result<()> {
|
||||||
|
// check if config already exists
|
||||||
|
let repo_path = zenith_repo_dir();
|
||||||
|
if repo_path.exists() {
|
||||||
|
anyhow::bail!(
|
||||||
|
"{} already exists. Perhaps already initialized?",
|
||||||
|
repo_path.to_str().unwrap()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we can run init only from crate directory, so check that current dir is our crate.
|
||||||
|
// Use 'pageserver/Cargo.toml' existence as evidendce.
|
||||||
|
let cargo_path = env::current_dir()?;
|
||||||
|
if !cargo_path.join("pageserver/Cargo.toml").exists() {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Current dirrectory does not look like a zenith repo. \
|
||||||
|
Please, run 'init' from zenith repo root."
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ok, now check that expected binaries are present
|
||||||
|
|
||||||
|
// check postgres
|
||||||
|
let pg_distrib_dir = cargo_path.join("tmp_install");
|
||||||
|
let pg_path = pg_distrib_dir.join("bin/postgres");
|
||||||
|
if !pg_path.exists() {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Can't find postres binary at {}. \
|
||||||
|
Perhaps './pgbuild.sh' is needed to build it first.",
|
||||||
|
pg_path.to_str().unwrap()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// check pageserver
|
||||||
|
let zenith_distrib_dir = cargo_path.join("target/debug/");
|
||||||
|
let pageserver_path = zenith_distrib_dir.join("pageserver");
|
||||||
|
if !pageserver_path.exists() {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Can't find pageserver binary at {}. Please build it.",
|
||||||
|
pageserver_path.to_str().unwrap()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ok, we are good to go
|
||||||
|
let mut conf = LocalEnv {
|
||||||
|
repo_path: repo_path.clone(),
|
||||||
|
pg_distrib_dir,
|
||||||
|
zenith_distrib_dir,
|
||||||
|
systemid: 0,
|
||||||
|
};
|
||||||
|
init_repo(&mut conf)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate a freshly created repository: build the directory layout, run
// initdb to produce the initial snapshot, create the first timeline and the
// "main" branch, and write the repository config file.
//
// On success, 'local_env.systemid' is updated with the system identifier
// produced by initdb.
pub fn init_repo(local_env: &mut LocalEnv) -> Result<()> {
    let repopath = &local_env.repo_path;
    fs::create_dir(&repopath)
        .with_context(|| format!("could not create directory {}", repopath.display()))?;
    fs::create_dir(repopath.join("pgdatadirs"))?;
    fs::create_dir(repopath.join("timelines"))?;
    fs::create_dir(repopath.join("refs"))?;
    fs::create_dir(repopath.join("refs").join("branches"))?;
    fs::create_dir(repopath.join("refs").join("tags"))?;
    println!("created directory structure in {}", repopath.display());

    // Create initial timeline
    let tli = create_timeline(&local_env, None)?;
    let timelinedir = repopath.join("timelines").join(tli.to_string());
    println!("created initial timeline {}", timelinedir.display());

    // Run initdb
    //
    // FIXME: we create it temporarily in "tmp" directory, and move it into
    // the repository. Use "tempdir()" or something? Or just create it directly
    // in the repo?
    let initdb_path = local_env.pg_bin_dir().join("initdb");
    let _initdb = Command::new(initdb_path)
        .args(&["-D", "tmp"])
        .arg("--no-instructions")
        .env_clear()
        .env("LD_LIBRARY_PATH", local_env.pg_lib_dir().to_str().unwrap())
        .stdout(Stdio::null())
        .status()
        .with_context(|| "failed to execute initdb")?;
    println!("initdb succeeded");

    // Read control file to extract the LSN and system id
    let controlfile =
        postgres_ffi::decode_pg_control(Bytes::from(fs::read("tmp/global/pg_control")?))?;
    let systemid = controlfile.system_identifier;
    let lsn = controlfile.checkPoint;
    // Snapshot directories are named by the checkpoint LSN as 16 hex digits.
    let lsnstr = format!("{:016X}", lsn);

    // Move the initial WAL file into the timeline's wal/ directory.
    // ".partial": the segment is not yet complete.
    fs::rename(
        "tmp/pg_wal/000000010000000000000001",
        timelinedir
            .join("wal")
            .join("000000010000000000000001.partial"),
    )?;
    println!("moved initial WAL file");

    // Remove pg_wal
    fs::remove_dir_all("tmp/pg_wal")?;
    println!("removed tmp/pg_wal");

    // Mark the cluster as crashed so it performs recovery on first start
    // (see force_crash_recovery for the rationale).
    force_crash_recovery(&PathBuf::from("tmp"))?;
    println!("updated pg_control");

    // Move the initdb output into place as the timeline's first snapshot.
    let target = timelinedir.join("snapshots").join(&lsnstr);
    fs::rename("tmp", &target)?;
    println!("moved 'tmp' to {}", target.display());

    // Create 'main' branch to refer to the initial timeline
    let data = tli.to_string();
    fs::write(repopath.join("refs").join("branches").join("main"), data)?;
    println!("created main branch");

    // Also update the system id in the LocalEnv
    local_env.systemid = systemid;

    // write config
    let toml = toml::to_string(&local_env)?;
    fs::write(repopath.join("config"), toml)?;

    println!(
        "new zenith repository was created in {}",
        repopath.display()
    );

    Ok(())
}
|
||||||
|
|
||||||
|
// If control file says the cluster was shut down cleanly, modify it, to mark
|
||||||
|
// it as crashed. That forces crash recovery when you start the cluster.
|
||||||
|
//
|
||||||
|
// FIXME:
|
||||||
|
// We currently do this to the initial snapshot in "zenith init". It would
|
||||||
|
// be more natural to do this when the snapshot is restored instead, but we
|
||||||
|
// currently don't have any code to create new snapshots, so it doesn't matter
|
||||||
|
// Or better yet, use a less hacky way of putting the cluster into recovery.
|
||||||
|
// Perhaps create a backup label file in the data directory when it's restored.
|
||||||
|
fn force_crash_recovery(datadir: &Path) -> Result<()> {
|
||||||
|
// Read in the control file
|
||||||
|
let controlfilepath = datadir.to_path_buf().join("global").join("pg_control");
|
||||||
|
let mut controlfile =
|
||||||
|
postgres_ffi::decode_pg_control(Bytes::from(fs::read(controlfilepath.as_path())?))?;
|
||||||
|
|
||||||
|
controlfile.state = postgres_ffi::DBState_DB_IN_PRODUCTION;
|
||||||
|
|
||||||
|
fs::write(
|
||||||
|
controlfilepath.as_path(),
|
||||||
|
postgres_ffi::encode_pg_control(controlfile),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// check that config file is present
|
||||||
|
pub fn load_config(repopath: &Path) -> Result<LocalEnv> {
|
||||||
|
if !repopath.exists() {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Zenith config is not found in {}. You need to run 'zenith init' first",
|
||||||
|
repopath.to_str().unwrap()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// load and parse file
|
||||||
|
let config = fs::read_to_string(repopath.join("config"))?;
|
||||||
|
toml::from_str(config.as_str()).map_err(|e| e.into())
|
||||||
|
}
|
||||||
|
|
||||||
|
// local env for tests
|
||||||
|
pub fn test_env(testname: &str) -> LocalEnv {
|
||||||
|
fs::create_dir_all("../tmp_check").expect("could not create directory ../tmp_check");
|
||||||
|
|
||||||
|
let repo_path = Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||||
|
.join("../tmp_check/")
|
||||||
|
.join(testname);
|
||||||
|
|
||||||
|
// Remove remnants of old test repo
|
||||||
|
let _ = fs::remove_dir_all(&repo_path);
|
||||||
|
|
||||||
|
let mut local_env = LocalEnv {
|
||||||
|
repo_path,
|
||||||
|
pg_distrib_dir: Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_install"),
|
||||||
|
zenith_distrib_dir: cargo_bin_dir(),
|
||||||
|
systemid: 0,
|
||||||
|
};
|
||||||
|
init_repo(&mut local_env).expect("could not initialize zenith repository");
|
||||||
|
return local_env;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the directory where cargo put the binaries (i.e. target/debug/).
//
// Derived from the path of the currently running executable: test binaries
// live in target/debug/deps/, others directly in target/debug/.
pub fn cargo_bin_dir() -> PathBuf {
    let mut dir =
        std::env::current_exe().expect("could not determine path of current executable");

    dir.pop(); // strip the executable's file name
    if dir.ends_with("deps") {
        dir.pop();
    }

    // (idiom: trailing expression instead of `return`; unwrap -> expect)
    dir
}
|
||||||
|
|
||||||
|
// A point in WAL history: a (timeline, LSN) pair, e.g. a branch start point.
#[derive(Debug, Clone, Copy)]
pub struct PointInTime {
    // Timeline the LSN belongs to.
    pub timelineid: ZTimelineId,
    // WAL position within that timeline, as a raw 64-bit LSN.
    pub lsn: u64,
}
|
||||||
|
|
||||||
|
fn create_timeline(local_env: &LocalEnv, ancestor: Option<PointInTime>) -> Result<ZTimelineId> {
|
||||||
|
let repopath = &local_env.repo_path;
|
||||||
|
|
||||||
|
// Create initial timeline
|
||||||
|
let mut tli_buf = [0u8; 16];
|
||||||
|
rand::thread_rng().fill(&mut tli_buf);
|
||||||
|
let timelineid = ZTimelineId::from(tli_buf);
|
||||||
|
|
||||||
|
let timelinedir = repopath.join("timelines").join(timelineid.to_string());
|
||||||
|
|
||||||
|
fs::create_dir(&timelinedir)?;
|
||||||
|
fs::create_dir(&timelinedir.join("snapshots"))?;
|
||||||
|
fs::create_dir(&timelinedir.join("wal"))?;
|
||||||
|
|
||||||
|
if let Some(ancestor) = ancestor {
|
||||||
|
let data = format!(
|
||||||
|
"{}@{:X}/{:X}",
|
||||||
|
ancestor.timelineid,
|
||||||
|
ancestor.lsn >> 32,
|
||||||
|
ancestor.lsn & 0xffffffff
|
||||||
|
);
|
||||||
|
fs::write(timelinedir.join("ancestor"), data)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(timelineid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse an LSN in the format used in filenames (snapshot directory names):
// 16 hex digits with no separator.
//
// For example: 00000000015D3DD8
//
fn parse_lsn(s: &str) -> std::result::Result<u64, std::num::ParseIntError> {
    u64::from_str_radix(s, 16)
}
|
||||||
|
|
||||||
|
// Create a new branch in the repository (for the "zenith branch" subcommand)
//
// A branch is a new timeline forked off 'startpoint', plus a ref under
// refs/branches/ pointing at it. The ancestor's latest snapshot and all of
// its WAL are copied into the new timeline's directory.
pub fn create_branch(
    local_env: &LocalEnv,
    branchname: &str,
    startpoint: PointInTime,
) -> Result<()> {
    let repopath = &local_env.repo_path;

    // create a new timeline for it
    let newtli = create_timeline(local_env, Some(startpoint))?;
    let newtimelinedir = repopath.join("timelines").join(newtli.to_string());

    // Point the branch ref at the new timeline.
    let data = newtli.to_string();
    fs::write(
        repopath.join("refs").join("branches").join(branchname),
        data,
    )?;

    // Copy the latest snapshot (TODO: before the startpoint) and all WAL
    // TODO: be smarter and avoid the copying...
    let (_maxsnapshot, oldsnapshotdir) = find_latest_snapshot(local_env, startpoint.timelineid)?;
    let copy_opts = fs_extra::dir::CopyOptions::new();
    fs_extra::dir::copy(oldsnapshotdir, newtimelinedir.join("snapshots"), &copy_opts)?;

    let oldtimelinedir = repopath
        .join("timelines")
        .join(startpoint.timelineid.to_string());
    // content_only: copy the contents of wal/, not the directory itself,
    // since the destination wal/ already exists.
    let mut copy_opts = fs_extra::dir::CopyOptions::new();
    copy_opts.content_only = true;
    fs_extra::dir::copy(
        oldtimelinedir.join("wal"),
        newtimelinedir.join("wal"),
        &copy_opts,
    )?;

    Ok(())
}
|
||||||
|
|
||||||
|
// Find the end of valid WAL in a wal directory
|
||||||
|
pub fn find_end_of_wal(local_env: &LocalEnv, timeline: ZTimelineId) -> Result<u64> {
|
||||||
|
let repopath = &local_env.repo_path;
|
||||||
|
let waldir = repopath
|
||||||
|
.join("timelines")
|
||||||
|
.join(timeline.to_string())
|
||||||
|
.join("wal");
|
||||||
|
|
||||||
|
let (lsn, _tli) = xlog_utils::find_end_of_wal(&waldir, 16 * 1024 * 1024, true);
|
||||||
|
|
||||||
|
return Ok(lsn);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the latest snapshot for a timeline
|
||||||
|
fn find_latest_snapshot(local_env: &LocalEnv, timeline: ZTimelineId) -> Result<(u64, PathBuf)> {
|
||||||
|
let repopath = &local_env.repo_path;
|
||||||
|
|
||||||
|
let snapshotsdir = repopath
|
||||||
|
.join("timelines")
|
||||||
|
.join(timeline.to_string())
|
||||||
|
.join("snapshots");
|
||||||
|
let paths = fs::read_dir(&snapshotsdir)?;
|
||||||
|
let mut maxsnapshot: u64 = 0;
|
||||||
|
let mut snapshotdir: Option<PathBuf> = None;
|
||||||
|
for path in paths {
|
||||||
|
let path = path?;
|
||||||
|
let filename = path.file_name().to_str().unwrap().to_owned();
|
||||||
|
if let Ok(lsn) = parse_lsn(&filename) {
|
||||||
|
maxsnapshot = std::cmp::max(lsn, maxsnapshot);
|
||||||
|
snapshotdir = Some(path.path());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if maxsnapshot == 0 {
|
||||||
|
// TODO: check ancestor timeline
|
||||||
|
anyhow::bail!("no snapshot found in {}", snapshotsdir.display());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((maxsnapshot, snapshotdir.unwrap()))
|
||||||
|
}
|
||||||
413
control_plane/src/storage.rs
Normal file
413
control_plane/src/storage.rs
Normal file
@@ -0,0 +1,413 @@
|
|||||||
|
use anyhow::Result;
|
||||||
|
use std::fs;
|
||||||
|
use std::io;
|
||||||
|
use std::net::SocketAddr;
|
||||||
|
use std::net::TcpStream;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::process::Command;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::thread;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use postgres::{Client, NoTls};
|
||||||
|
|
||||||
|
use crate::compute::PostgresNode;
|
||||||
|
use crate::local_env::LocalEnv;
|
||||||
|
use pageserver::ZTimelineId;
|
||||||
|
|
||||||
|
//
// Collection of several example deployments useful for tests.
//
// I'm intendedly modelling storage and compute control planes as a separate entities
// as it is closer to the actual setup.
//
pub struct TestStorageControlPlane {
    // WAL acceptor (safekeeper) nodes, possibly empty.
    pub wal_acceptors: Vec<WalAcceptorNode>,
    // The single page server; shared so compute nodes can hold a reference.
    pub pageserver: Arc<PageServerNode>,
    // Flag set by stop() / Drop to signal that the test is finished.
    pub test_done: AtomicBool,
    // Root of the zenith repository used by this deployment.
    pub repopath: PathBuf,
}
|
||||||
|
|
||||||
|
impl TestStorageControlPlane {
|
||||||
|
// Peek into the repository, to grab the timeline ID of given branch
|
||||||
|
pub fn get_branch_timeline(&self, branchname: &str) -> ZTimelineId {
|
||||||
|
let branchpath = self.repopath.join("refs/branches/".to_owned() + branchname);
|
||||||
|
|
||||||
|
ZTimelineId::from_str(&(fs::read_to_string(&branchpath).unwrap())).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
// postgres <-> page_server
|
||||||
|
//
|
||||||
|
// Initialize a new repository and configure a page server to run in it
|
||||||
|
//
|
||||||
|
pub fn one_page_server(local_env: &LocalEnv) -> TestStorageControlPlane {
|
||||||
|
let repopath = local_env.repo_path.clone();
|
||||||
|
|
||||||
|
let pserver = Arc::new(PageServerNode {
|
||||||
|
env: local_env.clone(),
|
||||||
|
kill_on_exit: true,
|
||||||
|
listen_address: None,
|
||||||
|
});
|
||||||
|
pserver.start().unwrap();
|
||||||
|
|
||||||
|
TestStorageControlPlane {
|
||||||
|
wal_acceptors: Vec::new(),
|
||||||
|
pageserver: pserver,
|
||||||
|
test_done: AtomicBool::new(false),
|
||||||
|
repopath: repopath,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn one_page_server_no_start(local_env: &LocalEnv) -> TestStorageControlPlane {
|
||||||
|
let repopath = local_env.repo_path.clone();
|
||||||
|
|
||||||
|
let pserver = Arc::new(PageServerNode {
|
||||||
|
env: local_env.clone(),
|
||||||
|
kill_on_exit: true,
|
||||||
|
listen_address: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
TestStorageControlPlane {
|
||||||
|
wal_acceptors: Vec::new(),
|
||||||
|
pageserver: pserver,
|
||||||
|
test_done: AtomicBool::new(false),
|
||||||
|
repopath: repopath,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// postgres <-> {wal_acceptor1, wal_acceptor2, ...}
|
||||||
|
pub fn fault_tolerant(local_env: &LocalEnv, redundancy: usize) -> TestStorageControlPlane {
|
||||||
|
let repopath = local_env.repo_path.clone();
|
||||||
|
|
||||||
|
let mut cplane = TestStorageControlPlane {
|
||||||
|
wal_acceptors: Vec::new(),
|
||||||
|
pageserver: Arc::new(PageServerNode {
|
||||||
|
env: local_env.clone(),
|
||||||
|
kill_on_exit: true,
|
||||||
|
listen_address: None,
|
||||||
|
}),
|
||||||
|
test_done: AtomicBool::new(false),
|
||||||
|
repopath: repopath,
|
||||||
|
};
|
||||||
|
cplane.pageserver.start().unwrap();
|
||||||
|
|
||||||
|
const WAL_ACCEPTOR_PORT: usize = 54321;
|
||||||
|
|
||||||
|
for i in 0..redundancy {
|
||||||
|
let wal_acceptor = WalAcceptorNode {
|
||||||
|
listen: format!("127.0.0.1:{}", WAL_ACCEPTOR_PORT + i)
|
||||||
|
.parse()
|
||||||
|
.unwrap(),
|
||||||
|
data_dir: local_env.repo_path.join(format!("wal_acceptor_{}", i)),
|
||||||
|
env: local_env.clone(),
|
||||||
|
};
|
||||||
|
wal_acceptor.init();
|
||||||
|
wal_acceptor.start();
|
||||||
|
cplane.wal_acceptors.push(wal_acceptor);
|
||||||
|
}
|
||||||
|
cplane
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn stop(&self) {
|
||||||
|
self.test_done.store(true, Ordering::Relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_wal_acceptor_conn_info(&self) -> String {
|
||||||
|
self.wal_acceptors
|
||||||
|
.iter()
|
||||||
|
.map(|wa| wa.listen.to_string())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(",")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_running(&self) -> bool {
|
||||||
|
self.test_done.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for TestStorageControlPlane {
    // Set the test_done flag when the control plane goes out of scope —
    // presumably so anything polling test_done can wind down; confirm
    // against callers.
    fn drop(&mut self) {
        self.stop();
    }
}
|
||||||
|
|
||||||
|
//
// Control routines for pageserver.
//
// Used in CLI and tests.
//
pub struct PageServerNode {
    // If true, Drop will try to stop the server process.
    kill_on_exit: bool,
    // Explicit listen address; None means the default 127.0.0.1:64000.
    listen_address: Option<SocketAddr>,
    pub env: LocalEnv,
}
|
||||||
|
|
||||||
|
impl PageServerNode {
|
||||||
|
pub fn from_env(env: &LocalEnv) -> PageServerNode {
|
||||||
|
PageServerNode {
|
||||||
|
kill_on_exit: false,
|
||||||
|
listen_address: None, // default
|
||||||
|
env: env.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn address(&self) -> SocketAddr {
|
||||||
|
match self.listen_address {
|
||||||
|
Some(addr) => addr,
|
||||||
|
None => "127.0.0.1:64000".parse().unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn repo_path(&self) -> PathBuf {
|
||||||
|
self.env.repo_path.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn pid_file(&self) -> PathBuf {
|
||||||
|
self.env.repo_path.join("pageserver.pid")
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn start(&self) -> Result<()> {
|
||||||
|
println!(
|
||||||
|
"Starting pageserver at '{}' in {}",
|
||||||
|
self.address(),
|
||||||
|
self.repo_path().display()
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut cmd = Command::new(self.env.zenith_distrib_dir.join("pageserver"));
|
||||||
|
cmd.args(&["-l", self.address().to_string().as_str()])
|
||||||
|
.arg("-d")
|
||||||
|
.env_clear()
|
||||||
|
.env("RUST_BACKTRACE", "1")
|
||||||
|
.env("ZENITH_REPO_DIR", self.repo_path())
|
||||||
|
.env("PATH", self.env.pg_bin_dir().to_str().unwrap()) // needs postres-wal-redo binary
|
||||||
|
.env("LD_LIBRARY_PATH", self.env.pg_lib_dir().to_str().unwrap());
|
||||||
|
|
||||||
|
if !cmd.status()?.success() {
|
||||||
|
anyhow::bail!(
|
||||||
|
"Pageserver failed to start. See '{}' for details.",
|
||||||
|
self.repo_path().join("pageserver.log").display()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// It takes a while for the page server to start up. Wait until it is
|
||||||
|
// open for business.
|
||||||
|
for retries in 1..15 {
|
||||||
|
let client = self.page_server_psql_client();
|
||||||
|
if client.is_ok() {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
println!("page server not responding yet, retrying ({})...", retries);
|
||||||
|
thread::sleep(Duration::from_secs(1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn stop(&self) -> Result<()> {
|
||||||
|
let pidfile = self.pid_file();
|
||||||
|
let pid = read_pidfile(&pidfile)?;
|
||||||
|
|
||||||
|
let status = Command::new("kill")
|
||||||
|
.arg(&pid)
|
||||||
|
.env_clear()
|
||||||
|
.status()
|
||||||
|
.expect("failed to execute kill");
|
||||||
|
|
||||||
|
if !status.success() {
|
||||||
|
anyhow::bail!("Failed to kill pageserver with pid {}", pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
// await for pageserver stop
|
||||||
|
for _ in 0..5 {
|
||||||
|
let stream = TcpStream::connect(self.address());
|
||||||
|
if let Err(_e) = stream {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
println!("Stopping pageserver on {}", self.address());
|
||||||
|
thread::sleep(Duration::from_secs(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ok, we failed to stop pageserver, let's panic
|
||||||
|
if !status.success() {
|
||||||
|
anyhow::bail!("Failed to stop pageserver with pid {}", pid);
|
||||||
|
} else {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
|
||||||
|
let connstring = format!(
|
||||||
|
"host={} port={} dbname={} user={}",
|
||||||
|
self.address().ip(),
|
||||||
|
self.address().port(),
|
||||||
|
"no_db",
|
||||||
|
"no_user",
|
||||||
|
);
|
||||||
|
let mut client = Client::connect(connstring.as_str(), NoTls).unwrap();
|
||||||
|
|
||||||
|
println!("Pageserver query: '{}'", sql);
|
||||||
|
client.simple_query(sql).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn page_server_psql_client(
|
||||||
|
&self,
|
||||||
|
) -> std::result::Result<postgres::Client, postgres::Error> {
|
||||||
|
let connstring = format!(
|
||||||
|
"host={} port={} dbname={} user={}",
|
||||||
|
self.address().ip(),
|
||||||
|
self.address().port(),
|
||||||
|
"no_db",
|
||||||
|
"no_user",
|
||||||
|
);
|
||||||
|
Client::connect(connstring.as_str(), NoTls)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for PageServerNode {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if self.kill_on_exit {
|
||||||
|
let _ = self.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Control routines for WalAcceptor.
|
||||||
|
//
|
||||||
|
// Now used only in test setups.
|
||||||
|
//
|
||||||
|
pub struct WalAcceptorNode {
|
||||||
|
listen: SocketAddr,
|
||||||
|
data_dir: PathBuf,
|
||||||
|
env: LocalEnv,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WalAcceptorNode {
|
||||||
|
pub fn init(&self) {
|
||||||
|
if self.data_dir.exists() {
|
||||||
|
fs::remove_dir_all(self.data_dir.clone()).unwrap();
|
||||||
|
}
|
||||||
|
fs::create_dir_all(self.data_dir.clone()).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn start(&self) {
|
||||||
|
println!(
|
||||||
|
"Starting wal_acceptor in {} listening '{}'",
|
||||||
|
self.data_dir.to_str().unwrap(),
|
||||||
|
self.listen
|
||||||
|
);
|
||||||
|
|
||||||
|
let status = Command::new(self.env.zenith_distrib_dir.join("wal_acceptor"))
|
||||||
|
.args(&["-D", self.data_dir.to_str().unwrap()])
|
||||||
|
.args(&["-l", self.listen.to_string().as_str()])
|
||||||
|
.args(&["--systemid", &self.env.systemid.to_string()])
|
||||||
|
// Tell page server it can receive WAL from this WAL safekeeper
|
||||||
|
// FIXME: If there are multiple safekeepers, they will all inform
|
||||||
|
// the page server. Only the last "notification" will stay in effect.
|
||||||
|
// So it's pretty random which safekeeper the page server will connect to
|
||||||
|
.args(&["--pageserver", "127.0.0.1:64000"])
|
||||||
|
.arg("-d")
|
||||||
|
.arg("-n")
|
||||||
|
.status()
|
||||||
|
.expect("failed to start wal_acceptor");
|
||||||
|
|
||||||
|
if !status.success() {
|
||||||
|
panic!("wal_acceptor start failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn stop(&self) -> std::result::Result<(), io::Error> {
|
||||||
|
println!("Stopping wal acceptor on {}", self.listen);
|
||||||
|
let pidfile = self.data_dir.join("wal_acceptor.pid");
|
||||||
|
let pid = read_pidfile(&pidfile)?;
|
||||||
|
// Ignores any failures when running this command
|
||||||
|
let _status = Command::new("kill")
|
||||||
|
.arg(pid)
|
||||||
|
.env_clear()
|
||||||
|
.status()
|
||||||
|
.expect("failed to execute kill");
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for WalAcceptorNode {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
self.stop().unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
pub struct WalProposerNode {
|
||||||
|
pub pid: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WalProposerNode {
|
||||||
|
pub fn stop(&self) {
|
||||||
|
let status = Command::new("kill")
|
||||||
|
.arg(self.pid.to_string())
|
||||||
|
.env_clear()
|
||||||
|
.status()
|
||||||
|
.expect("failed to execute kill");
|
||||||
|
|
||||||
|
if !status.success() {
|
||||||
|
panic!("kill start failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for WalProposerNode {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
self.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
pub fn regress_check(pg: &PostgresNode) {
|
||||||
|
pg.safe_psql("postgres", "CREATE DATABASE regression");
|
||||||
|
|
||||||
|
let regress_run_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tmp_check/regress");
|
||||||
|
fs::create_dir_all(regress_run_path.clone()).unwrap();
|
||||||
|
std::env::set_current_dir(regress_run_path).unwrap();
|
||||||
|
|
||||||
|
let regress_build_path =
|
||||||
|
Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_install/build/src/test/regress");
|
||||||
|
let regress_src_path =
|
||||||
|
Path::new(env!("CARGO_MANIFEST_DIR")).join("../vendor/postgres/src/test/regress");
|
||||||
|
|
||||||
|
let _regress_check = Command::new(regress_build_path.join("pg_regress"))
|
||||||
|
.args(&[
|
||||||
|
"--bindir=''",
|
||||||
|
"--use-existing",
|
||||||
|
format!("--bindir={}", pg.env.pg_bin_dir().to_str().unwrap()).as_str(),
|
||||||
|
format!("--dlpath={}", regress_build_path.to_str().unwrap()).as_str(),
|
||||||
|
format!(
|
||||||
|
"--schedule={}",
|
||||||
|
regress_src_path.join("parallel_schedule").to_str().unwrap()
|
||||||
|
)
|
||||||
|
.as_str(),
|
||||||
|
format!("--inputdir={}", regress_src_path.to_str().unwrap()).as_str(),
|
||||||
|
])
|
||||||
|
.env_clear()
|
||||||
|
.env("LD_LIBRARY_PATH", pg.env.pg_lib_dir().to_str().unwrap())
|
||||||
|
.env("PGHOST", pg.address.ip().to_string())
|
||||||
|
.env("PGPORT", pg.address.port().to_string())
|
||||||
|
.env("PGUSER", pg.whoami())
|
||||||
|
.status()
|
||||||
|
.expect("pg_regress failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read a PID file
|
||||||
|
///
|
||||||
|
/// This should contain an unsigned integer, but we return it as a String
|
||||||
|
/// because our callers only want to pass it back into a subcommand.
|
||||||
|
fn read_pidfile(pidfile: &Path) -> std::result::Result<String, io::Error> {
|
||||||
|
fs::read_to_string(pidfile).map_err(|err| {
|
||||||
|
eprintln!("failed to read pidfile {:?}: {:?}", pidfile, err);
|
||||||
|
err
|
||||||
|
})
|
||||||
|
}
|
||||||
1
integration_tests/.gitignore
vendored
Normal file
1
integration_tests/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
tmp_check/
|
||||||
@@ -9,8 +9,9 @@ edition = "2018"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
rand = "0.8.3"
|
rand = "0.8.3"
|
||||||
postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
|
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
tokio-postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
|
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
|
|
||||||
pageserver = { path = "../pageserver" }
|
pageserver = { path = "../pageserver" }
|
||||||
walkeeper = { path = "../walkeeper" }
|
walkeeper = { path = "../walkeeper" }
|
||||||
|
control_plane = { path = "../control_plane" }
|
||||||
|
|||||||
@@ -1,663 +0,0 @@
|
|||||||
//
|
|
||||||
// Local control plane.
|
|
||||||
//
|
|
||||||
// Can start, cofigure and stop postgres instances running as a local processes.
|
|
||||||
//
|
|
||||||
// Intended to be used in integration tests and in CLI tools for
|
|
||||||
// local installations.
|
|
||||||
//
|
|
||||||
|
|
||||||
use std::fs::File;
|
|
||||||
use std::fs::{self, OpenOptions};
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::process::Command;
|
|
||||||
use std::str;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::{
|
|
||||||
io::Write,
|
|
||||||
net::{IpAddr, Ipv4Addr, SocketAddr},
|
|
||||||
};
|
|
||||||
|
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use postgres::{Client, NoTls};
|
|
||||||
|
|
||||||
lazy_static! {
|
|
||||||
// postgres would be there if it was build by 'make postgres' here in the repo
|
|
||||||
pub static ref PG_BIN_DIR : PathBuf = Path::new(env!("CARGO_MANIFEST_DIR"))
|
|
||||||
.join("../tmp_install/bin");
|
|
||||||
pub static ref PG_LIB_DIR : PathBuf = Path::new(env!("CARGO_MANIFEST_DIR"))
|
|
||||||
.join("../tmp_install/lib");
|
|
||||||
|
|
||||||
pub static ref BIN_DIR : PathBuf = cargo_bin_dir();
|
|
||||||
|
|
||||||
pub static ref TEST_WORKDIR : PathBuf = Path::new(env!("CARGO_MANIFEST_DIR"))
|
|
||||||
.join("tmp_check");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the directory where the binaries were put (i.e. target/debug/)
|
|
||||||
pub fn cargo_bin_dir() -> PathBuf {
|
|
||||||
let mut pathbuf = std::env::current_exe().ok().unwrap();
|
|
||||||
|
|
||||||
pathbuf.pop();
|
|
||||||
if pathbuf.ends_with("deps") {
|
|
||||||
pathbuf.pop();
|
|
||||||
}
|
|
||||||
|
|
||||||
return pathbuf;
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// I'm intendedly modelling storage and compute control planes as a separate entities
|
|
||||||
// as it is closer to the actual setup.
|
|
||||||
//
|
|
||||||
pub struct StorageControlPlane {
|
|
||||||
pub wal_acceptors: Vec<WalAcceptorNode>,
|
|
||||||
pub page_servers: Vec<PageServerNode>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl StorageControlPlane {
|
|
||||||
// postgres <-> page_server
|
|
||||||
pub fn one_page_server() -> StorageControlPlane {
|
|
||||||
let mut cplane = StorageControlPlane {
|
|
||||||
wal_acceptors: Vec::new(),
|
|
||||||
page_servers: Vec::new(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let pserver = PageServerNode {
|
|
||||||
page_service_addr: "127.0.0.1:65200".parse().unwrap(),
|
|
||||||
data_dir: TEST_WORKDIR.join("pageserver"),
|
|
||||||
};
|
|
||||||
pserver.init();
|
|
||||||
pserver.start();
|
|
||||||
|
|
||||||
cplane.page_servers.push(pserver);
|
|
||||||
cplane
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn fault_tolerant(redundancy: usize) -> StorageControlPlane {
|
|
||||||
let mut cplane = StorageControlPlane {
|
|
||||||
wal_acceptors: Vec::new(),
|
|
||||||
page_servers: Vec::new(),
|
|
||||||
};
|
|
||||||
const WAL_ACCEPTOR_PORT: usize = 54321;
|
|
||||||
|
|
||||||
for i in 0..redundancy {
|
|
||||||
let wal_acceptor = WalAcceptorNode {
|
|
||||||
listen: format!("127.0.0.1:{}", WAL_ACCEPTOR_PORT + i)
|
|
||||||
.parse()
|
|
||||||
.unwrap(),
|
|
||||||
data_dir: TEST_WORKDIR.join(format!("wal_acceptor_{}", i)),
|
|
||||||
};
|
|
||||||
wal_acceptor.init();
|
|
||||||
wal_acceptor.start();
|
|
||||||
cplane.wal_acceptors.push(wal_acceptor);
|
|
||||||
}
|
|
||||||
cplane
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn stop(&self) {
|
|
||||||
for wa in self.wal_acceptors.iter() {
|
|
||||||
wa.stop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// // postgres <-> wal_acceptor x3 <-> page_server
|
|
||||||
// fn local(&mut self) -> StorageControlPlane {
|
|
||||||
// }
|
|
||||||
|
|
||||||
pub fn page_server_addr(&self) -> &SocketAddr {
|
|
||||||
&self.page_servers[0].page_service_addr
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_wal_acceptor_conn_info(&self) -> String {
|
|
||||||
self.wal_acceptors
|
|
||||||
.iter()
|
|
||||||
.map(|wa| wa.listen.to_string().to_string())
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
.join(",")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn page_server_psql(&self, sql: &str) -> Vec<postgres::SimpleQueryMessage> {
|
|
||||||
let addr = &self.page_servers[0].page_service_addr;
|
|
||||||
|
|
||||||
let connstring = format!(
|
|
||||||
"host={} port={} dbname={} user={}",
|
|
||||||
addr.ip(),
|
|
||||||
addr.port(),
|
|
||||||
"no_db",
|
|
||||||
"no_user",
|
|
||||||
);
|
|
||||||
let mut client = Client::connect(connstring.as_str(), NoTls).unwrap();
|
|
||||||
|
|
||||||
println!("Pageserver query: '{}'", sql);
|
|
||||||
client.simple_query(sql).unwrap()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for StorageControlPlane {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
self.stop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct PageServerNode {
|
|
||||||
page_service_addr: SocketAddr,
|
|
||||||
data_dir: PathBuf,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PageServerNode {
|
|
||||||
// TODO: method to force redo on a specific relation
|
|
||||||
|
|
||||||
// TODO: make wal-redo-postgres workable without data directory?
|
|
||||||
pub fn init(&self) {
|
|
||||||
fs::create_dir_all(self.data_dir.clone()).unwrap();
|
|
||||||
|
|
||||||
let datadir_path = self.data_dir.join("wal_redo_pgdata");
|
|
||||||
fs::remove_dir_all(datadir_path.to_str().unwrap()).ok();
|
|
||||||
|
|
||||||
let initdb = Command::new(PG_BIN_DIR.join("initdb"))
|
|
||||||
.args(&["-D", datadir_path.to_str().unwrap()])
|
|
||||||
.arg("-N")
|
|
||||||
.arg("--no-instructions")
|
|
||||||
.env_clear()
|
|
||||||
.env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
|
|
||||||
.status()
|
|
||||||
.expect("failed to execute initdb");
|
|
||||||
if !initdb.success() {
|
|
||||||
panic!("initdb failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn start(&self) {
|
|
||||||
println!("Starting pageserver at '{}'", self.page_service_addr);
|
|
||||||
|
|
||||||
let status = Command::new(BIN_DIR.join("pageserver"))
|
|
||||||
.args(&["-D", self.data_dir.to_str().unwrap()])
|
|
||||||
.args(&["-l", self.page_service_addr.to_string().as_str()])
|
|
||||||
.arg("-d")
|
|
||||||
.arg("--skip-recovery")
|
|
||||||
.env_clear()
|
|
||||||
.env("PATH", PG_BIN_DIR.to_str().unwrap()) // path to postres-wal-redo binary
|
|
||||||
.status()
|
|
||||||
.expect("failed to start pageserver");
|
|
||||||
|
|
||||||
if !status.success() {
|
|
||||||
panic!("pageserver start failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn stop(&self) {
|
|
||||||
let pidfile = self.data_dir.join("pageserver.pid");
|
|
||||||
let pid = fs::read_to_string(pidfile).unwrap();
|
|
||||||
let status = Command::new("kill")
|
|
||||||
.arg(pid)
|
|
||||||
.env_clear()
|
|
||||||
.status()
|
|
||||||
.expect("failed to execute kill");
|
|
||||||
|
|
||||||
if !status.success() {
|
|
||||||
panic!("kill start failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for PageServerNode {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
self.stop();
|
|
||||||
// fs::remove_dir_all(self.data_dir.clone()).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct WalAcceptorNode {
|
|
||||||
listen: SocketAddr,
|
|
||||||
data_dir: PathBuf,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl WalAcceptorNode {
|
|
||||||
pub fn init(&self) {
|
|
||||||
if self.data_dir.exists() {
|
|
||||||
fs::remove_dir_all(self.data_dir.clone()).unwrap();
|
|
||||||
}
|
|
||||||
fs::create_dir_all(self.data_dir.clone()).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn start(&self) {
|
|
||||||
println!(
|
|
||||||
"Starting wal_acceptor in {} listening '{}'",
|
|
||||||
self.data_dir.to_str().unwrap(),
|
|
||||||
self.listen
|
|
||||||
);
|
|
||||||
|
|
||||||
let status = Command::new(BIN_DIR.join("wal_acceptor"))
|
|
||||||
.args(&["-D", self.data_dir.to_str().unwrap()])
|
|
||||||
.args(&["-l", self.listen.to_string().as_str()])
|
|
||||||
.arg("-d")
|
|
||||||
.arg("-n")
|
|
||||||
.status()
|
|
||||||
.expect("failed to start wal_acceptor");
|
|
||||||
|
|
||||||
if !status.success() {
|
|
||||||
panic!("wal_acceptor start failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn stop(&self) {
|
|
||||||
let pidfile = self.data_dir.join("wal_acceptor.pid");
|
|
||||||
if let Ok(pid) = fs::read_to_string(pidfile) {
|
|
||||||
let _status = Command::new("kill")
|
|
||||||
.arg(pid)
|
|
||||||
.env_clear()
|
|
||||||
.status()
|
|
||||||
.expect("failed to execute kill");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for WalAcceptorNode {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
self.stop();
|
|
||||||
// fs::remove_dir_all(self.data_dir.clone()).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
//
|
|
||||||
// ComputeControlPlane
|
|
||||||
//
|
|
||||||
pub struct ComputeControlPlane<'a> {
|
|
||||||
pg_bin_dir: PathBuf,
|
|
||||||
work_dir: PathBuf,
|
|
||||||
last_assigned_port: u16,
|
|
||||||
storage_cplane: &'a StorageControlPlane,
|
|
||||||
nodes: Vec<Arc<PostgresNode>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ComputeControlPlane<'_> {
|
|
||||||
pub fn local(storage_cplane: &StorageControlPlane) -> ComputeControlPlane {
|
|
||||||
ComputeControlPlane {
|
|
||||||
pg_bin_dir: PG_BIN_DIR.to_path_buf(),
|
|
||||||
work_dir: TEST_WORKDIR.to_path_buf(),
|
|
||||||
last_assigned_port: 65431,
|
|
||||||
storage_cplane: storage_cplane,
|
|
||||||
nodes: Vec::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: check port availability and
|
|
||||||
fn get_port(&mut self) -> u16 {
|
|
||||||
let port = self.last_assigned_port + 1;
|
|
||||||
self.last_assigned_port += 1;
|
|
||||||
port
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn new_vanilla_node<'a>(&mut self) -> &Arc<PostgresNode> {
|
|
||||||
// allocate new node entry with generated port
|
|
||||||
let node_id = self.nodes.len() + 1;
|
|
||||||
let node = PostgresNode {
|
|
||||||
_node_id: node_id,
|
|
||||||
port: self.get_port(),
|
|
||||||
ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
|
|
||||||
pgdata: self.work_dir.join(format!("compute/pg{}", node_id)),
|
|
||||||
pg_bin_dir: self.pg_bin_dir.clone(),
|
|
||||||
};
|
|
||||||
self.nodes.push(Arc::new(node));
|
|
||||||
let node = self.nodes.last().unwrap();
|
|
||||||
|
|
||||||
// initialize data directory
|
|
||||||
fs::remove_dir_all(node.pgdata.to_str().unwrap()).ok();
|
|
||||||
let initdb_path = self.pg_bin_dir.join("initdb");
|
|
||||||
println!("initdb_path: {}", initdb_path.to_str().unwrap());
|
|
||||||
let initdb = Command::new(initdb_path)
|
|
||||||
.args(&["-D", node.pgdata.to_str().unwrap()])
|
|
||||||
.arg("-N")
|
|
||||||
.arg("--no-instructions")
|
|
||||||
.env_clear()
|
|
||||||
.env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
|
|
||||||
.status()
|
|
||||||
.expect("failed to execute initdb");
|
|
||||||
|
|
||||||
if !initdb.success() {
|
|
||||||
panic!("initdb failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
// // allow local replication connections
|
|
||||||
// node.append_conf("pg_hba.conf", format!("\
|
|
||||||
// host replication all {}/32 sspi include_realm=1 map=regress\n\
|
|
||||||
// ", node.ip).as_str());
|
|
||||||
|
|
||||||
// listen for selected port
|
|
||||||
node.append_conf(
|
|
||||||
"postgresql.conf",
|
|
||||||
format!(
|
|
||||||
"\
|
|
||||||
max_wal_senders = 10\n\
|
|
||||||
max_replication_slots = 10\n\
|
|
||||||
hot_standby = on\n\
|
|
||||||
shared_buffers = 1MB\n\
|
|
||||||
max_connections = 100\n\
|
|
||||||
wal_level = replica\n\
|
|
||||||
listen_addresses = '{address}'\n\
|
|
||||||
port = {port}\n\
|
|
||||||
",
|
|
||||||
address = node.ip,
|
|
||||||
port = node.port
|
|
||||||
)
|
|
||||||
.as_str(),
|
|
||||||
);
|
|
||||||
|
|
||||||
node
|
|
||||||
}
|
|
||||||
|
|
||||||
// Init compute node without files, only datadir structure
|
|
||||||
// use initdb --compute-node flag and GUC 'computenode_mode'
|
|
||||||
// to distinguish the node
|
|
||||||
pub fn new_minimal_node(&mut self) -> &PostgresNode {
|
|
||||||
// allocate new node entry with generated port
|
|
||||||
let node_id = self.nodes.len() + 1;
|
|
||||||
let node = PostgresNode {
|
|
||||||
_node_id: node_id,
|
|
||||||
port: self.get_port(),
|
|
||||||
ip: IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
|
|
||||||
pgdata: self.work_dir.join(format!("compute/pg{}", node_id)),
|
|
||||||
pg_bin_dir: self.pg_bin_dir.clone(),
|
|
||||||
};
|
|
||||||
self.nodes.push(Arc::new(node));
|
|
||||||
let node = self.nodes.last().unwrap();
|
|
||||||
|
|
||||||
// initialize data directory w/o files
|
|
||||||
fs::remove_dir_all(node.pgdata.to_str().unwrap()).ok();
|
|
||||||
let initdb_path = self.pg_bin_dir.join("initdb");
|
|
||||||
println!("initdb_path: {}", initdb_path.to_str().unwrap());
|
|
||||||
let initdb = Command::new(initdb_path)
|
|
||||||
.args(&["-D", node.pgdata.to_str().unwrap()])
|
|
||||||
.arg("-N")
|
|
||||||
.arg("--no-instructions")
|
|
||||||
.arg("--compute-node")
|
|
||||||
.env_clear()
|
|
||||||
.env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
|
|
||||||
.status()
|
|
||||||
.expect("failed to execute initdb");
|
|
||||||
|
|
||||||
if !initdb.success() {
|
|
||||||
panic!("initdb failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
// listen for selected port
|
|
||||||
node.append_conf(
|
|
||||||
"postgresql.conf",
|
|
||||||
format!(
|
|
||||||
"\
|
|
||||||
max_wal_senders = 10\n\
|
|
||||||
max_replication_slots = 10\n\
|
|
||||||
hot_standby = on\n\
|
|
||||||
shared_buffers = 1MB\n\
|
|
||||||
max_connections = 100\n\
|
|
||||||
wal_level = replica\n\
|
|
||||||
listen_addresses = '{address}'\n\
|
|
||||||
port = {port}\n\
|
|
||||||
computenode_mode = true\n\
|
|
||||||
",
|
|
||||||
address = node.ip,
|
|
||||||
port = node.port
|
|
||||||
)
|
|
||||||
.as_str(),
|
|
||||||
);
|
|
||||||
|
|
||||||
node
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn new_node(&mut self) -> Arc<PostgresNode> {
|
|
||||||
let storage_cplane = self.storage_cplane;
|
|
||||||
let node = self.new_vanilla_node();
|
|
||||||
|
|
||||||
let pserver = storage_cplane.page_server_addr();
|
|
||||||
|
|
||||||
// Configure that node to take pages from pageserver
|
|
||||||
node.append_conf(
|
|
||||||
"postgresql.conf",
|
|
||||||
format!(
|
|
||||||
"\
|
|
||||||
page_server_connstring = 'host={} port={}'\n\
|
|
||||||
",
|
|
||||||
pserver.ip(),
|
|
||||||
pserver.port()
|
|
||||||
)
|
|
||||||
.as_str(),
|
|
||||||
);
|
|
||||||
|
|
||||||
node.clone()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn new_master_node(&mut self) -> Arc<PostgresNode> {
|
|
||||||
let node = self.new_vanilla_node();
|
|
||||||
|
|
||||||
node.append_conf(
|
|
||||||
"postgresql.conf",
|
|
||||||
"synchronous_standby_names = 'safekeeper_proxy'\n\
|
|
||||||
",
|
|
||||||
);
|
|
||||||
node.clone()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
pub struct WalProposerNode {
|
|
||||||
pid: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl WalProposerNode {
|
|
||||||
pub fn stop(&self) {
|
|
||||||
let status = Command::new("kill")
|
|
||||||
.arg(self.pid.to_string())
|
|
||||||
.env_clear()
|
|
||||||
.status()
|
|
||||||
.expect("failed to execute kill");
|
|
||||||
|
|
||||||
if !status.success() {
|
|
||||||
panic!("kill start failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for WalProposerNode {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
self.stop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
pub struct PostgresNode {
|
|
||||||
_node_id: usize,
|
|
||||||
pub port: u16,
|
|
||||||
pub ip: IpAddr,
|
|
||||||
pgdata: PathBuf,
|
|
||||||
pg_bin_dir: PathBuf,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PostgresNode {
|
|
||||||
pub fn append_conf(&self, config: &str, opts: &str) {
|
|
||||||
OpenOptions::new()
|
|
||||||
.append(true)
|
|
||||||
.open(self.pgdata.join(config).to_str().unwrap())
|
|
||||||
.unwrap()
|
|
||||||
.write_all(opts.as_bytes())
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
fn pg_ctl(&self, args: &[&str], check_ok: bool) {
|
|
||||||
let pg_ctl_path = self.pg_bin_dir.join("pg_ctl");
|
|
||||||
let pg_ctl = Command::new(pg_ctl_path)
|
|
||||||
.args(
|
|
||||||
[
|
|
||||||
&[
|
|
||||||
"-D",
|
|
||||||
self.pgdata.to_str().unwrap(),
|
|
||||||
"-l",
|
|
||||||
self.pgdata.join("log").to_str().unwrap(),
|
|
||||||
],
|
|
||||||
args,
|
|
||||||
]
|
|
||||||
.concat(),
|
|
||||||
)
|
|
||||||
.env_clear()
|
|
||||||
.env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
|
|
||||||
.status()
|
|
||||||
.expect("failed to execute pg_ctl");
|
|
||||||
|
|
||||||
if check_ok && !pg_ctl.success() {
|
|
||||||
panic!("pg_ctl failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn start(&self, storage_cplane: &StorageControlPlane) {
|
|
||||||
if storage_cplane.page_servers.len() != 0 {
|
|
||||||
let _res =
|
|
||||||
storage_cplane.page_server_psql(format!("callmemaybe {}", self.connstr()).as_str());
|
|
||||||
}
|
|
||||||
println!("Starting postgres node at '{}'", self.connstr());
|
|
||||||
self.pg_ctl(&["start"], true);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn restart(&self) {
|
|
||||||
self.pg_ctl(&["restart"], true);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn stop(&self) {
|
|
||||||
self.pg_ctl(&["-m", "immediate", "stop"], true);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn connstr(&self) -> String {
|
|
||||||
format!("host={} port={} user={}", self.ip, self.port, self.whoami())
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: cache that in control plane
|
|
||||||
pub fn whoami(&self) -> String {
|
|
||||||
let output = Command::new("whoami")
|
|
||||||
.output()
|
|
||||||
.expect("failed to execute whoami");
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
panic!("whoami failed");
|
|
||||||
}
|
|
||||||
|
|
||||||
String::from_utf8(output.stdout).unwrap().trim().to_string()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn safe_psql(&self, db: &str, sql: &str) -> Vec<tokio_postgres::Row> {
|
|
||||||
let connstring = format!(
|
|
||||||
"host={} port={} dbname={} user={}",
|
|
||||||
self.ip,
|
|
||||||
self.port,
|
|
||||||
db,
|
|
||||||
self.whoami()
|
|
||||||
);
|
|
||||||
let mut client = Client::connect(connstring.as_str(), NoTls).unwrap();
|
|
||||||
|
|
||||||
println!("Running {}", sql);
|
|
||||||
client.query(sql, &[]).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn open_psql(&self, db: &str) -> Client {
|
|
||||||
let connstring = format!(
|
|
||||||
"host={} port={} dbname={} user={}",
|
|
||||||
self.ip,
|
|
||||||
self.port,
|
|
||||||
db,
|
|
||||||
self.whoami()
|
|
||||||
);
|
|
||||||
Client::connect(connstring.as_str(), NoTls).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_pgdata(&self) -> Option<&str> {
|
|
||||||
self.pgdata.to_str()
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Create stub controlfile and respective xlog to start computenode */
|
|
||||||
pub fn setup_controlfile(&self) {
|
|
||||||
let filepath = format!("{}/global/pg_control", self.pgdata.to_str().unwrap());
|
|
||||||
|
|
||||||
{
|
|
||||||
File::create(filepath).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
let pg_resetwal_path = self.pg_bin_dir.join("pg_resetwal");
|
|
||||||
|
|
||||||
let pg_resetwal = Command::new(pg_resetwal_path)
|
|
||||||
.args(&["-D", self.pgdata.to_str().unwrap()])
|
|
||||||
.arg("-f")
|
|
||||||
// TODO probably we will have to modify pg_resetwal
|
|
||||||
// .arg("--compute-node")
|
|
||||||
.status()
|
|
||||||
.expect("failed to execute pg_resetwal");
|
|
||||||
|
|
||||||
if !pg_resetwal.success() {
|
|
||||||
panic!("pg_resetwal failed");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn start_proxy(&self, wal_acceptors: String) -> WalProposerNode {
|
|
||||||
let proxy_path = PG_BIN_DIR.join("safekeeper_proxy");
|
|
||||||
match Command::new(proxy_path.as_path())
|
|
||||||
.args(&["-s", &wal_acceptors])
|
|
||||||
.args(&["-h", &self.ip.to_string()])
|
|
||||||
.args(&["-p", &self.port.to_string()])
|
|
||||||
.arg("-v")
|
|
||||||
.stderr(File::create(TEST_WORKDIR.join("safepkeeper_proxy.log")).unwrap())
|
|
||||||
.spawn()
|
|
||||||
{
|
|
||||||
Ok(child) => WalProposerNode { pid: child.id() },
|
|
||||||
Err(e) => panic!("Failed to launch {:?}: {}", proxy_path, e),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO
|
|
||||||
pub fn pg_bench() {}
|
|
||||||
pub fn pg_regress() {}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Drop for PostgresNode {
|
|
||||||
// destructor to clean up state after test is done
|
|
||||||
// XXX: we may detect failed test by setting some flag in catch_unwind()
|
|
||||||
// and checking it here. But let just clean datadirs on start.
|
|
||||||
fn drop(&mut self) {
|
|
||||||
self.stop();
|
|
||||||
// fs::remove_dir_all(self.pgdata.clone()).unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn regress_check(pg: &PostgresNode) {
|
|
||||||
pg.safe_psql("postgres", "CREATE DATABASE regression");
|
|
||||||
|
|
||||||
let regress_run_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("tmp_check/regress");
|
|
||||||
fs::create_dir_all(regress_run_path.clone()).unwrap();
|
|
||||||
std::env::set_current_dir(regress_run_path).unwrap();
|
|
||||||
|
|
||||||
let regress_build_path =
|
|
||||||
Path::new(env!("CARGO_MANIFEST_DIR")).join("../tmp_install/build/src/test/regress");
|
|
||||||
let regress_src_path =
|
|
||||||
Path::new(env!("CARGO_MANIFEST_DIR")).join("../vendor/postgres/src/test/regress");
|
|
||||||
|
|
||||||
let _regress_check = Command::new(regress_build_path.join("pg_regress"))
|
|
||||||
.args(&[
|
|
||||||
"--bindir=''",
|
|
||||||
"--use-existing",
|
|
||||||
format!("--bindir={}", PG_BIN_DIR.to_str().unwrap()).as_str(),
|
|
||||||
format!("--dlpath={}", regress_build_path.to_str().unwrap()).as_str(),
|
|
||||||
format!(
|
|
||||||
"--schedule={}",
|
|
||||||
regress_src_path.join("parallel_schedule").to_str().unwrap()
|
|
||||||
)
|
|
||||||
.as_str(),
|
|
||||||
format!("--inputdir={}", regress_src_path.to_str().unwrap()).as_str(),
|
|
||||||
])
|
|
||||||
.env_clear()
|
|
||||||
.env("LD_LIBRARY_PATH", PG_LIB_DIR.to_str().unwrap())
|
|
||||||
.env("PGPORT", pg.port.to_string())
|
|
||||||
.env("PGUSER", pg.whoami())
|
|
||||||
.env("PGHOST", pg.ip.to_string())
|
|
||||||
.status()
|
|
||||||
.expect("pg_regress failed");
|
|
||||||
}
|
|
||||||
@@ -1,7 +1,11 @@
|
|||||||
// test node resettlement to an empty datadir
|
// test node resettlement to an empty datadir
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
/*
|
||||||
#[test]
|
#[test]
|
||||||
fn test_resettlement() {}
|
fn test_resettlement() {}
|
||||||
|
|
||||||
// test seq scan of everythin after restart
|
// test seq scan of everythin after restart
|
||||||
#[test]
|
#[test]
|
||||||
fn test_cold_seqscan() {}
|
fn test_cold_seqscan() {}
|
||||||
|
*/
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
|
// TODO
|
||||||
|
/*
|
||||||
#[test]
|
#[test]
|
||||||
fn test_actions() {}
|
fn test_actions() {}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_regress() {}
|
fn test_regress() {}
|
||||||
|
*/
|
||||||
|
|||||||
@@ -1,23 +1,24 @@
|
|||||||
#[allow(dead_code)]
|
// mod control_plane;
|
||||||
mod control_plane;
|
use control_plane::compute::ComputeControlPlane;
|
||||||
|
use control_plane::local_env;
|
||||||
use control_plane::ComputeControlPlane;
|
use control_plane::local_env::PointInTime;
|
||||||
use control_plane::StorageControlPlane;
|
use control_plane::storage::TestStorageControlPlane;
|
||||||
|
|
||||||
// XXX: force all redo at the end
|
// XXX: force all redo at the end
|
||||||
// -- restart + seqscan won't read deleted stuff
|
// -- restart + seqscan won't read deleted stuff
|
||||||
// -- pageserver api endpoint to check all rels
|
// -- pageserver api endpoint to check all rels
|
||||||
|
|
||||||
// Handcrafted cases with wal records that are (were) problematic for redo.
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_redo_cases() {
|
fn test_redo_cases() {
|
||||||
|
let local_env = local_env::test_env("test_redo_cases");
|
||||||
|
|
||||||
// Start pageserver that reads WAL directly from that postgres
|
// Start pageserver that reads WAL directly from that postgres
|
||||||
let storage_cplane = StorageControlPlane::one_page_server();
|
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
|
||||||
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
|
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
|
||||||
|
|
||||||
// start postgres
|
// start postgres
|
||||||
let node = compute_cplane.new_node();
|
let maintli = storage_cplane.get_branch_timeline("main");
|
||||||
node.start(&storage_cplane);
|
let node = compute_cplane.new_test_node(maintli);
|
||||||
|
node.start().unwrap();
|
||||||
|
|
||||||
// check basic work with table
|
// check basic work with table
|
||||||
node.safe_psql(
|
node.safe_psql(
|
||||||
@@ -49,30 +50,51 @@ fn test_redo_cases() {
|
|||||||
|
|
||||||
// Runs pg_regress on a compute node
|
// Runs pg_regress on a compute node
|
||||||
#[test]
|
#[test]
|
||||||
|
#[ignore]
|
||||||
fn test_regress() {
|
fn test_regress() {
|
||||||
|
let local_env = local_env::test_env("test_regress");
|
||||||
|
|
||||||
// Start pageserver that reads WAL directly from that postgres
|
// Start pageserver that reads WAL directly from that postgres
|
||||||
let storage_cplane = StorageControlPlane::one_page_server();
|
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
|
||||||
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
|
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
|
||||||
|
|
||||||
// start postgres
|
// start postgres
|
||||||
let node = compute_cplane.new_node();
|
let maintli = storage_cplane.get_branch_timeline("main");
|
||||||
node.start(&storage_cplane);
|
let node = compute_cplane.new_test_node(maintli);
|
||||||
|
node.start().unwrap();
|
||||||
|
|
||||||
control_plane::regress_check(&node);
|
control_plane::storage::regress_check(&node);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run two postgres instances on one pageserver
|
// Run two postgres instances on one pageserver, on different timelines
|
||||||
#[test]
|
#[test]
|
||||||
fn test_pageserver_multitenancy() {
|
fn test_pageserver_two_timelines() {
|
||||||
// Start pageserver that reads WAL directly from that postgres
|
let local_env = local_env::test_env("test_pageserver_two_timelines");
|
||||||
let storage_cplane = StorageControlPlane::one_page_server();
|
|
||||||
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
|
|
||||||
|
|
||||||
// Allocate postgres instance, but don't start
|
// Start pageserver that reads WAL directly from that postgres
|
||||||
let node1 = compute_cplane.new_node();
|
let storage_cplane = TestStorageControlPlane::one_page_server(&local_env);
|
||||||
let node2 = compute_cplane.new_node();
|
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
|
||||||
node1.start(&storage_cplane);
|
|
||||||
node2.start(&storage_cplane);
|
let maintli = storage_cplane.get_branch_timeline("main");
|
||||||
|
|
||||||
|
// Create new branch at the end of 'main'
|
||||||
|
let startpoint = local_env::find_end_of_wal(&local_env, maintli).unwrap();
|
||||||
|
local_env::create_branch(
|
||||||
|
&local_env,
|
||||||
|
"experimental",
|
||||||
|
PointInTime {
|
||||||
|
timelineid: maintli,
|
||||||
|
lsn: startpoint,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let experimentaltli = storage_cplane.get_branch_timeline("experimental");
|
||||||
|
|
||||||
|
// Launch postgres instances on both branches
|
||||||
|
let node1 = compute_cplane.new_test_node(maintli);
|
||||||
|
let node2 = compute_cplane.new_test_node(experimentaltli);
|
||||||
|
node1.start().unwrap();
|
||||||
|
node2.start().unwrap();
|
||||||
|
|
||||||
// check node1
|
// check node1
|
||||||
node1.safe_psql(
|
node1.safe_psql(
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
// Restart acceptors one by one while compute is under the load.
|
// Restart acceptors one by one while compute is under the load.
|
||||||
#[allow(dead_code)]
|
use control_plane::compute::ComputeControlPlane;
|
||||||
mod control_plane;
|
use control_plane::local_env;
|
||||||
use control_plane::ComputeControlPlane;
|
use control_plane::local_env::PointInTime;
|
||||||
use control_plane::StorageControlPlane;
|
use control_plane::storage::TestStorageControlPlane;
|
||||||
|
use pageserver::ZTimelineId;
|
||||||
|
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@@ -11,18 +12,20 @@ use std::{thread, time};
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_acceptors_normal_work() {
|
fn test_acceptors_normal_work() {
|
||||||
// Start pageserver that reads WAL directly from that postgres
|
let local_env = local_env::test_env("test_acceptors_normal_work");
|
||||||
|
|
||||||
const REDUNDANCY: usize = 3;
|
const REDUNDANCY: usize = 3;
|
||||||
let storage_cplane = StorageControlPlane::fault_tolerant(REDUNDANCY);
|
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
|
||||||
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
|
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
|
||||||
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
||||||
|
|
||||||
// start postgre
|
// start postgres
|
||||||
let node = compute_cplane.new_master_node();
|
let maintli = storage_cplane.get_branch_timeline("main");
|
||||||
node.start(&storage_cplane);
|
let node = compute_cplane.new_test_master_node(maintli);
|
||||||
|
node.start().unwrap();
|
||||||
|
|
||||||
// start proxy
|
// start proxy
|
||||||
let _proxy = node.start_proxy(wal_acceptors);
|
let _proxy = node.start_proxy(&wal_acceptors);
|
||||||
|
|
||||||
// check basic work with table
|
// check basic work with table
|
||||||
node.safe_psql(
|
node.safe_psql(
|
||||||
@@ -43,24 +46,97 @@ fn test_acceptors_normal_work() {
|
|||||||
// check wal files equality
|
// check wal files equality
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Run page server and multiple safekeepers, and multiple compute nodes running
|
||||||
|
// against different timelines.
|
||||||
|
#[test]
|
||||||
|
fn test_many_timelines() {
|
||||||
|
// Initialize a new repository, and set up WAL safekeepers and page server.
|
||||||
|
const REDUNDANCY: usize = 3;
|
||||||
|
const N_TIMELINES: usize = 5;
|
||||||
|
let local_env = local_env::test_env("test_many_timelines");
|
||||||
|
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
|
||||||
|
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
|
||||||
|
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
||||||
|
|
||||||
|
// Create branches
|
||||||
|
let mut timelines: Vec<ZTimelineId> = Vec::new();
|
||||||
|
let maintli = storage_cplane.get_branch_timeline("main"); // main branch
|
||||||
|
timelines.push(maintli);
|
||||||
|
let startpoint = local_env::find_end_of_wal(&local_env, maintli).unwrap();
|
||||||
|
for i in 1..N_TIMELINES {
|
||||||
|
// additional branches
|
||||||
|
let branchname = format!("experimental{}", i);
|
||||||
|
local_env::create_branch(
|
||||||
|
&local_env,
|
||||||
|
&branchname,
|
||||||
|
PointInTime {
|
||||||
|
timelineid: maintli,
|
||||||
|
lsn: startpoint,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
let tli = storage_cplane.get_branch_timeline(&branchname);
|
||||||
|
timelines.push(tli);
|
||||||
|
}
|
||||||
|
|
||||||
|
// start postgres on each timeline
|
||||||
|
let mut nodes = Vec::new();
|
||||||
|
for tli in timelines {
|
||||||
|
let node = compute_cplane.new_test_node(tli);
|
||||||
|
nodes.push(node.clone());
|
||||||
|
node.start().unwrap();
|
||||||
|
node.start_proxy(&wal_acceptors);
|
||||||
|
}
|
||||||
|
|
||||||
|
// create schema
|
||||||
|
for node in &nodes {
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"CREATE TABLE t(key int primary key, value text)",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate data
|
||||||
|
for node in &nodes {
|
||||||
|
node.safe_psql(
|
||||||
|
"postgres",
|
||||||
|
"INSERT INTO t SELECT generate_series(1,100000), 'payload'",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check data
|
||||||
|
for node in &nodes {
|
||||||
|
let count: i64 = node
|
||||||
|
.safe_psql("postgres", "SELECT sum(key) FROM t")
|
||||||
|
.first()
|
||||||
|
.unwrap()
|
||||||
|
.get(0);
|
||||||
|
println!("sum = {}", count);
|
||||||
|
assert_eq!(count, 5000050000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Majority is always alive
|
// Majority is always alive
|
||||||
#[test]
|
#[test]
|
||||||
fn test_acceptors_restarts() {
|
fn test_acceptors_restarts() {
|
||||||
|
let local_env = local_env::test_env("test_acceptors_restarts");
|
||||||
|
|
||||||
// Start pageserver that reads WAL directly from that postgres
|
// Start pageserver that reads WAL directly from that postgres
|
||||||
const REDUNDANCY: usize = 3;
|
const REDUNDANCY: usize = 3;
|
||||||
const FAULT_PROBABILITY: f32 = 0.01;
|
const FAULT_PROBABILITY: f32 = 0.01;
|
||||||
|
|
||||||
let storage_cplane = StorageControlPlane::fault_tolerant(REDUNDANCY);
|
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
|
||||||
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
|
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
|
||||||
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
|
|
||||||
// start postgre
|
// start postgres
|
||||||
let node = compute_cplane.new_master_node();
|
let maintli = storage_cplane.get_branch_timeline("main");
|
||||||
node.start(&storage_cplane);
|
let node = compute_cplane.new_test_master_node(maintli);
|
||||||
|
node.start().unwrap();
|
||||||
|
|
||||||
// start proxy
|
// start proxy
|
||||||
let _proxy = node.start_proxy(wal_acceptors);
|
let _proxy = node.start_proxy(&wal_acceptors);
|
||||||
let mut failed_node: Option<usize> = None;
|
let mut failed_node: Option<usize> = None;
|
||||||
|
|
||||||
// check basic work with table
|
// check basic work with table
|
||||||
@@ -80,7 +156,7 @@ fn test_acceptors_restarts() {
|
|||||||
} else {
|
} else {
|
||||||
let node: usize = rng.gen_range(0..REDUNDANCY);
|
let node: usize = rng.gen_range(0..REDUNDANCY);
|
||||||
failed_node = Some(node);
|
failed_node = Some(node);
|
||||||
storage_cplane.wal_acceptors[node].stop();
|
storage_cplane.wal_acceptors[node].stop().unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -93,7 +169,7 @@ fn test_acceptors_restarts() {
|
|||||||
assert_eq!(count, 500500);
|
assert_eq!(count, 500500);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn start_acceptor(cplane: &Arc<StorageControlPlane>, no: usize) {
|
fn start_acceptor(cplane: &Arc<TestStorageControlPlane>, no: usize) {
|
||||||
let cp = cplane.clone();
|
let cp = cplane.clone();
|
||||||
thread::spawn(move || {
|
thread::spawn(move || {
|
||||||
thread::sleep(time::Duration::from_secs(1));
|
thread::sleep(time::Duration::from_secs(1));
|
||||||
@@ -105,20 +181,23 @@ fn start_acceptor(cplane: &Arc<StorageControlPlane>, no: usize) {
|
|||||||
// them again and check that nothing was losed. Repeat.
|
// them again and check that nothing was losed. Repeat.
|
||||||
// N_CRASHES env var
|
// N_CRASHES env var
|
||||||
#[test]
|
#[test]
|
||||||
fn test_acceptors_unavalability() {
|
fn test_acceptors_unavailability() {
|
||||||
|
let local_env = local_env::test_env("test_acceptors_unavailability");
|
||||||
|
|
||||||
// Start pageserver that reads WAL directly from that postgres
|
// Start pageserver that reads WAL directly from that postgres
|
||||||
const REDUNDANCY: usize = 2;
|
const REDUNDANCY: usize = 2;
|
||||||
|
|
||||||
let storage_cplane = StorageControlPlane::fault_tolerant(REDUNDANCY);
|
let storage_cplane = TestStorageControlPlane::fault_tolerant(&local_env, REDUNDANCY);
|
||||||
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
|
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
|
||||||
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
||||||
|
|
||||||
// start postgre
|
// start postgres
|
||||||
let node = compute_cplane.new_master_node();
|
let maintli = storage_cplane.get_branch_timeline("main");
|
||||||
node.start(&storage_cplane);
|
let node = compute_cplane.new_test_master_node(maintli);
|
||||||
|
node.start().unwrap();
|
||||||
|
|
||||||
// start proxy
|
// start proxy
|
||||||
let _proxy = node.start_proxy(wal_acceptors);
|
let _proxy = node.start_proxy(&wal_acceptors);
|
||||||
|
|
||||||
// check basic work with table
|
// check basic work with table
|
||||||
node.safe_psql(
|
node.safe_psql(
|
||||||
@@ -129,7 +208,7 @@ fn test_acceptors_unavalability() {
|
|||||||
psql.execute("INSERT INTO t values (1, 'payload')", &[])
|
psql.execute("INSERT INTO t values (1, 'payload')", &[])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
storage_cplane.wal_acceptors[0].stop();
|
storage_cplane.wal_acceptors[0].stop().unwrap();
|
||||||
let cp = Arc::new(storage_cplane);
|
let cp = Arc::new(storage_cplane);
|
||||||
start_acceptor(&cp, 0);
|
start_acceptor(&cp, 0);
|
||||||
let now = SystemTime::now();
|
let now = SystemTime::now();
|
||||||
@@ -139,7 +218,7 @@ fn test_acceptors_unavalability() {
|
|||||||
psql.execute("INSERT INTO t values (3, 'payload')", &[])
|
psql.execute("INSERT INTO t values (3, 'payload')", &[])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
cp.wal_acceptors[1].stop();
|
cp.wal_acceptors[1].stop().unwrap();
|
||||||
start_acceptor(&cp, 1);
|
start_acceptor(&cp, 1);
|
||||||
psql.execute("INSERT INTO t values (4, 'payload')", &[])
|
psql.execute("INSERT INTO t values (4, 'payload')", &[])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -157,16 +236,16 @@ fn test_acceptors_unavalability() {
|
|||||||
assert_eq!(count, 15);
|
assert_eq!(count, 15);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn simulate_failures(cplane: &Arc<StorageControlPlane>) {
|
fn simulate_failures(cplane: Arc<TestStorageControlPlane>) {
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
let n_acceptors = cplane.wal_acceptors.len();
|
let n_acceptors = cplane.wal_acceptors.len();
|
||||||
let failure_period = time::Duration::from_secs(1);
|
let failure_period = time::Duration::from_secs(1);
|
||||||
loop {
|
while cplane.is_running() {
|
||||||
thread::sleep(failure_period);
|
thread::sleep(failure_period);
|
||||||
let mask: u32 = rng.gen_range(0..(1 << n_acceptors));
|
let mask: u32 = rng.gen_range(0..(1 << n_acceptors));
|
||||||
for i in 0..n_acceptors {
|
for i in 0..n_acceptors {
|
||||||
if (mask & (1 << i)) != 0 {
|
if (mask & (1 << i)) != 0 {
|
||||||
cplane.wal_acceptors[i].stop();
|
cplane.wal_acceptors[i].stop().unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
thread::sleep(failure_period);
|
thread::sleep(failure_period);
|
||||||
@@ -181,29 +260,34 @@ fn simulate_failures(cplane: &Arc<StorageControlPlane>) {
|
|||||||
// Race condition test
|
// Race condition test
|
||||||
#[test]
|
#[test]
|
||||||
fn test_race_conditions() {
|
fn test_race_conditions() {
|
||||||
|
let local_env = local_env::test_env("test_race_conditions");
|
||||||
|
|
||||||
// Start pageserver that reads WAL directly from that postgres
|
// Start pageserver that reads WAL directly from that postgres
|
||||||
const REDUNDANCY: usize = 3;
|
const REDUNDANCY: usize = 3;
|
||||||
|
|
||||||
let storage_cplane = StorageControlPlane::fault_tolerant(REDUNDANCY);
|
let storage_cplane = Arc::new(TestStorageControlPlane::fault_tolerant(
|
||||||
let mut compute_cplane = ComputeControlPlane::local(&storage_cplane);
|
&local_env, REDUNDANCY,
|
||||||
|
));
|
||||||
|
let mut compute_cplane = ComputeControlPlane::local(&local_env, &storage_cplane.pageserver);
|
||||||
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
let wal_acceptors = storage_cplane.get_wal_acceptor_conn_info();
|
||||||
|
|
||||||
// start postgre
|
// start postgres
|
||||||
let node = compute_cplane.new_master_node();
|
let maintli = storage_cplane.get_branch_timeline("main");
|
||||||
node.start(&storage_cplane);
|
let node = compute_cplane.new_test_master_node(maintli);
|
||||||
|
node.start().unwrap();
|
||||||
|
|
||||||
// start proxy
|
// start proxy
|
||||||
let _proxy = node.start_proxy(wal_acceptors);
|
let _proxy = node.start_proxy(&wal_acceptors);
|
||||||
|
|
||||||
// check basic work with table
|
// check basic work with table
|
||||||
node.safe_psql(
|
node.safe_psql(
|
||||||
"postgres",
|
"postgres",
|
||||||
"CREATE TABLE t(key int primary key, value text)",
|
"CREATE TABLE t(key int primary key, value text)",
|
||||||
);
|
);
|
||||||
let cplane = Arc::new(storage_cplane);
|
|
||||||
let cp = cplane.clone();
|
let cp = storage_cplane.clone();
|
||||||
thread::spawn(move || {
|
let failures_thread = thread::spawn(move || {
|
||||||
simulate_failures(&cp);
|
simulate_failures(cp);
|
||||||
});
|
});
|
||||||
|
|
||||||
let mut psql = node.open_psql("postgres");
|
let mut psql = node.open_psql("postgres");
|
||||||
@@ -218,5 +302,7 @@ fn test_race_conditions() {
|
|||||||
.get(0);
|
.get(0);
|
||||||
println!("sum = {}", count);
|
println!("sum = {}", count);
|
||||||
assert_eq!(count, 500500);
|
assert_eq!(count, 500500);
|
||||||
cplane.stop();
|
|
||||||
|
storage_cplane.stop();
|
||||||
|
failures_thread.join().unwrap();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ rand = "0.8.3"
|
|||||||
regex = "1.4.5"
|
regex = "1.4.5"
|
||||||
bytes = "1.0.1"
|
bytes = "1.0.1"
|
||||||
byteorder = "1.4.3"
|
byteorder = "1.4.3"
|
||||||
fs2 = "0.4.3"
|
|
||||||
futures = "0.3.13"
|
futures = "0.3.13"
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
slog-stdlog = "4.1.0"
|
slog-stdlog = "4.1.0"
|
||||||
@@ -26,11 +25,18 @@ clap = "2.33.0"
|
|||||||
termion = "1.5.6"
|
termion = "1.5.6"
|
||||||
tui = "0.14.0"
|
tui = "0.14.0"
|
||||||
daemonize = "0.4.1"
|
daemonize = "0.4.1"
|
||||||
rust-s3 = { git = "https://github.com/hlinnaka/rust-s3", features = ["no-verify-ssl"] }
|
rust-s3 = { git = "https://github.com/hlinnaka/rust-s3", rev="7f15a24ec7daa0a5d9516da706212745f9042818", features = ["no-verify-ssl"] }
|
||||||
tokio = { version = "1.3.0", features = ["full"] }
|
tokio = { version = "1.3.0", features = ["full"] }
|
||||||
tokio-stream = { version = "0.1.4" }
|
tokio-stream = { version = "0.1.4" }
|
||||||
tokio-postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
|
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
postgres-protocol = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
|
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
|
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
|
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
crc32c = "0.6.0"
|
crc32c = "0.6.0"
|
||||||
|
walkdir = "2"
|
||||||
|
thiserror = "1.0"
|
||||||
|
hex = "0.4.3"
|
||||||
|
tar = "0.4.33"
|
||||||
|
|
||||||
|
postgres_ffi = { path = "../postgres_ffi" }
|
||||||
|
|||||||
202
pageserver/src/basebackup.rs
Normal file
202
pageserver/src/basebackup.rs
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
use log::*;
|
||||||
|
use regex::Regex;
|
||||||
|
use std::fmt;
|
||||||
|
use std::io::Write;
|
||||||
|
use tar::Builder;
|
||||||
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
|
use crate::ZTimelineId;
|
||||||
|
|
||||||
|
pub fn send_snapshot_tarball(
|
||||||
|
write: &mut dyn Write,
|
||||||
|
timelineid: ZTimelineId,
|
||||||
|
snapshotlsn: u64,
|
||||||
|
) -> Result<(), std::io::Error> {
|
||||||
|
let mut ar = Builder::new(write);
|
||||||
|
|
||||||
|
let snappath = format!("timelines/{}/snapshots/{:016X}", timelineid, snapshotlsn);
|
||||||
|
let walpath = format!("timelines/{}/wal", timelineid);
|
||||||
|
|
||||||
|
debug!("sending tarball of snapshot in {}", snappath);
|
||||||
|
//ar.append_dir_all("", &snappath)?;
|
||||||
|
|
||||||
|
for entry in WalkDir::new(&snappath) {
|
||||||
|
let entry = entry?;
|
||||||
|
let fullpath = entry.path();
|
||||||
|
let relpath = entry.path().strip_prefix(&snappath).unwrap();
|
||||||
|
|
||||||
|
if relpath.to_str().unwrap() == "" {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if entry.file_type().is_dir() {
|
||||||
|
trace!(
|
||||||
|
"sending dir {} as {}",
|
||||||
|
fullpath.display(),
|
||||||
|
relpath.display()
|
||||||
|
);
|
||||||
|
ar.append_dir(relpath, fullpath)?;
|
||||||
|
} else if entry.file_type().is_symlink() {
|
||||||
|
error!("ignoring symlink in snapshot dir");
|
||||||
|
} else if entry.file_type().is_file() {
|
||||||
|
// Shared catalogs are exempt
|
||||||
|
if relpath.starts_with("global/") {
|
||||||
|
trace!("sending shared catalog {}", relpath.display());
|
||||||
|
ar.append_path_with_name(fullpath, relpath)?;
|
||||||
|
} else if !is_rel_file_path(relpath.to_str().unwrap()) {
|
||||||
|
trace!("sending {}", relpath.display());
|
||||||
|
ar.append_path_with_name(fullpath, relpath)?;
|
||||||
|
} else {
|
||||||
|
trace!("not sending {}", relpath.display());
|
||||||
|
// FIXME: send all files for now
|
||||||
|
ar.append_path_with_name(fullpath, relpath)?;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
error!("unknown file type: {}", fullpath.display());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: also send all the WAL
|
||||||
|
for entry in std::fs::read_dir(&walpath)? {
|
||||||
|
let entry = entry?;
|
||||||
|
let fullpath = &entry.path();
|
||||||
|
let relpath = fullpath.strip_prefix(&walpath).unwrap();
|
||||||
|
|
||||||
|
if !entry.path().is_file() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let archive_fname = relpath.to_str().unwrap().clone();
|
||||||
|
let archive_fname = archive_fname
|
||||||
|
.strip_suffix(".partial")
|
||||||
|
.unwrap_or(&archive_fname);
|
||||||
|
let archive_path = "pg_wal/".to_owned() + archive_fname;
|
||||||
|
ar.append_path_with_name(fullpath, archive_path)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
ar.finish()?;
|
||||||
|
debug!("all tarred up!");
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// formats:
|
||||||
|
// <oid>
|
||||||
|
// <oid>_<fork name>
|
||||||
|
// <oid>.<segment number>
|
||||||
|
// <oid>_<fork name>.<segment number>
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct FilePathError {
|
||||||
|
msg: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FilePathError {
|
||||||
|
fn new(msg: &str) -> FilePathError {
|
||||||
|
FilePathError {
|
||||||
|
msg: msg.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<core::num::ParseIntError> for FilePathError {
|
||||||
|
fn from(e: core::num::ParseIntError) -> Self {
|
||||||
|
return FilePathError {
|
||||||
|
msg: format!("invalid filename: {}", e),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for FilePathError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "invalid filename")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn forkname_to_forknum(forkname: Option<&str>) -> Result<u32, FilePathError> {
|
||||||
|
match forkname {
|
||||||
|
// "main" is not in filenames, it's implicit if the fork name is not present
|
||||||
|
None => Ok(0),
|
||||||
|
Some("fsm") => Ok(1),
|
||||||
|
Some("vm") => Ok(2),
|
||||||
|
Some("init") => Ok(3),
|
||||||
|
Some(_) => Err(FilePathError::new("invalid forkname")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_filename(fname: &str) -> Result<(u32, u32, u32), FilePathError> {
|
||||||
|
let re = Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
|
||||||
|
|
||||||
|
let caps = re
|
||||||
|
.captures(fname)
|
||||||
|
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
|
||||||
|
|
||||||
|
let relnode_str = caps.name("relnode").unwrap().as_str();
|
||||||
|
let relnode = u32::from_str_radix(relnode_str, 10)?;
|
||||||
|
|
||||||
|
let forkname_match = caps.name("forkname");
|
||||||
|
let forkname = if forkname_match.is_none() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(forkname_match.unwrap().as_str())
|
||||||
|
};
|
||||||
|
let forknum = forkname_to_forknum(forkname)?;
|
||||||
|
|
||||||
|
let segno_match = caps.name("segno");
|
||||||
|
let segno = if segno_match.is_none() {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
u32::from_str_radix(segno_match.unwrap().as_str(), 10)?
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok((relnode, forknum, segno));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_rel_file_path(path: &str) -> Result<(), FilePathError> {
|
||||||
|
/*
|
||||||
|
* Relation data files can be in one of the following directories:
|
||||||
|
*
|
||||||
|
* global/
|
||||||
|
* shared relations
|
||||||
|
*
|
||||||
|
* base/<db oid>/
|
||||||
|
* regular relations, default tablespace
|
||||||
|
*
|
||||||
|
* pg_tblspc/<tblspc oid>/<tblspc version>/
|
||||||
|
* within a non-default tablespace (the name of the directory
|
||||||
|
* depends on version)
|
||||||
|
*
|
||||||
|
* And the relation data files themselves have a filename like:
|
||||||
|
*
|
||||||
|
* <oid>.<segment number>
|
||||||
|
*/
|
||||||
|
if let Some(fname) = path.strip_prefix("global/") {
|
||||||
|
let (_relnode, _forknum, _segno) = parse_filename(fname)?;
|
||||||
|
|
||||||
|
return Ok(());
|
||||||
|
} else if let Some(dbpath) = path.strip_prefix("base/") {
|
||||||
|
let mut s = dbpath.split("/");
|
||||||
|
let dbnode_str = s
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
|
||||||
|
let _dbnode = u32::from_str_radix(dbnode_str, 10)?;
|
||||||
|
let fname = s
|
||||||
|
.next()
|
||||||
|
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
|
||||||
|
if s.next().is_some() {
|
||||||
|
return Err(FilePathError::new("invalid relation data file name"));
|
||||||
|
};
|
||||||
|
|
||||||
|
let (_relnode, _forknum, _segno) = parse_filename(fname)?;
|
||||||
|
|
||||||
|
return Ok(());
|
||||||
|
} else if let Some(_) = path.strip_prefix("pg_tblspc/") {
|
||||||
|
// TODO
|
||||||
|
return Err(FilePathError::new("tablespaces not supported"));
|
||||||
|
} else {
|
||||||
|
return Err(FilePathError::new("invalid relation data file name"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_rel_file_path(path: &str) -> bool {
|
||||||
|
return parse_rel_file_path(path).is_ok();
|
||||||
|
}
|
||||||
@@ -1,43 +0,0 @@
|
|||||||
use anyhow::Result;
|
|
||||||
use clap::{App, AppSettings};
|
|
||||||
|
|
||||||
pub mod pg;
|
|
||||||
pub mod snapshot;
|
|
||||||
pub mod storage;
|
|
||||||
mod subcommand;
|
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
|
||||||
let cli_commands = subcommand::ClapCommands {
|
|
||||||
commands: vec![
|
|
||||||
Box::new(pg::PgCmd {
|
|
||||||
clap_cmd: clap::SubCommand::with_name("pg"),
|
|
||||||
}),
|
|
||||||
Box::new(storage::StorageCmd {
|
|
||||||
clap_cmd: clap::SubCommand::with_name("storage"),
|
|
||||||
}),
|
|
||||||
Box::new(snapshot::SnapshotCmd {
|
|
||||||
clap_cmd: clap::SubCommand::with_name("snapshot"),
|
|
||||||
}),
|
|
||||||
],
|
|
||||||
};
|
|
||||||
|
|
||||||
let matches = App::new("zenith")
|
|
||||||
.about("Zenith CLI")
|
|
||||||
.version("1.0")
|
|
||||||
.setting(AppSettings::SubcommandRequiredElseHelp)
|
|
||||||
.subcommands(cli_commands.generate())
|
|
||||||
.get_matches();
|
|
||||||
|
|
||||||
if let Some(subcommand) = matches.subcommand_name() {
|
|
||||||
println!("'git {}' was used", subcommand);
|
|
||||||
}
|
|
||||||
|
|
||||||
match matches.subcommand() {
|
|
||||||
("pg", Some(sub_args)) => cli_commands.commands[0].run(sub_args.clone())?,
|
|
||||||
("storage", Some(sub_args)) => cli_commands.commands[1].run(sub_args.clone())?,
|
|
||||||
("snapshot", Some(sub_args)) => cli_commands.commands[2].run(sub_args.clone())?,
|
|
||||||
("", None) => println!("No subcommand"),
|
|
||||||
_ => unreachable!(),
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
@@ -1,105 +0,0 @@
|
|||||||
use anyhow::Result;
|
|
||||||
use clap::{App, AppSettings, Arg};
|
|
||||||
|
|
||||||
use crate::subcommand;
|
|
||||||
|
|
||||||
pub struct PgCmd<'a> {
|
|
||||||
pub clap_cmd: clap::App<'a, 'a>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl subcommand::SubCommand for PgCmd<'_> {
|
|
||||||
fn gen_clap_command(&self) -> clap::App {
|
|
||||||
let c = self.clap_cmd.clone();
|
|
||||||
c.about("Operations with zenith compute nodes")
|
|
||||||
.setting(AppSettings::SubcommandRequiredElseHelp)
|
|
||||||
.subcommand(App::new("list").about("List existing compute nodes"))
|
|
||||||
.subcommand(
|
|
||||||
App::new("create")
|
|
||||||
.about(
|
|
||||||
"Create (init) new data directory using given storage and start postgres",
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("name")
|
|
||||||
.short("n")
|
|
||||||
.long("name")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Name of the compute node"),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("storage")
|
|
||||||
.short("s")
|
|
||||||
.long("storage")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Name of the storage node to use"),
|
|
||||||
)
|
|
||||||
//TODO should it be just name of uploaded snapshot or some path?
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("snapshot")
|
|
||||||
.long("snapshot")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Name of the snapshot to use"),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("nostart")
|
|
||||||
.long("no-start")
|
|
||||||
.takes_value(false)
|
|
||||||
.help("Don't start postgres on the created node"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
.subcommand(
|
|
||||||
App::new("destroy")
|
|
||||||
.about("Stop postgres and destroy node's data directory")
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("name")
|
|
||||||
.short("n")
|
|
||||||
.long("name")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Name of the compute node"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
.subcommand(
|
|
||||||
App::new("start")
|
|
||||||
.about("Start postgres on the given node")
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("name")
|
|
||||||
.short("n")
|
|
||||||
.long("name")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Name of the compute node"),
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("replica")
|
|
||||||
.long("replica")
|
|
||||||
.takes_value(false)
|
|
||||||
.help("Start the compute node as replica"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
.subcommand(
|
|
||||||
App::new("stop")
|
|
||||||
.about("Stop postgres on the given node")
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("name")
|
|
||||||
.short("n")
|
|
||||||
.long("name")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Name of the compute node"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
.subcommand(
|
|
||||||
App::new("show")
|
|
||||||
.about("Show info about the given node")
|
|
||||||
.arg(
|
|
||||||
Arg::with_name("name")
|
|
||||||
.short("n")
|
|
||||||
.long("name")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Name of the compute node"),
|
|
||||||
),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn run(&self, args: clap::ArgMatches) -> Result<()> {
|
|
||||||
println!("Run PgCmd with args {:?}", args);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
use anyhow::Result;
|
|
||||||
use clap::{App, AppSettings, Arg};
|
|
||||||
|
|
||||||
use crate::subcommand;
|
|
||||||
|
|
||||||
pub struct SnapshotCmd<'a> {
|
|
||||||
pub clap_cmd: clap::App<'a, 'a>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl subcommand::SubCommand for SnapshotCmd<'_> {
|
|
||||||
fn gen_clap_command(&self) -> clap::App {
|
|
||||||
let c = self.clap_cmd.clone();
|
|
||||||
c.about("Operations with zenith snapshots")
|
|
||||||
.setting(AppSettings::SubcommandRequiredElseHelp)
|
|
||||||
.subcommand(App::new("list"))
|
|
||||||
.subcommand(App::new("create").arg(Arg::with_name("pgdata").required(true)))
|
|
||||||
.subcommand(App::new("destroy"))
|
|
||||||
.subcommand(App::new("start"))
|
|
||||||
.subcommand(App::new("stop"))
|
|
||||||
.subcommand(App::new("show"))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn run(&self, args: clap::ArgMatches) -> Result<()> {
|
|
||||||
println!("Run SnapshotCmd with args {:?}", args);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,25 +0,0 @@
|
|||||||
use anyhow::Result;
|
|
||||||
use clap::{App, AppSettings};
|
|
||||||
|
|
||||||
use crate::subcommand;
|
|
||||||
|
|
||||||
pub struct StorageCmd<'a> {
|
|
||||||
pub clap_cmd: clap::App<'a, 'a>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl subcommand::SubCommand for StorageCmd<'_> {
|
|
||||||
fn gen_clap_command(&self) -> clap::App {
|
|
||||||
let c = self.clap_cmd.clone();
|
|
||||||
c.about("Operations with zenith storage nodes")
|
|
||||||
.setting(AppSettings::SubcommandRequiredElseHelp)
|
|
||||||
.subcommand(App::new("list"))
|
|
||||||
.subcommand(App::new("attach"))
|
|
||||||
.subcommand(App::new("detach"))
|
|
||||||
.subcommand(App::new("show"))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn run(&self, args: clap::ArgMatches) -> Result<()> {
|
|
||||||
println!("Run StorageCmd with args {:?}", args);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,29 +0,0 @@
|
|||||||
use anyhow::Result;
|
|
||||||
|
|
||||||
/// All subcommands need to implement this interface.
|
|
||||||
pub trait SubCommand {
|
|
||||||
/// Generates the cli-config that Clap requires for the subcommand.
|
|
||||||
fn gen_clap_command(&self) -> clap::App;
|
|
||||||
|
|
||||||
/// Runs the body of the subcommand.
|
|
||||||
fn run(&self, args: clap::ArgMatches) -> Result<()>;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A struct which holds a vector of heap-allocated `Box`es of trait objects all of which must
|
|
||||||
/// implement the `SubCommand` trait, but other than that, can be of any type.
|
|
||||||
pub struct ClapCommands {
|
|
||||||
pub commands: Vec<Box<dyn SubCommand>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ClapCommands {
|
|
||||||
/// Generates a vector of `clap::Apps` that can be passed into clap's `.subcommands()` method in
|
|
||||||
/// order to generate the full CLI.
|
|
||||||
pub fn generate(&self) -> Vec<clap::App> {
|
|
||||||
let mut v: Vec<clap::App> = Vec::new();
|
|
||||||
|
|
||||||
for command in self.commands.iter() {
|
|
||||||
v.push(command.gen_clap_command());
|
|
||||||
}
|
|
||||||
v
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -4,72 +4,63 @@
|
|||||||
|
|
||||||
use log::*;
|
use log::*;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
|
use std::fs::{File, OpenOptions};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::process::exit;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::{fs::File, fs::OpenOptions, str::FromStr};
|
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
use clap::{App, Arg};
|
use clap::{App, Arg};
|
||||||
use daemonize::Daemonize;
|
use daemonize::Daemonize;
|
||||||
|
|
||||||
use slog;
|
|
||||||
use slog::Drain;
|
use slog::Drain;
|
||||||
use slog_scope;
|
|
||||||
use slog_stdlog;
|
|
||||||
|
|
||||||
use pageserver::page_service;
|
use pageserver::page_service;
|
||||||
use pageserver::restore_s3;
|
|
||||||
use pageserver::tui;
|
use pageserver::tui;
|
||||||
use pageserver::walreceiver;
|
//use pageserver::walreceiver;
|
||||||
use pageserver::PageServerConf;
|
use pageserver::PageServerConf;
|
||||||
|
|
||||||
fn main() -> Result<(), io::Error> {
|
fn zenith_repo_dir() -> String {
|
||||||
|
// Find repository path
|
||||||
|
match std::env::var_os("ZENITH_REPO_DIR") {
|
||||||
|
Some(val) => String::from(val.to_str().unwrap()),
|
||||||
|
None => ".zenith".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
let arg_matches = App::new("Zenith page server")
|
let arg_matches = App::new("Zenith page server")
|
||||||
.about("Materializes WAL stream to pages and serves them to the postgres")
|
.about("Materializes WAL stream to pages and serves them to the postgres")
|
||||||
.arg(Arg::with_name("datadir")
|
.arg(
|
||||||
.short("D")
|
Arg::with_name("listen")
|
||||||
.long("dir")
|
.short("l")
|
||||||
.takes_value(true)
|
.long("listen")
|
||||||
.help("Path to the page server data directory"))
|
.takes_value(true)
|
||||||
.arg(Arg::with_name("wal_producer")
|
.help("listen for incoming page requests on ip:port (default: 127.0.0.1:5430)"),
|
||||||
.short("w")
|
)
|
||||||
.long("wal-producer")
|
.arg(
|
||||||
.takes_value(true)
|
Arg::with_name("interactive")
|
||||||
.help("connect to the WAL sender (postgres or wal_acceptor) on connstr (default: 'host=127.0.0.1 port=65432 user=zenith')"))
|
.short("i")
|
||||||
.arg(Arg::with_name("listen")
|
.long("interactive")
|
||||||
.short("l")
|
.takes_value(false)
|
||||||
.long("listen")
|
.help("Interactive mode"),
|
||||||
.takes_value(true)
|
)
|
||||||
.help("listen for incoming page requests on ip:port (default: 127.0.0.1:5430)"))
|
.arg(
|
||||||
.arg(Arg::with_name("interactive")
|
Arg::with_name("daemonize")
|
||||||
.short("i")
|
.short("d")
|
||||||
.long("interactive")
|
.long("daemonize")
|
||||||
.takes_value(false)
|
.takes_value(false)
|
||||||
.help("Interactive mode"))
|
.help("Run in the background"),
|
||||||
.arg(Arg::with_name("daemonize")
|
)
|
||||||
.short("d")
|
|
||||||
.long("daemonize")
|
|
||||||
.takes_value(false)
|
|
||||||
.help("Run in the background"))
|
|
||||||
.arg(Arg::with_name("skip_recovery")
|
|
||||||
.long("skip-recovery")
|
|
||||||
.takes_value(false)
|
|
||||||
.help("Skip S3 recovery procedy and start empty"))
|
|
||||||
.get_matches();
|
.get_matches();
|
||||||
|
|
||||||
let mut conf = PageServerConf {
|
let mut conf = PageServerConf {
|
||||||
data_dir: PathBuf::from("./"),
|
|
||||||
daemonize: false,
|
daemonize: false,
|
||||||
interactive: false,
|
interactive: false,
|
||||||
wal_producer_connstr: None,
|
|
||||||
listen_addr: "127.0.0.1:5430".parse().unwrap(),
|
listen_addr: "127.0.0.1:5430".parse().unwrap(),
|
||||||
skip_recovery: false,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(dir) = arg_matches.value_of("datadir") {
|
|
||||||
conf.data_dir = PathBuf::from(dir);
|
|
||||||
}
|
|
||||||
|
|
||||||
if arg_matches.is_present("daemonize") {
|
if arg_matches.is_present("daemonize") {
|
||||||
conf.daemonize = true;
|
conf.daemonize = true;
|
||||||
}
|
}
|
||||||
@@ -79,31 +70,21 @@ fn main() -> Result<(), io::Error> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if conf.daemonize && conf.interactive {
|
if conf.daemonize && conf.interactive {
|
||||||
return Err(io::Error::new(
|
eprintln!("--daemonize is not allowed with --interactive: choose one");
|
||||||
io::ErrorKind::InvalidInput,
|
exit(1);
|
||||||
"--daemonize is not allowed with --interactive: choose one",
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
if arg_matches.is_present("skip_recovery") {
|
|
||||||
conf.skip_recovery = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(addr) = arg_matches.value_of("wal_producer") {
|
|
||||||
conf.wal_producer_connstr = Some(String::from_str(addr).unwrap());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(addr) = arg_matches.value_of("listen") {
|
if let Some(addr) = arg_matches.value_of("listen") {
|
||||||
conf.listen_addr = addr.parse().unwrap();
|
conf.listen_addr = addr.parse()?;
|
||||||
}
|
}
|
||||||
|
|
||||||
start_pageserver(conf)
|
start_pageserver(&conf)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn start_pageserver(conf: PageServerConf) -> Result<(), io::Error> {
|
fn start_pageserver(conf: &PageServerConf) -> Result<()> {
|
||||||
// Initialize logger
|
// Initialize logger
|
||||||
let _scope_guard = init_logging(&conf);
|
let _scope_guard = init_logging(&conf)?;
|
||||||
let _log_guard = slog_stdlog::init().unwrap();
|
let _log_guard = slog_stdlog::init()?;
|
||||||
|
|
||||||
// Note: this `info!(...)` macro comes from `log` crate
|
// Note: this `info!(...)` macro comes from `log` crate
|
||||||
info!("standard logging redirected to slog");
|
info!("standard logging redirected to slog");
|
||||||
@@ -127,22 +108,25 @@ fn start_pageserver(conf: PageServerConf) -> Result<(), io::Error> {
|
|||||||
if conf.daemonize {
|
if conf.daemonize {
|
||||||
info!("daemonizing...");
|
info!("daemonizing...");
|
||||||
|
|
||||||
|
let repodir = PathBuf::from(zenith_repo_dir());
|
||||||
|
|
||||||
// There should'n be any logging to stdin/stdout. Redirect it to the main log so
|
// There should'n be any logging to stdin/stdout. Redirect it to the main log so
|
||||||
// that we will see any accidental manual fpritf's or backtraces.
|
// that we will see any accidental manual fprintf's or backtraces.
|
||||||
|
let log_filename = repodir.join("pageserver.log");
|
||||||
let stdout = OpenOptions::new()
|
let stdout = OpenOptions::new()
|
||||||
.create(true)
|
.create(true)
|
||||||
.append(true)
|
.append(true)
|
||||||
.open(conf.data_dir.join("pageserver.log"))
|
.open(&log_filename)
|
||||||
.unwrap();
|
.with_context(|| format!("failed to open {:?}", &log_filename))?;
|
||||||
let stderr = OpenOptions::new()
|
let stderr = OpenOptions::new()
|
||||||
.create(true)
|
.create(true)
|
||||||
.append(true)
|
.append(true)
|
||||||
.open(conf.data_dir.join("pageserver.log"))
|
.open(&log_filename)
|
||||||
.unwrap();
|
.with_context(|| format!("failed to open {:?}", &log_filename))?;
|
||||||
|
|
||||||
let daemonize = Daemonize::new()
|
let daemonize = Daemonize::new()
|
||||||
.pid_file(conf.data_dir.join("pageserver.pid"))
|
.pid_file(repodir.clone().join("pageserver.pid"))
|
||||||
.working_directory(conf.data_dir.clone())
|
.working_directory(repodir)
|
||||||
.stdout(stdout)
|
.stdout(stdout)
|
||||||
.stderr(stderr);
|
.stderr(stderr);
|
||||||
|
|
||||||
@@ -150,57 +134,37 @@ fn start_pageserver(conf: PageServerConf) -> Result<(), io::Error> {
|
|||||||
Ok(_) => info!("Success, daemonized"),
|
Ok(_) => info!("Success, daemonized"),
|
||||||
Err(e) => error!("Error, {}", e),
|
Err(e) => error!("Error, {}", e),
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// change into the repository directory. In daemon mode, Daemonize
|
||||||
|
// does this for us.
|
||||||
|
let repodir = zenith_repo_dir();
|
||||||
|
std::env::set_current_dir(&repodir)?;
|
||||||
|
info!("Changed current directory to repository in {}", &repodir);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut threads = Vec::new();
|
let mut threads = Vec::new();
|
||||||
|
|
||||||
info!("starting...");
|
// TODO: Check that it looks like a valid repository before going further
|
||||||
|
|
||||||
// Before opening up for connections, restore the latest base backup from S3.
|
|
||||||
// (We don't persist anything to local disk at the moment, so we need to do
|
|
||||||
// this at every startup)
|
|
||||||
if !conf.skip_recovery {
|
|
||||||
restore_s3::restore_main(&conf);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create directory for wal-redo datadirs
|
// Create directory for wal-redo datadirs
|
||||||
match fs::create_dir(conf.data_dir.join("wal-redo")) {
|
match fs::create_dir("wal-redo") {
|
||||||
Ok(_) => {}
|
Ok(_) => {}
|
||||||
Err(e) => match e.kind() {
|
Err(e) => match e.kind() {
|
||||||
io::ErrorKind::AlreadyExists => {}
|
io::ErrorKind::AlreadyExists => {}
|
||||||
_ => {
|
_ => {
|
||||||
panic!("Failed to create wal-redo data directory: {}", e);
|
anyhow::bail!("Failed to create wal-redo data directory: {}", e);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// Launch the WAL receiver thread if pageserver was started with --wal-producer
|
|
||||||
// option. It will try to connect to the WAL safekeeper, and stream the WAL. If
|
|
||||||
// the connection is lost, it will reconnect on its own. We just fire and forget
|
|
||||||
// it here.
|
|
||||||
//
|
|
||||||
// All other wal receivers are started on demand by "callmemaybe" command
|
|
||||||
// sent to pageserver.
|
|
||||||
let conf_copy = conf.clone();
|
|
||||||
if let Some(wal_producer) = conf.wal_producer_connstr {
|
|
||||||
let conf = conf_copy.clone();
|
|
||||||
let walreceiver_thread = thread::Builder::new()
|
|
||||||
.name("static WAL receiver thread".into())
|
|
||||||
.spawn(move || {
|
|
||||||
walreceiver::thread_main(conf, &wal_producer);
|
|
||||||
})
|
|
||||||
.unwrap();
|
|
||||||
threads.push(walreceiver_thread);
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetPage@LSN requests are served by another thread. (It uses async I/O,
|
// GetPage@LSN requests are served by another thread. (It uses async I/O,
|
||||||
// but the code in page_service sets up it own thread pool for that)
|
// but the code in page_service sets up it own thread pool for that)
|
||||||
let conf = conf_copy.clone();
|
let conf_copy = conf.clone();
|
||||||
let page_server_thread = thread::Builder::new()
|
let page_server_thread = thread::Builder::new()
|
||||||
.name("Page Service thread".into())
|
.name("Page Service thread".into())
|
||||||
.spawn(|| {
|
.spawn(move || {
|
||||||
// thread code
|
// thread code
|
||||||
page_service::thread_main(conf);
|
page_service::thread_main(&conf_copy);
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
threads.push(page_server_thread);
|
threads.push(page_server_thread);
|
||||||
@@ -217,23 +181,27 @@ fn start_pageserver(conf: PageServerConf) -> Result<(), io::Error> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn init_logging(conf: &PageServerConf) -> slog_scope::GlobalLoggerGuard {
|
fn init_logging(conf: &PageServerConf) -> Result<slog_scope::GlobalLoggerGuard, io::Error> {
|
||||||
if conf.interactive {
|
if conf.interactive {
|
||||||
tui::init_logging()
|
Ok(tui::init_logging())
|
||||||
} else if conf.daemonize {
|
} else if conf.daemonize {
|
||||||
let log = conf.data_dir.join("pageserver.log");
|
let log = zenith_repo_dir() + "/pageserver.log";
|
||||||
let log_file = File::create(log).unwrap_or_else(|_| panic!("Could not create log file"));
|
let log_file = File::create(&log).map_err(|err| {
|
||||||
|
// We failed to initialize logging, so we can't log this message with error!
|
||||||
|
eprintln!("Could not create log file {:?}: {}", log, err);
|
||||||
|
err
|
||||||
|
})?;
|
||||||
let decorator = slog_term::PlainSyncDecorator::new(log_file);
|
let decorator = slog_term::PlainSyncDecorator::new(log_file);
|
||||||
let drain = slog_term::CompactFormat::new(decorator).build();
|
let drain = slog_term::CompactFormat::new(decorator).build();
|
||||||
let drain = slog::Filter::new(drain, |record: &slog::Record| {
|
let drain = slog::Filter::new(drain, |record: &slog::Record| {
|
||||||
if record.level().is_at_least(slog::Level::Info) {
|
if record.level().is_at_least(slog::Level::Debug) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
});
|
});
|
||||||
let drain = std::sync::Mutex::new(drain).fuse();
|
let drain = std::sync::Mutex::new(drain).fuse();
|
||||||
let logger = slog::Logger::root(drain, slog::o!());
|
let logger = slog::Logger::root(drain, slog::o!());
|
||||||
slog_scope::set_global_logger(logger)
|
Ok(slog_scope::set_global_logger(logger))
|
||||||
} else {
|
} else {
|
||||||
let decorator = slog_term::TermDecorator::new().build();
|
let decorator = slog_term::TermDecorator::new().build();
|
||||||
let drain = slog_term::FullFormat::new(decorator).build().fuse();
|
let drain = slog_term::FullFormat::new(decorator).build().fuse();
|
||||||
@@ -251,6 +219,6 @@ fn init_logging(conf: &PageServerConf) -> slog_scope::GlobalLoggerGuard {
|
|||||||
})
|
})
|
||||||
.fuse();
|
.fuse();
|
||||||
let logger = slog::Logger::root(drain, slog::o!());
|
let logger = slog::Logger::root(drain, slog::o!());
|
||||||
slog_scope::set_global_logger(logger)
|
Ok(slog_scope::set_global_logger(logger))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,9 +1,12 @@
|
|||||||
|
use std::fmt;
|
||||||
use std::net::SocketAddr;
|
use std::net::SocketAddr;
|
||||||
use std::path::PathBuf;
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
pub mod basebackup;
|
||||||
pub mod page_cache;
|
pub mod page_cache;
|
||||||
pub mod page_service;
|
pub mod page_service;
|
||||||
pub mod restore_s3;
|
pub mod pg_constants;
|
||||||
|
pub mod restore_local_repo;
|
||||||
pub mod tui;
|
pub mod tui;
|
||||||
pub mod tui_event;
|
pub mod tui_event;
|
||||||
mod tui_logger;
|
mod tui_logger;
|
||||||
@@ -11,13 +14,47 @@ pub mod waldecoder;
|
|||||||
pub mod walreceiver;
|
pub mod walreceiver;
|
||||||
pub mod walredo;
|
pub mod walredo;
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct PageServerConf {
|
pub struct PageServerConf {
|
||||||
pub data_dir: PathBuf,
|
|
||||||
pub daemonize: bool,
|
pub daemonize: bool,
|
||||||
pub interactive: bool,
|
pub interactive: bool,
|
||||||
pub wal_producer_connstr: Option<String>,
|
|
||||||
pub listen_addr: SocketAddr,
|
pub listen_addr: SocketAddr,
|
||||||
pub skip_recovery: bool,
|
}
|
||||||
|
|
||||||
|
// Zenith Timeline ID is a 32-byte random ID.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub struct ZTimelineId([u8; 16]);
|
||||||
|
|
||||||
|
impl FromStr for ZTimelineId {
|
||||||
|
type Err = hex::FromHexError;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<ZTimelineId, Self::Err> {
|
||||||
|
let timelineid = hex::decode(s)?;
|
||||||
|
|
||||||
|
let mut buf: [u8; 16] = [0u8; 16];
|
||||||
|
buf.copy_from_slice(timelineid.as_slice());
|
||||||
|
Ok(ZTimelineId(buf))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ZTimelineId {
|
||||||
|
pub fn from(b: [u8; 16]) -> ZTimelineId {
|
||||||
|
ZTimelineId(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_from_buf(buf: &mut dyn bytes::Buf) -> ZTimelineId {
|
||||||
|
let mut arr = [0u8; 16];
|
||||||
|
buf.copy_to_slice(&mut arr);
|
||||||
|
ZTimelineId::from(arr)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_arr(&self) -> [u8; 16] {
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for ZTimelineId {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
f.write_str(&hex::encode(self.0))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,25 +6,24 @@
|
|||||||
// per-entry mutex.
|
// per-entry mutex.
|
||||||
//
|
//
|
||||||
|
|
||||||
|
use crate::restore_local_repo::restore_timeline;
|
||||||
|
use crate::ZTimelineId;
|
||||||
|
use crate::{walredo, PageServerConf};
|
||||||
|
use anyhow::bail;
|
||||||
|
use bytes::Bytes;
|
||||||
use core::ops::Bound::Included;
|
use core::ops::Bound::Included;
|
||||||
|
use crossbeam_channel::unbounded;
|
||||||
|
use crossbeam_channel::{Receiver, Sender};
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use log::*;
|
||||||
|
use rand::Rng;
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::error::Error;
|
|
||||||
use std::sync::atomic::AtomicU64;
|
use std::sync::atomic::AtomicU64;
|
||||||
use std::sync::atomic::Ordering;
|
use std::sync::atomic::Ordering;
|
||||||
use std::sync::{Arc, Condvar, Mutex};
|
use std::sync::{Arc, Condvar, Mutex};
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use std::{convert::TryInto, ops::AddAssign};
|
use std::{convert::TryInto, ops::AddAssign};
|
||||||
// use tokio::sync::RwLock;
|
|
||||||
use bytes::Bytes;
|
|
||||||
use lazy_static::lazy_static;
|
|
||||||
use log::*;
|
|
||||||
use rand::Rng;
|
|
||||||
|
|
||||||
use crate::{walredo, PageServerConf};
|
|
||||||
|
|
||||||
use crossbeam_channel::unbounded;
|
|
||||||
use crossbeam_channel::{Receiver, Sender};
|
|
||||||
|
|
||||||
// Timeout when waiting or WAL receiver to catch up to an LSN given in a GetPage@LSN call.
|
// Timeout when waiting or WAL receiver to catch up to an LSN given in a GetPage@LSN call.
|
||||||
static TIMEOUT: Duration = Duration::from_secs(60);
|
static TIMEOUT: Duration = Duration::from_secs(60);
|
||||||
@@ -110,29 +109,53 @@ struct PageCacheShared {
|
|||||||
}
|
}
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
pub static ref PAGECACHES: Mutex<HashMap<u64, Arc<PageCache>>> = Mutex::new(HashMap::new());
|
pub static ref PAGECACHES: Mutex<HashMap<ZTimelineId, Arc<PageCache>>> =
|
||||||
|
Mutex::new(HashMap::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_pagecache(conf: PageServerConf, sys_id: u64) -> Arc<PageCache> {
|
// Get Page Cache for given timeline. It is assumed to already exist.
|
||||||
|
pub fn get_pagecache(_conf: &PageServerConf, timelineid: ZTimelineId) -> Option<Arc<PageCache>> {
|
||||||
|
let pcaches = PAGECACHES.lock().unwrap();
|
||||||
|
|
||||||
|
match pcaches.get(&timelineid) {
|
||||||
|
Some(pcache) => Some(pcache.clone()),
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_or_restore_pagecache(
|
||||||
|
conf: &PageServerConf,
|
||||||
|
timelineid: ZTimelineId,
|
||||||
|
) -> anyhow::Result<Arc<PageCache>> {
|
||||||
let mut pcaches = PAGECACHES.lock().unwrap();
|
let mut pcaches = PAGECACHES.lock().unwrap();
|
||||||
|
|
||||||
if !pcaches.contains_key(&sys_id) {
|
match pcaches.get(&timelineid) {
|
||||||
pcaches.insert(sys_id, Arc::new(init_page_cache()));
|
Some(pcache) => Ok(pcache.clone()),
|
||||||
|
None => {
|
||||||
|
let pcache = init_page_cache();
|
||||||
|
|
||||||
// Initialize the WAL redo thread
|
restore_timeline(conf, &pcache, timelineid)?;
|
||||||
//
|
|
||||||
// Now join_handle is not saved any where and we won'try restart tharead
|
let result = Arc::new(pcache);
|
||||||
// if it is dead. We may later stop that treads after some inactivity period
|
|
||||||
// and restart them on demand.
|
pcaches.insert(timelineid, result.clone());
|
||||||
let _walredo_thread = thread::Builder::new()
|
|
||||||
.name("WAL redo thread".into())
|
// Initialize the WAL redo thread
|
||||||
.spawn(move || {
|
//
|
||||||
walredo::wal_redo_main(conf, sys_id);
|
// Now join_handle is not saved any where and we won'try restart tharead
|
||||||
})
|
// if it is dead. We may later stop that treads after some inactivity period
|
||||||
.unwrap();
|
// and restart them on demand.
|
||||||
|
let conf_copy = conf.clone();
|
||||||
|
let _walredo_thread = thread::Builder::new()
|
||||||
|
.name("WAL redo thread".into())
|
||||||
|
.spawn(move || {
|
||||||
|
walredo::wal_redo_main(&conf_copy, timelineid);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
return Ok(result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pcaches.get(&sys_id).unwrap().clone()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn init_page_cache() -> PageCache {
|
fn init_page_cache() -> PageCache {
|
||||||
@@ -204,7 +227,7 @@ pub struct CacheEntryContent {
|
|||||||
impl CacheEntry {
|
impl CacheEntry {
|
||||||
fn new(key: CacheKey) -> CacheEntry {
|
fn new(key: CacheKey) -> CacheEntry {
|
||||||
CacheEntry {
|
CacheEntry {
|
||||||
key: key,
|
key,
|
||||||
content: Mutex::new(CacheEntryContent {
|
content: Mutex::new(CacheEntryContent {
|
||||||
page_image: None,
|
page_image: None,
|
||||||
wal_record: None,
|
wal_record: None,
|
||||||
@@ -223,7 +246,7 @@ pub struct RelTag {
|
|||||||
pub forknum: u8,
|
pub forknum: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Debug)]
|
||||||
pub struct BufferTag {
|
pub struct BufferTag {
|
||||||
pub spcnode: u32,
|
pub spcnode: u32,
|
||||||
pub dbnode: u32,
|
pub dbnode: u32,
|
||||||
@@ -237,6 +260,10 @@ pub struct WALRecord {
|
|||||||
pub lsn: u64, // LSN at the *end* of the record
|
pub lsn: u64, // LSN at the *end* of the record
|
||||||
pub will_init: bool,
|
pub will_init: bool,
|
||||||
pub rec: Bytes,
|
pub rec: Bytes,
|
||||||
|
// Remember the offset of main_data in rec,
|
||||||
|
// so that we don't have to parse the record again.
|
||||||
|
// If record has no main_data, this offset equals rec.len().
|
||||||
|
pub main_data_offset: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Public interface functions
|
// Public interface functions
|
||||||
@@ -247,47 +274,73 @@ impl PageCache {
|
|||||||
//
|
//
|
||||||
// Returns an 8k page image
|
// Returns an 8k page image
|
||||||
//
|
//
|
||||||
pub fn get_page_at_lsn(&self, tag: BufferTag, lsn: u64) -> Result<Bytes, Box<dyn Error>> {
|
pub fn get_page_at_lsn(&self, tag: BufferTag, req_lsn: u64) -> anyhow::Result<Bytes> {
|
||||||
self.num_getpage_requests.fetch_add(1, Ordering::Relaxed);
|
self.num_getpage_requests.fetch_add(1, Ordering::Relaxed);
|
||||||
|
|
||||||
|
let mut lsn = req_lsn;
|
||||||
|
//When invalid LSN is requested, it means "don't wait, return latest version of the page"
|
||||||
|
//This is necessary for bootstrap.
|
||||||
|
//TODO should we use last_valid_lsn here instead of maxvalue?
|
||||||
|
if lsn == 0
|
||||||
|
{
|
||||||
|
lsn = 0xffff_ffff_ffff_eeee;
|
||||||
|
}
|
||||||
// Look up cache entry. If it's a page image, return that. If it's a WAL record,
|
// Look up cache entry. If it's a page image, return that. If it's a WAL record,
|
||||||
// ask the WAL redo service to reconstruct the page image from the WAL records.
|
// ask the WAL redo service to reconstruct the page image from the WAL records.
|
||||||
let minkey = CacheKey { tag: tag, lsn: 0 };
|
let minkey = CacheKey { tag, lsn: 0 };
|
||||||
let maxkey = CacheKey { tag: tag, lsn: lsn };
|
let maxkey = CacheKey { tag, lsn };
|
||||||
|
|
||||||
let entry_rc: Arc<CacheEntry>;
|
let entry_rc: Arc<CacheEntry>;
|
||||||
{
|
{
|
||||||
let mut shared = self.shared.lock().unwrap();
|
let mut shared = self.shared.lock().unwrap();
|
||||||
let mut waited = false;
|
let mut waited = false;
|
||||||
|
|
||||||
while lsn > shared.last_valid_lsn {
|
// There is a a race at postgres instance start
|
||||||
// TODO: Wait for the WAL receiver to catch up
|
// when we request a page before walsender established connection
|
||||||
waited = true;
|
// and was able to stream the page. Just don't wait and return what we have.
|
||||||
|
if req_lsn == 0
|
||||||
|
{
|
||||||
trace!(
|
trace!(
|
||||||
"not caught up yet: {}, requested {}",
|
"walsender hasn't started yet. Don't wait. last_valid_lsn {}, requested {}",
|
||||||
shared.last_valid_lsn,
|
shared.last_valid_lsn, lsn);
|
||||||
lsn
|
}
|
||||||
);
|
|
||||||
let wait_result = self
|
|
||||||
.valid_lsn_condvar
|
|
||||||
.wait_timeout(shared, TIMEOUT)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
shared = wait_result.0;
|
if req_lsn != 0
|
||||||
if wait_result.1.timed_out() {
|
{
|
||||||
return Err(format!(
|
while lsn > shared.last_valid_lsn {
|
||||||
"Timed out while waiting for WAL record at LSN {:X}/{:X} to arrive",
|
// TODO: Wait for the WAL receiver to catch up
|
||||||
lsn >> 32, lsn & 0xffff_ffff
|
waited = true;
|
||||||
))?;
|
trace!(
|
||||||
|
"not caught up yet: {}, requested {}",
|
||||||
|
shared.last_valid_lsn,
|
||||||
|
lsn
|
||||||
|
);
|
||||||
|
let wait_result = self
|
||||||
|
.valid_lsn_condvar
|
||||||
|
.wait_timeout(shared, TIMEOUT)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
shared = wait_result.0;
|
||||||
|
if wait_result.1.timed_out() {
|
||||||
|
bail!(
|
||||||
|
"Timed out while waiting for WAL record at LSN {:X}/{:X} to arrive",
|
||||||
|
lsn >> 32,
|
||||||
|
lsn & 0xffff_ffff
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if waited {
|
if waited {
|
||||||
trace!("caught up now, continuing");
|
trace!("caught up now, continuing");
|
||||||
}
|
}
|
||||||
|
|
||||||
if lsn < shared.first_valid_lsn {
|
if lsn < shared.first_valid_lsn {
|
||||||
return Err(format!("LSN {:X}/{:X} has already been removed",
|
bail!(
|
||||||
lsn >> 32, lsn & 0xffff_ffff))?;
|
"LSN {:X}/{:X} has already been removed",
|
||||||
|
lsn >> 32,
|
||||||
|
lsn & 0xffff_ffff
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
let pagecache = &shared.pagecache;
|
let pagecache = &shared.pagecache;
|
||||||
@@ -297,7 +350,7 @@ impl PageCache {
|
|||||||
let entry_opt = entries.next_back();
|
let entry_opt = entries.next_back();
|
||||||
|
|
||||||
if entry_opt.is_none() {
|
if entry_opt.is_none() {
|
||||||
static ZERO_PAGE: [u8; 8192] = [0 as u8; 8192];
|
static ZERO_PAGE: [u8; 8192] = [0u8; 8192];
|
||||||
return Ok(Bytes::from_static(&ZERO_PAGE));
|
return Ok(Bytes::from_static(&ZERO_PAGE));
|
||||||
/* return Err("could not find page image")?; */
|
/* return Err("could not find page image")?; */
|
||||||
}
|
}
|
||||||
@@ -342,12 +395,12 @@ impl PageCache {
|
|||||||
error!(
|
error!(
|
||||||
"could not apply WAL to reconstruct page image for GetPage@LSN request"
|
"could not apply WAL to reconstruct page image for GetPage@LSN request"
|
||||||
);
|
);
|
||||||
return Err("could not apply WAL to reconstruct page image".into());
|
bail!("could not apply WAL to reconstruct page image");
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
// No base image, and no WAL record. Huh?
|
// No base image, and no WAL record. Huh?
|
||||||
return Err(format!("no page image or WAL record for requested page"))?;
|
bail!("no page image or WAL record for requested page");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -427,10 +480,8 @@ impl PageCache {
|
|||||||
// Adds a WAL record to the page cache
|
// Adds a WAL record to the page cache
|
||||||
//
|
//
|
||||||
pub fn put_wal_record(&self, tag: BufferTag, rec: WALRecord) {
|
pub fn put_wal_record(&self, tag: BufferTag, rec: WALRecord) {
|
||||||
let key = CacheKey {
|
let lsn = rec.lsn;
|
||||||
tag: tag,
|
let key = CacheKey { tag, lsn };
|
||||||
lsn: rec.lsn,
|
|
||||||
};
|
|
||||||
|
|
||||||
let entry = CacheEntry::new(key.clone());
|
let entry = CacheEntry::new(key.clone());
|
||||||
entry.content.lock().unwrap().wal_record = Some(rec);
|
entry.content.lock().unwrap().wal_record = Some(rec);
|
||||||
@@ -448,13 +499,17 @@ impl PageCache {
|
|||||||
*rel_entry = tag.blknum + 1;
|
*rel_entry = tag.blknum + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
trace!("put_wal_record lsn: {}", key.lsn);
|
//trace!("put_wal_record lsn: {}", lsn);
|
||||||
|
|
||||||
let oldentry = shared.pagecache.insert(key, Arc::new(entry));
|
let oldentry = shared.pagecache.insert(key, Arc::new(entry));
|
||||||
self.num_entries.fetch_add(1, Ordering::Relaxed);
|
self.num_entries.fetch_add(1, Ordering::Relaxed);
|
||||||
|
|
||||||
if !oldentry.is_none() {
|
if !oldentry.is_none() {
|
||||||
error!("overwriting WAL record in page cache");
|
error!(
|
||||||
|
"overwriting WAL record with LSN {:X}/{:X} in page cache",
|
||||||
|
lsn >> 32,
|
||||||
|
lsn & 0xffffffff
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.num_wal_records.fetch_add(1, Ordering::Relaxed);
|
self.num_wal_records.fetch_add(1, Ordering::Relaxed);
|
||||||
@@ -464,7 +519,7 @@ impl PageCache {
|
|||||||
// Memorize a full image of a page version
|
// Memorize a full image of a page version
|
||||||
//
|
//
|
||||||
pub fn put_page_image(&self, tag: BufferTag, lsn: u64, img: Bytes) {
|
pub fn put_page_image(&self, tag: BufferTag, lsn: u64, img: Bytes) {
|
||||||
let key = CacheKey { tag: tag, lsn: lsn };
|
let key = CacheKey { tag, lsn };
|
||||||
|
|
||||||
let entry = CacheEntry::new(key.clone());
|
let entry = CacheEntry::new(key.clone());
|
||||||
entry.content.lock().unwrap().page_image = Some(img);
|
entry.content.lock().unwrap().page_image = Some(img);
|
||||||
@@ -487,12 +542,22 @@ impl PageCache {
|
|||||||
let mut shared = self.shared.lock().unwrap();
|
let mut shared = self.shared.lock().unwrap();
|
||||||
|
|
||||||
// Can't move backwards.
|
// Can't move backwards.
|
||||||
assert!(lsn >= shared.last_valid_lsn);
|
let oldlsn = shared.last_valid_lsn;
|
||||||
|
if lsn >= oldlsn {
|
||||||
|
|
||||||
shared.last_valid_lsn = lsn;
|
shared.last_valid_lsn = lsn;
|
||||||
self.valid_lsn_condvar.notify_all();
|
self.valid_lsn_condvar.notify_all();
|
||||||
|
|
||||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||||
|
} else {
|
||||||
|
warn!(
|
||||||
|
"attempted to move last valid LSN backwards (was {:X}/{:X}, new {:X}/{:X})",
|
||||||
|
oldlsn >> 32,
|
||||||
|
oldlsn & 0xffffffff,
|
||||||
|
lsn >> 32,
|
||||||
|
lsn & 0xffffffff
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -510,7 +575,7 @@ impl PageCache {
|
|||||||
self.valid_lsn_condvar.notify_all();
|
self.valid_lsn_condvar.notify_all();
|
||||||
|
|
||||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
||||||
self.last_valid_lsn.store(lsn, Ordering::Relaxed);
|
self.last_record_lsn.store(lsn, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -598,17 +663,19 @@ impl PageCache {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME: Shouldn't relation size also be tracked with an LSN?
|
/// Remember a relation's size in blocks.
|
||||||
// If a replica is lagging behind, it needs to get the size as it was on
|
///
|
||||||
// the replica's current replay LSN.
|
/// If 'to' is larger than the previously remembered size, the remembered size is increased to 'to'.
|
||||||
pub fn relsize_inc(&self, rel: &RelTag, to: Option<u32>) {
|
/// But if it's smaller, there is no change.
|
||||||
|
pub fn relsize_inc(&self, rel: &RelTag, to: u32) {
|
||||||
|
// FIXME: Shouldn't relation size also be tracked with an LSN?
|
||||||
|
// If a replica is lagging behind, it needs to get the size as it was on
|
||||||
|
// the replica's current replay LSN.
|
||||||
let mut shared = self.shared.lock().unwrap();
|
let mut shared = self.shared.lock().unwrap();
|
||||||
let entry = shared.relsize_cache.entry(*rel).or_insert(0);
|
let entry = shared.relsize_cache.entry(*rel).or_insert(0);
|
||||||
|
|
||||||
if let Some(to) = to {
|
if to >= *entry {
|
||||||
if to >= *entry {
|
*entry = to;
|
||||||
*entry = to + 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,29 +7,43 @@
|
|||||||
// *status* -- show actual info about this pageserver,
|
// *status* -- show actual info about this pageserver,
|
||||||
// *pagestream* -- enter mode where smgr and pageserver talk with their
|
// *pagestream* -- enter mode where smgr and pageserver talk with their
|
||||||
// custom protocol.
|
// custom protocol.
|
||||||
// *callmemaybe $url* -- ask pageserver to start walreceiver on $url
|
// *callmemaybe <zenith timelineid> $url* -- ask pageserver to start walreceiver on $url
|
||||||
//
|
//
|
||||||
|
|
||||||
use byteorder::{BigEndian, ByteOrder};
|
use byteorder::{BigEndian, ByteOrder};
|
||||||
use bytes::{Buf, Bytes, BytesMut};
|
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||||
use log::*;
|
use log::*;
|
||||||
|
use regex::Regex;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::Arc;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use tokio::io::{AsyncReadExt, AsyncWriteExt, BufWriter};
|
use tokio::io::{AsyncReadExt, AsyncWriteExt, BufWriter};
|
||||||
use tokio::net::{TcpListener, TcpStream};
|
use tokio::net::{TcpListener, TcpStream};
|
||||||
use tokio::runtime;
|
use tokio::runtime;
|
||||||
|
use tokio::runtime::Runtime;
|
||||||
|
use tokio::sync::mpsc;
|
||||||
use tokio::task;
|
use tokio::task;
|
||||||
|
|
||||||
|
use crate::basebackup;
|
||||||
use crate::page_cache;
|
use crate::page_cache;
|
||||||
|
use crate::restore_local_repo;
|
||||||
use crate::walreceiver;
|
use crate::walreceiver;
|
||||||
use crate::PageServerConf;
|
use crate::PageServerConf;
|
||||||
|
use crate::ZTimelineId;
|
||||||
|
|
||||||
type Result<T> = std::result::Result<T, io::Error>;
|
type Result<T> = std::result::Result<T, io::Error>;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum FeMessage {
|
enum FeMessage {
|
||||||
StartupMessage(FeStartupMessage),
|
StartupMessage(FeStartupMessage),
|
||||||
Query(FeQueryMessage),
|
Query(FeQueryMessage), // Simple query
|
||||||
|
Parse(FeParseMessage), // Extended query protocol
|
||||||
|
Describe(FeDescribeMessage),
|
||||||
|
Bind(FeBindMessage),
|
||||||
|
Execute(FeExecuteMessage),
|
||||||
|
Close(FeCloseMessage),
|
||||||
|
Sync,
|
||||||
Terminate,
|
Terminate,
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -49,6 +63,11 @@ enum BeMessage {
|
|||||||
AuthenticationOk,
|
AuthenticationOk,
|
||||||
ReadyForQuery,
|
ReadyForQuery,
|
||||||
RowDescription,
|
RowDescription,
|
||||||
|
ParseComplete,
|
||||||
|
ParameterDescription,
|
||||||
|
NoData,
|
||||||
|
BindComplete,
|
||||||
|
CloseComplete,
|
||||||
DataRow,
|
DataRow,
|
||||||
CommandComplete,
|
CommandComplete,
|
||||||
ControlFile,
|
ControlFile,
|
||||||
@@ -145,6 +164,176 @@ struct FeQueryMessage {
|
|||||||
body: Bytes,
|
body: Bytes,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We only support the simple case of Parse on unnamed prepared statement and
|
||||||
|
// no params
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct FeParseMessage {
|
||||||
|
query_string: Bytes,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_null_terminated(buf: &mut Bytes) -> Result<Bytes> {
|
||||||
|
let mut result = BytesMut::new();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
if !buf.has_remaining() {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"no null-terminator in string",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let byte = buf.get_u8();
|
||||||
|
|
||||||
|
if byte == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
result.put_u8(byte);
|
||||||
|
}
|
||||||
|
return Ok(result.freeze());
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FeParseMessage {
|
||||||
|
pub fn parse(body: Bytes) -> Result<FeMessage> {
|
||||||
|
let mut buf = body.clone();
|
||||||
|
let _pstmt_name = read_null_terminated(&mut buf)?;
|
||||||
|
let query_string = read_null_terminated(&mut buf)?;
|
||||||
|
let nparams = buf.get_i16();
|
||||||
|
|
||||||
|
// FIXME: the rust-postgres driver uses a named prepared statement
|
||||||
|
// for copy_out(). We're not prepared to handle that correctly. For
|
||||||
|
// now, just ignore the statement name, assuming that the client never
|
||||||
|
// uses more than one prepared statement at a time.
|
||||||
|
/*
|
||||||
|
if pstmt_name.len() != 0 {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"named prepared statements not implemented in Parse",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
if nparams != 0 {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"query params not implemented",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(FeMessage::Parse(FeParseMessage { query_string }))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct FeDescribeMessage {
|
||||||
|
kind: u8, // 'S' to describe a prepared statement; or 'P' to describe a portal.
|
||||||
|
// we only support unnamed prepared stmt or portal
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FeDescribeMessage {
|
||||||
|
pub fn parse(body: Bytes) -> Result<FeMessage> {
|
||||||
|
let mut buf = body.clone();
|
||||||
|
let kind = buf.get_u8();
|
||||||
|
let _pstmt_name = read_null_terminated(&mut buf)?;
|
||||||
|
|
||||||
|
// FIXME: see FeParseMessage::parse
|
||||||
|
/*
|
||||||
|
if pstmt_name.len() != 0 {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"named prepared statements not implemented in Describe",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
if kind != b'S' {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"only prepared statmement Describe is implemented",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(FeMessage::Describe(FeDescribeMessage { kind }))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we only support unnamed prepared stmt or portal
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct FeExecuteMessage {
|
||||||
|
/// max # of rows
|
||||||
|
maxrows: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FeExecuteMessage {
|
||||||
|
pub fn parse(body: Bytes) -> Result<FeMessage> {
|
||||||
|
let mut buf = body.clone();
|
||||||
|
let portal_name = read_null_terminated(&mut buf)?;
|
||||||
|
let maxrows = buf.get_i32();
|
||||||
|
|
||||||
|
if portal_name.len() != 0 {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"named portals not implemented",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if maxrows != 0 {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"row limit in Execute message not supported",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(FeMessage::Execute(FeExecuteMessage { maxrows }))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we only support unnamed prepared stmt and portal
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct FeBindMessage {}
|
||||||
|
|
||||||
|
impl FeBindMessage {
|
||||||
|
pub fn parse(body: Bytes) -> Result<FeMessage> {
|
||||||
|
let mut buf = body.clone();
|
||||||
|
let portal_name = read_null_terminated(&mut buf)?;
|
||||||
|
let _pstmt_name = read_null_terminated(&mut buf)?;
|
||||||
|
|
||||||
|
if portal_name.len() != 0 {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"named portals not implemented",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: see FeParseMessage::parse
|
||||||
|
/*
|
||||||
|
if pstmt_name.len() != 0 {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"named prepared statements not implemented",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
Ok(FeMessage::Bind(FeBindMessage {}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// we only support unnamed prepared stmt and portal
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct FeCloseMessage {}
|
||||||
|
|
||||||
|
impl FeCloseMessage {
|
||||||
|
pub fn parse(body: Bytes) -> Result<FeMessage> {
|
||||||
|
let mut buf = body.clone();
|
||||||
|
let _kind = buf.get_u8();
|
||||||
|
let _pstmt_or_portal_name = read_null_terminated(&mut buf)?;
|
||||||
|
|
||||||
|
// FIXME: we do nothing with Close
|
||||||
|
|
||||||
|
Ok(FeMessage::Close(FeCloseMessage {}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl FeMessage {
|
impl FeMessage {
|
||||||
pub fn parse(buf: &mut BytesMut) -> Result<Option<FeMessage>> {
|
pub fn parse(buf: &mut BytesMut) -> Result<Option<FeMessage>> {
|
||||||
if buf.len() < 5 {
|
if buf.len() < 5 {
|
||||||
@@ -173,10 +362,16 @@ impl FeMessage {
|
|||||||
let mut body = buf.split_to(total_len);
|
let mut body = buf.split_to(total_len);
|
||||||
body.advance(5);
|
body.advance(5);
|
||||||
|
|
||||||
|
let mut body = body.freeze();
|
||||||
|
|
||||||
match tag {
|
match tag {
|
||||||
b'Q' => Ok(Some(FeMessage::Query(FeQueryMessage {
|
b'Q' => Ok(Some(FeMessage::Query(FeQueryMessage { body: body }))),
|
||||||
body: body.freeze(),
|
b'P' => Ok(Some(FeParseMessage::parse(body)?)),
|
||||||
}))),
|
b'D' => Ok(Some(FeDescribeMessage::parse(body)?)),
|
||||||
|
b'E' => Ok(Some(FeExecuteMessage::parse(body)?)),
|
||||||
|
b'B' => Ok(Some(FeBindMessage::parse(body)?)),
|
||||||
|
b'C' => Ok(Some(FeCloseMessage::parse(body)?)),
|
||||||
|
b'S' => Ok(Some(FeMessage::Sync)),
|
||||||
b'X' => Ok(Some(FeMessage::Terminate)),
|
b'X' => Ok(Some(FeMessage::Terminate)),
|
||||||
b'd' => {
|
b'd' => {
|
||||||
let smgr_tag = body.get_u8();
|
let smgr_tag = body.get_u8();
|
||||||
@@ -215,26 +410,33 @@ impl FeMessage {
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
pub fn thread_main(conf: PageServerConf) {
|
pub fn thread_main(conf: &PageServerConf) {
|
||||||
// Create a new thread pool
|
// Create a new thread pool
|
||||||
//
|
//
|
||||||
// FIXME: keep it single-threaded for now, make it easier to debug with gdb,
|
// FIXME: It would be nice to keep this single-threaded for debugging purposes,
|
||||||
// and we're not concerned with performance yet.
|
// but that currently leads to a deadlock: if a GetPage@LSN request arrives
|
||||||
//let runtime = runtime::Runtime::new().unwrap();
|
// for an LSN that hasn't been received yet, the thread gets stuck waiting for
|
||||||
let runtime = runtime::Builder::new_current_thread()
|
// the WAL to arrive. If the WAL receiver hasn't been launched yet, i.e
|
||||||
.enable_all()
|
// we haven't received a "callmemaybe" request yet to tell us where to get the
|
||||||
.build()
|
// WAL, we will not have a thread available to process the "callmemaybe"
|
||||||
.unwrap();
|
// request when it does arrive. Using a thread pool alleviates the problem so
|
||||||
|
// that it doesn't happen in the tests anymore, but in principle it could still
|
||||||
|
// happen if we receive enough GetPage@LSN requests to consume all of the
|
||||||
|
// available threads.
|
||||||
|
//let runtime = runtime::Builder::new_current_thread().enable_all().build().unwrap();
|
||||||
|
let runtime = runtime::Runtime::new().unwrap();
|
||||||
|
|
||||||
info!("Starting page server on {}", conf.listen_addr);
|
info!("Starting page server on {}", conf.listen_addr);
|
||||||
|
|
||||||
runtime.block_on(async {
|
let runtime_ref = Arc::new(runtime);
|
||||||
|
|
||||||
|
runtime_ref.clone().block_on(async {
|
||||||
let listener = TcpListener::bind(conf.listen_addr).await.unwrap();
|
let listener = TcpListener::bind(conf.listen_addr).await.unwrap();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let (socket, peer_addr) = listener.accept().await.unwrap();
|
let (socket, peer_addr) = listener.accept().await.unwrap();
|
||||||
debug!("accepted connection from {}", peer_addr);
|
debug!("accepted connection from {}", peer_addr);
|
||||||
let mut conn_handler = Connection::new(conf.clone(), socket);
|
let mut conn_handler = Connection::new(conf.clone(), socket, &runtime_ref);
|
||||||
|
|
||||||
task::spawn(async move {
|
task::spawn(async move {
|
||||||
if let Err(err) = conn_handler.run().await {
|
if let Err(err) = conn_handler.run().await {
|
||||||
@@ -251,15 +453,17 @@ struct Connection {
|
|||||||
buffer: BytesMut,
|
buffer: BytesMut,
|
||||||
init_done: bool,
|
init_done: bool,
|
||||||
conf: PageServerConf,
|
conf: PageServerConf,
|
||||||
|
runtime: Arc<Runtime>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Connection {
|
impl Connection {
|
||||||
pub fn new(conf: PageServerConf, socket: TcpStream) -> Connection {
|
pub fn new(conf: PageServerConf, socket: TcpStream, runtime: &Arc<Runtime>) -> Connection {
|
||||||
Connection {
|
Connection {
|
||||||
stream: BufWriter::new(socket),
|
stream: BufWriter::new(socket),
|
||||||
buffer: BytesMut::with_capacity(10 * 1024),
|
buffer: BytesMut::with_capacity(10 * 1024),
|
||||||
init_done: false,
|
init_done: false,
|
||||||
conf: conf,
|
conf,
|
||||||
|
runtime: Arc::clone(runtime),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -307,6 +511,33 @@ impl Connection {
|
|||||||
self.stream.write_u8(b'I').await?;
|
self.stream.write_u8(b'I').await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BeMessage::ParseComplete => {
|
||||||
|
self.stream.write_u8(b'1').await?;
|
||||||
|
self.stream.write_i32(4).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
BeMessage::BindComplete => {
|
||||||
|
self.stream.write_u8(b'2').await?;
|
||||||
|
self.stream.write_i32(4).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
BeMessage::CloseComplete => {
|
||||||
|
self.stream.write_u8(b'3').await?;
|
||||||
|
self.stream.write_i32(4).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
BeMessage::NoData => {
|
||||||
|
self.stream.write_u8(b'n').await?;
|
||||||
|
self.stream.write_i32(4).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
BeMessage::ParameterDescription => {
|
||||||
|
self.stream.write_u8(b't').await?;
|
||||||
|
self.stream.write_i32(6).await?;
|
||||||
|
// we don't support params, so always 0
|
||||||
|
self.stream.write_i16(0).await?;
|
||||||
|
}
|
||||||
|
|
||||||
BeMessage::RowDescription => {
|
BeMessage::RowDescription => {
|
||||||
// XXX
|
// XXX
|
||||||
let mut b = Bytes::from("data\0");
|
let mut b = Bytes::from("data\0");
|
||||||
@@ -317,7 +548,7 @@ impl Connection {
|
|||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
self.stream.write_i16(1).await?;
|
self.stream.write_i16(1).await?;
|
||||||
self.stream.write_buf(&mut b).await?;
|
self.stream.write_all(&mut b).await?;
|
||||||
self.stream.write_i32(0).await?; /* table oid */
|
self.stream.write_i32(0).await?; /* table oid */
|
||||||
self.stream.write_i16(0).await?; /* attnum */
|
self.stream.write_i16(0).await?; /* attnum */
|
||||||
self.stream.write_i32(25).await?; /* TEXTOID */
|
self.stream.write_i32(25).await?; /* TEXTOID */
|
||||||
@@ -336,7 +567,7 @@ impl Connection {
|
|||||||
|
|
||||||
self.stream.write_i16(1).await?;
|
self.stream.write_i16(1).await?;
|
||||||
self.stream.write_i32(b.len() as i32).await?;
|
self.stream.write_i32(b.len() as i32).await?;
|
||||||
self.stream.write_buf(&mut b).await?;
|
self.stream.write_all(&mut b).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
BeMessage::ControlFile => {
|
BeMessage::ControlFile => {
|
||||||
@@ -348,7 +579,7 @@ impl Connection {
|
|||||||
|
|
||||||
self.stream.write_i16(1).await?;
|
self.stream.write_i16(1).await?;
|
||||||
self.stream.write_i32(b.len() as i32).await?;
|
self.stream.write_i32(b.len() as i32).await?;
|
||||||
self.stream.write_buf(&mut b).await?;
|
self.stream.write_all(&mut b).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
BeMessage::CommandComplete => {
|
BeMessage::CommandComplete => {
|
||||||
@@ -356,7 +587,7 @@ impl Connection {
|
|||||||
|
|
||||||
self.stream.write_u8(b'C').await?;
|
self.stream.write_u8(b'C').await?;
|
||||||
self.stream.write_i32(4 + b.len() as i32).await?;
|
self.stream.write_i32(4 + b.len() as i32).await?;
|
||||||
self.stream.write_buf(&mut b).await?;
|
self.stream.write_all(&mut b).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
BeMessage::ZenithStatusResponse(resp) => {
|
BeMessage::ZenithStatusResponse(resp) => {
|
||||||
@@ -383,7 +614,7 @@ impl Connection {
|
|||||||
self.stream.write_u8(102).await?; /* tag from pagestore_client.h */
|
self.stream.write_u8(102).await?; /* tag from pagestore_client.h */
|
||||||
self.stream.write_u8(resp.ok as u8).await?;
|
self.stream.write_u8(resp.ok as u8).await?;
|
||||||
self.stream.write_u32(resp.n_blocks).await?;
|
self.stream.write_u32(resp.n_blocks).await?;
|
||||||
self.stream.write_buf(&mut resp.page.clone()).await?;
|
self.stream.write_all(&mut resp.page.clone()).await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -396,15 +627,18 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn run(&mut self) -> Result<()> {
|
async fn run(&mut self) -> Result<()> {
|
||||||
|
let mut unnamed_query_string = Bytes::new();
|
||||||
loop {
|
loop {
|
||||||
match self.read_message().await? {
|
let msg = self.read_message().await?;
|
||||||
|
info!("got message {:?}", msg);
|
||||||
|
match msg {
|
||||||
Some(FeMessage::StartupMessage(m)) => {
|
Some(FeMessage::StartupMessage(m)) => {
|
||||||
trace!("got message {:?}", m);
|
trace!("got message {:?}", m);
|
||||||
|
|
||||||
match m.kind {
|
match m.kind {
|
||||||
StartupRequestCode::NegotiateGss | StartupRequestCode::NegotiateSsl => {
|
StartupRequestCode::NegotiateGss | StartupRequestCode::NegotiateSsl => {
|
||||||
let mut b = Bytes::from("N");
|
let mut b = Bytes::from("N");
|
||||||
self.stream.write_buf(&mut b).await?;
|
self.stream.write_all(&mut b).await?;
|
||||||
self.stream.flush().await?;
|
self.stream.flush().await?;
|
||||||
}
|
}
|
||||||
StartupRequestCode::Normal => {
|
StartupRequestCode::Normal => {
|
||||||
@@ -417,7 +651,28 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(FeMessage::Query(m)) => {
|
Some(FeMessage::Query(m)) => {
|
||||||
self.process_query(&m).await?;
|
self.process_query(m.body).await?;
|
||||||
|
}
|
||||||
|
Some(FeMessage::Parse(m)) => {
|
||||||
|
unnamed_query_string = m.query_string;
|
||||||
|
self.write_message(&BeMessage::ParseComplete).await?;
|
||||||
|
}
|
||||||
|
Some(FeMessage::Describe(_)) => {
|
||||||
|
self.write_message_noflush(&BeMessage::ParameterDescription)
|
||||||
|
.await?;
|
||||||
|
self.write_message(&BeMessage::NoData).await?;
|
||||||
|
}
|
||||||
|
Some(FeMessage::Bind(_)) => {
|
||||||
|
self.write_message(&BeMessage::BindComplete).await?;
|
||||||
|
}
|
||||||
|
Some(FeMessage::Close(_)) => {
|
||||||
|
self.write_message(&BeMessage::CloseComplete).await?;
|
||||||
|
}
|
||||||
|
Some(FeMessage::Execute(_)) => {
|
||||||
|
self.process_query(unnamed_query_string.clone()).await?;
|
||||||
|
}
|
||||||
|
Some(FeMessage::Sync) => {
|
||||||
|
self.write_message(&BeMessage::ReadyForQuery).await?;
|
||||||
}
|
}
|
||||||
Some(FeMessage::Terminate) => {
|
Some(FeMessage::Terminate) => {
|
||||||
break;
|
break;
|
||||||
@@ -426,7 +681,8 @@ impl Connection {
|
|||||||
info!("connection closed");
|
info!("connection closed");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
_ => {
|
x => {
|
||||||
|
error!("unexpected message type : {:?}", x);
|
||||||
return Err(io::Error::new(io::ErrorKind::Other, "unexpected message"));
|
return Err(io::Error::new(io::ErrorKind::Other, "unexpected message"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -435,41 +691,62 @@ impl Connection {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn process_query(&mut self, q: &FeQueryMessage) -> Result<()> {
|
async fn process_query(&mut self, query_string: Bytes) -> Result<()> {
|
||||||
trace!("got query {:?}", q.body);
|
debug!("process query {:?}", query_string);
|
||||||
|
|
||||||
if q.body.starts_with(b"controlfile") {
|
// remove null terminator, if any
|
||||||
|
let mut query_string = query_string.clone();
|
||||||
|
if query_string.last() == Some(&0) {
|
||||||
|
query_string.truncate(query_string.len() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if query_string.starts_with(b"controlfile") {
|
||||||
self.handle_controlfile().await
|
self.handle_controlfile().await
|
||||||
} else if q.body.starts_with(b"pagestream ") {
|
} else if query_string.starts_with(b"pagestream ") {
|
||||||
let (_l, r) = q.body.split_at("pagestream ".len());
|
let (_l, r) = query_string.split_at("pagestream ".len());
|
||||||
let mut r = r.to_vec();
|
let timelineid_str = String::from_utf8(r.to_vec()).unwrap();
|
||||||
r.pop();
|
let timelineid = ZTimelineId::from_str(&timelineid_str).unwrap();
|
||||||
let sysid = String::from_utf8(r).unwrap().trim().to_string();
|
|
||||||
let sysid: u64 = sysid.parse().unwrap(); // XXX
|
|
||||||
|
|
||||||
self.handle_pagerequests(sysid).await
|
self.handle_pagerequests(timelineid).await
|
||||||
} else if q.body.starts_with(b"callmemaybe ") {
|
} else if query_string.starts_with(b"basebackup ") {
|
||||||
let (_l, r) = q.body.split_at("callmemaybe ".len());
|
let (_l, r) = query_string.split_at("basebackup ".len());
|
||||||
let mut r = r.to_vec();
|
let r = r.to_vec();
|
||||||
r.pop();
|
let timelineid_str = String::from(String::from_utf8(r).unwrap().trim_end());
|
||||||
let connstr = String::from_utf8(r).unwrap().trim().to_string();
|
info!("got basebackup command: \"{}\"", timelineid_str);
|
||||||
|
let timelineid = ZTimelineId::from_str(&timelineid_str).unwrap();
|
||||||
|
|
||||||
let conf_copy = self.conf.clone();
|
// Check that the timeline exists
|
||||||
let _walreceiver_thread = thread::Builder::new()
|
self.handle_basebackup_request(timelineid).await?;
|
||||||
.name("WAL receiver thread".into())
|
|
||||||
.spawn(move || {
|
|
||||||
walreceiver::thread_main(conf_copy, &connstr);
|
|
||||||
})
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
// generic ack:
|
|
||||||
self.write_message_noflush(&BeMessage::RowDescription)
|
|
||||||
.await?;
|
|
||||||
self.write_message_noflush(&BeMessage::DataRow).await?;
|
|
||||||
self.write_message_noflush(&BeMessage::CommandComplete)
|
self.write_message_noflush(&BeMessage::CommandComplete)
|
||||||
.await?;
|
.await?;
|
||||||
self.write_message(&BeMessage::ReadyForQuery).await
|
self.write_message(&BeMessage::ReadyForQuery).await
|
||||||
} else if q.body.starts_with(b"status") {
|
} else if query_string.starts_with(b"callmemaybe ") {
|
||||||
|
let query_str = String::from_utf8(query_string.to_vec())
|
||||||
|
.unwrap()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
// callmemaybe <zenith timelineid as hex string> <connstr>
|
||||||
|
let re = Regex::new(r"^callmemaybe ([[:xdigit:]]+) (.*)$").unwrap();
|
||||||
|
let caps = re.captures(&query_str);
|
||||||
|
let caps = caps.unwrap();
|
||||||
|
|
||||||
|
let timelineid = ZTimelineId::from_str(caps.get(1).unwrap().as_str().clone()).unwrap();
|
||||||
|
let connstr: String = String::from(caps.get(2).unwrap().as_str());
|
||||||
|
|
||||||
|
// Check that the timeline exists
|
||||||
|
let pcache = page_cache::get_or_restore_pagecache(&self.conf, timelineid);
|
||||||
|
if pcache.is_err() {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
format!("client requested callmemaybe on timeline {} which does not exist in page server", timelineid)));
|
||||||
|
}
|
||||||
|
|
||||||
|
walreceiver::launch_wal_receiver(&self.conf, timelineid, &connstr);
|
||||||
|
|
||||||
|
self.write_message_noflush(&BeMessage::CommandComplete)
|
||||||
|
.await?;
|
||||||
|
self.write_message(&BeMessage::ReadyForQuery).await
|
||||||
|
} else if query_string.starts_with(b"status") {
|
||||||
self.write_message_noflush(&BeMessage::RowDescription)
|
self.write_message_noflush(&BeMessage::RowDescription)
|
||||||
.await?;
|
.await?;
|
||||||
self.write_message_noflush(&BeMessage::DataRow).await?;
|
self.write_message_noflush(&BeMessage::DataRow).await?;
|
||||||
@@ -495,7 +772,16 @@ impl Connection {
|
|||||||
self.write_message(&BeMessage::ReadyForQuery).await
|
self.write_message(&BeMessage::ReadyForQuery).await
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn handle_pagerequests(&mut self, sysid: u64) -> Result<()> {
|
async fn handle_pagerequests(&mut self, timelineid: ZTimelineId) -> Result<()> {
|
||||||
|
// Check that the timeline exists
|
||||||
|
let pcache = page_cache::get_or_restore_pagecache(&self.conf, timelineid);
|
||||||
|
if pcache.is_err() {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
format!("client requested pagestream on timeline {} which does not exist in page server", timelineid)));
|
||||||
|
}
|
||||||
|
let pcache = pcache.unwrap();
|
||||||
|
|
||||||
/* switch client to COPYBOTH */
|
/* switch client to COPYBOTH */
|
||||||
self.stream.write_u8(b'W').await?;
|
self.stream.write_u8(b'W').await?;
|
||||||
self.stream.write_i32(4 + 1 + 2).await?;
|
self.stream.write_i32(4 + 1 + 2).await?;
|
||||||
@@ -503,13 +789,11 @@ impl Connection {
|
|||||||
self.stream.write_i16(0).await?; /* numAttributes */
|
self.stream.write_i16(0).await?; /* numAttributes */
|
||||||
self.stream.flush().await?;
|
self.stream.flush().await?;
|
||||||
|
|
||||||
let pcache = page_cache::get_pagecache(self.conf.clone(), sysid);
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let message = self.read_message().await?;
|
let message = self.read_message().await?;
|
||||||
|
|
||||||
if let Some(m) = &message {
|
if let Some(m) = &message {
|
||||||
info!("query({}): {:?}", sysid, m);
|
info!("query({:?}): {:?}", timelineid, m);
|
||||||
};
|
};
|
||||||
|
|
||||||
if message.is_none() {
|
if message.is_none() {
|
||||||
@@ -560,7 +844,7 @@ impl Connection {
|
|||||||
|
|
||||||
self.write_message(&BeMessage::ZenithNblocksResponse(ZenithStatusResponse {
|
self.write_message(&BeMessage::ZenithNblocksResponse(ZenithStatusResponse {
|
||||||
ok: true,
|
ok: true,
|
||||||
n_blocks: n_blocks,
|
n_blocks,
|
||||||
}))
|
}))
|
||||||
.await?
|
.await?
|
||||||
}
|
}
|
||||||
@@ -600,7 +884,7 @@ impl Connection {
|
|||||||
forknum: req.forknum,
|
forknum: req.forknum,
|
||||||
};
|
};
|
||||||
|
|
||||||
pcache.relsize_inc(&tag, None);
|
pcache.relsize_inc(&tag, 0);
|
||||||
|
|
||||||
self.write_message(&BeMessage::ZenithStatusResponse(ZenithStatusResponse {
|
self.write_message(&BeMessage::ZenithStatusResponse(ZenithStatusResponse {
|
||||||
ok: true,
|
ok: true,
|
||||||
@@ -616,7 +900,7 @@ impl Connection {
|
|||||||
forknum: req.forknum,
|
forknum: req.forknum,
|
||||||
};
|
};
|
||||||
|
|
||||||
pcache.relsize_inc(&tag, Some(req.blkno));
|
pcache.relsize_inc(&tag, req.blkno + 1);
|
||||||
|
|
||||||
self.write_message(&BeMessage::ZenithStatusResponse(ZenithStatusResponse {
|
self.write_message(&BeMessage::ZenithStatusResponse(ZenithStatusResponse {
|
||||||
ok: true,
|
ok: true,
|
||||||
@@ -628,4 +912,101 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn handle_basebackup_request(&mut self, timelineid: ZTimelineId) -> Result<()> {
|
||||||
|
// check that the timeline exists
|
||||||
|
let pcache = page_cache::get_or_restore_pagecache(&self.conf, timelineid);
|
||||||
|
if pcache.is_err() {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
format!("client requested basebackup on timeline {} which does not exist in page server", timelineid)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* switch client to COPYOUT */
|
||||||
|
let stream = &mut self.stream;
|
||||||
|
stream.write_u8(b'H').await?;
|
||||||
|
stream.write_i32(4 + 1 + 2).await?;
|
||||||
|
stream.write_u8(0).await?; /* copy_is_binary */
|
||||||
|
stream.write_i16(0).await?; /* numAttributes */
|
||||||
|
stream.flush().await?;
|
||||||
|
info!("sent CopyOut");
|
||||||
|
|
||||||
|
/* Send a tarball of the latest snapshot on the timeline */
|
||||||
|
|
||||||
|
// find latest snapshot
|
||||||
|
let snapshotlsn = restore_local_repo::find_latest_snapshot(&self.conf, timelineid).unwrap();
|
||||||
|
|
||||||
|
// Stream it
|
||||||
|
let (s, mut r) = mpsc::channel(5);
|
||||||
|
|
||||||
|
let f_tar = task::spawn_blocking(move || {
|
||||||
|
basebackup::send_snapshot_tarball(&mut CopyDataSink(s), timelineid, snapshotlsn)?;
|
||||||
|
Ok(())
|
||||||
|
});
|
||||||
|
let f_tar2 = async {
|
||||||
|
let joinres = f_tar.await;
|
||||||
|
|
||||||
|
if joinres.is_err() {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidData,
|
||||||
|
joinres.unwrap_err(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
return joinres.unwrap();
|
||||||
|
};
|
||||||
|
|
||||||
|
let f_pump = async move {
|
||||||
|
loop {
|
||||||
|
let buf = r.recv().await;
|
||||||
|
if buf.is_none() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let mut buf = buf.unwrap();
|
||||||
|
|
||||||
|
// CopyData
|
||||||
|
stream.write_u8(b'd').await?;
|
||||||
|
stream.write_u32((4 + buf.len()) as u32).await?;
|
||||||
|
stream.write_all(&mut buf).await?;
|
||||||
|
trace!("CopyData sent for {} bytes!", buf.len());
|
||||||
|
|
||||||
|
// FIXME: flush isn't really required, but makes it easier
|
||||||
|
// to view in wireshark
|
||||||
|
stream.flush().await?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
};
|
||||||
|
|
||||||
|
tokio::try_join!(f_tar2, f_pump)?;
|
||||||
|
|
||||||
|
// CopyDone
|
||||||
|
self.stream.write_u8(b'c').await?;
|
||||||
|
self.stream.write_u32(4).await?;
|
||||||
|
self.stream.flush().await?;
|
||||||
|
debug!("CopyDone sent!");
|
||||||
|
|
||||||
|
// FIXME: I'm getting an error from the tokio copyout driver without this.
|
||||||
|
// I think it happens when the CommandComplete, CloseComplete and ReadyForQuery
|
||||||
|
// are sent in the same TCP packet as the CopyDone. I don't understand why.
|
||||||
|
thread::sleep(std::time::Duration::from_secs(1));
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `io::Write` adapter that forwards every written chunk onto an mpsc channel;
/// the async side drains the channel and streams the chunks out as CopyData
/// messages.
struct CopyDataSink(mpsc::Sender<Bytes>);
|
||||||
|
|
||||||
|
impl std::io::Write for CopyDataSink {
|
||||||
|
fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
|
||||||
|
let buf = Bytes::copy_from_slice(data);
|
||||||
|
|
||||||
|
if let Err(e) = self.0.blocking_send(buf) {
|
||||||
|
return Err(io::Error::new(io::ErrorKind::Other, e));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(data.len())
|
||||||
|
}
|
||||||
|
fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
|
||||||
|
// no-op
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
53
pageserver/src/pg_constants.rs
Normal file
53
pageserver/src/pg_constants.rs
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
//
// From pg_tablespace_d.h
//
pub const DEFAULTTABLESPACE_OID: u32 = 1663;
pub const GLOBALTABLESPACE_OID: u32 = 1664;

// Special values for non-rel files' tags.
// TODO: maybe use an enum?
pub const PG_CONTROLFILE_FORKNUM: u32 = 42;
pub const PG_FILENODEMAP_FORKNUM: u32 = 43;
pub const PG_XACT_FORKNUM: u32 = 44;
pub const PG_MXACT_OFFSETS_FORKNUM: u32 = 45;
pub const PG_MXACT_MEMBERS_FORKNUM: u32 = 46;

//
// constants from clog.h
//
pub const CLOG_XACTS_PER_BYTE: u32 = 4;
pub const CLOG_XACTS_PER_PAGE: u32 = 8192 * CLOG_XACTS_PER_BYTE;
pub const CLOG_BITS_PER_XACT: u8 = 2;
pub const CLOG_XACT_BITMASK: u8 = (1 << CLOG_BITS_PER_XACT) - 1;

pub const TRANSACTION_STATUS_COMMITTED: u8 = 0x01;
pub const TRANSACTION_STATUS_ABORTED: u8 = 0x02;
pub const TRANSACTION_STATUS_SUB_COMMITTED: u8 = 0x03;

pub const CLOG_ZEROPAGE: u8 = 0x00;
pub const CLOG_TRUNCATE: u8 = 0x10;

// From xact.h
pub const XLOG_XACT_COMMIT: u8 = 0x00;
pub const XLOG_XACT_ABORT: u8 = 0x20;

/* mask for filtering opcodes out of xl_info */
pub const XLOG_XACT_OPMASK: u8 = 0x70;
/* does this record have a 'xinfo' field or not */
pub const XLOG_XACT_HAS_INFO: u8 = 0x80;

/*
 * The following flags, stored in xinfo, determine which information is
 * contained in commit/abort records.
 */
pub const XACT_XINFO_HAS_DBINFO: u32 = 1;
pub const XACT_XINFO_HAS_SUBXACTS: u32 = 2;
pub const XACT_XINFO_HAS_RELFILENODES: u32 = 4;

// From pg_control.h and rmgrlist.h
pub const XLOG_SWITCH: u8 = 0x40;
pub const RM_XLOG_ID: u8 = 0;
pub const RM_XACT_ID: u8 = 1;
pub const RM_CLOG_ID: u8 = 3;
// pub const RM_MULTIXACT_ID: u8 = 6;

// from xlogreader.h
pub const XLR_INFO_MASK: u8 = 0x0F;
|
||||||
489
pageserver/src/restore_local_repo.rs
Normal file
489
pageserver/src/restore_local_repo.rs
Normal file
@@ -0,0 +1,489 @@
|
|||||||
|
//
|
||||||
|
// Restore chunks from local Zenith repository
|
||||||
|
//
|
||||||
|
// This runs once at Page Server startup. It loads all the "snapshots" and all
|
||||||
|
// WAL from all timelines from the local zenith repository into the in-memory page
|
||||||
|
// cache.
|
||||||
|
//
|
||||||
|
// This also initializes the "last valid LSN" in the page cache to the last LSN
|
||||||
|
// seen in the WAL, so that when the WAL receiver is started, it starts
|
||||||
|
// streaming from that LSN.
|
||||||
|
//
|
||||||
|
|
||||||
|
use log::*;
|
||||||
|
use regex::Regex;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
use std::cmp::max;
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fs;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::Read;
|
||||||
|
use std::io::Seek;
|
||||||
|
use std::io::SeekFrom;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use bytes::Bytes;
|
||||||
|
|
||||||
|
use crate::page_cache;
|
||||||
|
use crate::page_cache::BufferTag;
|
||||||
|
use crate::page_cache::PageCache;
|
||||||
|
use crate::waldecoder::{decode_wal_record, WalStreamDecoder};
|
||||||
|
use crate::PageServerConf;
|
||||||
|
use crate::ZTimelineId;
|
||||||
|
|
||||||
|
// From pg_tablespace_d.h
|
||||||
|
//
|
||||||
|
// FIXME: we'll probably need these elsewhere too, move to some common location
|
||||||
|
const DEFAULTTABLESPACE_OID: u32 = 1663;
|
||||||
|
const GLOBALTABLESPACE_OID: u32 = 1664;
|
||||||
|
|
||||||
|
//
|
||||||
|
// Load it all into the page cache.
|
||||||
|
//
|
||||||
|
pub fn restore_timeline(
|
||||||
|
conf: &PageServerConf,
|
||||||
|
pcache: &PageCache,
|
||||||
|
timeline: ZTimelineId,
|
||||||
|
) -> Result<()> {
|
||||||
|
let timelinepath = PathBuf::from("timelines").join(timeline.to_string());
|
||||||
|
|
||||||
|
if !timelinepath.exists() {
|
||||||
|
anyhow::bail!("timeline {} does not exist in the page server's repository");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan .zenith/timelines/<timeline>/snapshots
|
||||||
|
let snapshotspath = PathBuf::from("timelines")
|
||||||
|
.join(timeline.to_string())
|
||||||
|
.join("snapshots");
|
||||||
|
|
||||||
|
let mut last_snapshot_lsn: u64 = 0;
|
||||||
|
|
||||||
|
for direntry in fs::read_dir(&snapshotspath).unwrap() {
|
||||||
|
let direntry = direntry?;
|
||||||
|
let filename = direntry.file_name().to_str().unwrap().to_owned();
|
||||||
|
|
||||||
|
let lsn = u64::from_str_radix(&filename, 16)?;
|
||||||
|
last_snapshot_lsn = max(lsn, last_snapshot_lsn);
|
||||||
|
|
||||||
|
restore_snapshot(conf, pcache, timeline, &filename)?;
|
||||||
|
info!("restored snapshot at {}", filename);
|
||||||
|
}
|
||||||
|
|
||||||
|
if last_snapshot_lsn == 0 {
|
||||||
|
error!(
|
||||||
|
"could not find valid snapshot in {}",
|
||||||
|
snapshotspath.display()
|
||||||
|
);
|
||||||
|
// TODO return error?
|
||||||
|
}
|
||||||
|
pcache.init_valid_lsn(last_snapshot_lsn);
|
||||||
|
|
||||||
|
restore_wal(conf, pcache, timeline, last_snapshot_lsn)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn find_latest_snapshot(_conf: &PageServerConf, timeline: ZTimelineId) -> Result<u64> {
|
||||||
|
let snapshotspath = format!("timelines/{}/snapshots", timeline);
|
||||||
|
|
||||||
|
let mut last_snapshot_lsn = 0;
|
||||||
|
for direntry in fs::read_dir(&snapshotspath).unwrap() {
|
||||||
|
let filename = direntry.unwrap().file_name().to_str().unwrap().to_owned();
|
||||||
|
|
||||||
|
let lsn = u64::from_str_radix(&filename, 16)?;
|
||||||
|
last_snapshot_lsn = max(lsn, last_snapshot_lsn);
|
||||||
|
}
|
||||||
|
|
||||||
|
if last_snapshot_lsn == 0 {
|
||||||
|
error!("could not find valid snapshot in {}", &snapshotspath);
|
||||||
|
// TODO return error?
|
||||||
|
}
|
||||||
|
Ok(last_snapshot_lsn)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn restore_snapshot(
|
||||||
|
conf: &PageServerConf,
|
||||||
|
pcache: &PageCache,
|
||||||
|
timeline: ZTimelineId,
|
||||||
|
snapshot: &str,
|
||||||
|
) -> Result<()> {
|
||||||
|
let snapshotpath = PathBuf::from("timelines")
|
||||||
|
.join(timeline.to_string())
|
||||||
|
.join("snapshots")
|
||||||
|
.join(snapshot);
|
||||||
|
|
||||||
|
// Scan 'global'
|
||||||
|
for direntry in fs::read_dir(snapshotpath.join("global"))? {
|
||||||
|
let direntry = direntry?;
|
||||||
|
match direntry.file_name().to_str() {
|
||||||
|
None => continue,
|
||||||
|
|
||||||
|
// These special files appear in the snapshot, but are not needed by the page server
|
||||||
|
Some("pg_control") => continue,
|
||||||
|
Some("pg_filenode.map") => continue,
|
||||||
|
|
||||||
|
// Load any relation files into the page server
|
||||||
|
_ => restore_relfile(
|
||||||
|
conf,
|
||||||
|
pcache,
|
||||||
|
timeline,
|
||||||
|
snapshot,
|
||||||
|
GLOBALTABLESPACE_OID,
|
||||||
|
0,
|
||||||
|
&direntry.path(),
|
||||||
|
)?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan 'base'. It contains database dirs, the database OID is the filename.
|
||||||
|
// E.g. 'base/12345', where 12345 is the database OID.
|
||||||
|
for direntry in fs::read_dir(snapshotpath.join("base"))? {
|
||||||
|
let direntry = direntry?;
|
||||||
|
|
||||||
|
let dboid = u32::from_str_radix(direntry.file_name().to_str().unwrap(), 10)?;
|
||||||
|
|
||||||
|
for direntry in fs::read_dir(direntry.path())? {
|
||||||
|
let direntry = direntry?;
|
||||||
|
match direntry.file_name().to_str() {
|
||||||
|
None => continue,
|
||||||
|
|
||||||
|
// These special files appear in the snapshot, but are not needed by the page server
|
||||||
|
Some("PG_VERSION") => continue,
|
||||||
|
Some("pg_filenode.map") => continue,
|
||||||
|
|
||||||
|
// Load any relation files into the page server
|
||||||
|
_ => restore_relfile(
|
||||||
|
conf,
|
||||||
|
pcache,
|
||||||
|
timeline,
|
||||||
|
snapshot,
|
||||||
|
DEFAULTTABLESPACE_OID,
|
||||||
|
dboid,
|
||||||
|
&direntry.path(),
|
||||||
|
)?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Scan pg_tblspc
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn restore_relfile(
|
||||||
|
_conf: &PageServerConf,
|
||||||
|
pcache: &PageCache,
|
||||||
|
_timeline: ZTimelineId,
|
||||||
|
snapshot: &str,
|
||||||
|
spcoid: u32,
|
||||||
|
dboid: u32,
|
||||||
|
path: &Path,
|
||||||
|
) -> Result<()> {
|
||||||
|
let lsn = u64::from_str_radix(snapshot, 16)?;
|
||||||
|
|
||||||
|
// Does it look like a relation file?
|
||||||
|
|
||||||
|
let p = parse_relfilename(path.file_name().unwrap().to_str().unwrap());
|
||||||
|
if p.is_err() {
|
||||||
|
let e = p.unwrap_err();
|
||||||
|
warn!("unrecognized file in snapshot: {:?} ({})", path, e);
|
||||||
|
return Err(e)?;
|
||||||
|
}
|
||||||
|
let (relnode, forknum, segno) = p.unwrap();
|
||||||
|
|
||||||
|
let mut file = File::open(path)?;
|
||||||
|
let mut buf: [u8; 8192] = [0u8; 8192];
|
||||||
|
|
||||||
|
// FIXME: use constants (BLCKSZ)
|
||||||
|
let mut blknum: u32 = segno * (1024 * 1024 * 1024 / 8192);
|
||||||
|
loop {
|
||||||
|
let r = file.read_exact(&mut buf);
|
||||||
|
match r {
|
||||||
|
Ok(_) => {
|
||||||
|
let tag = page_cache::BufferTag {
|
||||||
|
spcnode: spcoid,
|
||||||
|
dbnode: dboid,
|
||||||
|
relnode: relnode,
|
||||||
|
forknum: forknum as u8,
|
||||||
|
blknum: blknum,
|
||||||
|
};
|
||||||
|
pcache.put_page_image(tag, lsn, Bytes::copy_from_slice(&buf));
|
||||||
|
/*
|
||||||
|
if oldest_lsn == 0 || p.lsn < oldest_lsn {
|
||||||
|
oldest_lsn = p.lsn;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: UnexpectedEof is expected
|
||||||
|
Err(e) => match e.kind() {
|
||||||
|
std::io::ErrorKind::UnexpectedEof => {
|
||||||
|
// reached EOF. That's expected.
|
||||||
|
// FIXME: maybe check that we read the full length of the file?
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
error!("error reading file: {:?} ({})", path, e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
};
|
||||||
|
blknum += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let tag = page_cache::RelTag {
|
||||||
|
spcnode: spcoid,
|
||||||
|
dbnode: dboid,
|
||||||
|
relnode: relnode,
|
||||||
|
forknum: forknum as u8,
|
||||||
|
};
|
||||||
|
pcache.relsize_inc(&tag, blknum);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan WAL on a timeline, starting from given LSN, and load all the records
|
||||||
|
// into the page cache.
|
||||||
|
fn restore_wal(
|
||||||
|
_conf: &PageServerConf,
|
||||||
|
pcache: &PageCache,
|
||||||
|
timeline: ZTimelineId,
|
||||||
|
startpoint: u64,
|
||||||
|
) -> Result<()> {
|
||||||
|
let walpath = format!("timelines/{}/wal", timeline);
|
||||||
|
|
||||||
|
let mut waldecoder = WalStreamDecoder::new(u64::from(startpoint));
|
||||||
|
|
||||||
|
let mut segno = XLByteToSeg(startpoint, 16 * 1024 * 1024);
|
||||||
|
let mut offset = XLogSegmentOffset(startpoint, 16 * 1024 * 1024);
|
||||||
|
let mut last_lsn = 0;
|
||||||
|
loop {
|
||||||
|
// FIXME: assume postgresql tli 1 for now
|
||||||
|
let filename = XLogFileName(1, segno, 16 * 1024 * 1024);
|
||||||
|
let mut path = walpath.clone() + "/" + &filename;
|
||||||
|
|
||||||
|
// It could be as .partial
|
||||||
|
if !PathBuf::from(&path).exists() {
|
||||||
|
path = path + ".partial";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Slurp the WAL file
|
||||||
|
let open_result = File::open(&path);
|
||||||
|
if let Err(e) = open_result {
|
||||||
|
if e.kind() == std::io::ErrorKind::NotFound {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return Err(e)?;
|
||||||
|
}
|
||||||
|
let mut file = open_result.unwrap();
|
||||||
|
|
||||||
|
if offset > 0 {
|
||||||
|
file.seek(SeekFrom::Start(offset as u64))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
let nread = file.read_to_end(&mut buf)?;
|
||||||
|
if nread != 16 * 1024 * 1024 - offset as usize {
|
||||||
|
// Maybe allow this for .partial files?
|
||||||
|
error!("read only {} bytes from WAL file", nread);
|
||||||
|
}
|
||||||
|
waldecoder.feed_bytes(&buf);
|
||||||
|
|
||||||
|
let mut nrecords = 0;
|
||||||
|
loop {
|
||||||
|
let rec = waldecoder.poll_decode();
|
||||||
|
if rec.is_err() {
|
||||||
|
// Assume that an error means we've reached the end of
|
||||||
|
// a partial WAL record. So that's ok.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if let Some((lsn, recdata)) = rec.unwrap() {
|
||||||
|
let decoded = decode_wal_record(recdata.clone());
|
||||||
|
// Put the WAL record to the page cache. We make a separate copy of
|
||||||
|
// it for every block it modifies. (The actual WAL record is kept in
|
||||||
|
// a Bytes, which uses a reference counter for the underlying buffer,
|
||||||
|
// so having multiple copies of it doesn't cost that much)
|
||||||
|
for blk in decoded.blocks.iter() {
|
||||||
|
let tag = BufferTag {
|
||||||
|
spcnode: blk.rnode_spcnode,
|
||||||
|
dbnode: blk.rnode_dbnode,
|
||||||
|
relnode: blk.rnode_relnode,
|
||||||
|
forknum: blk.forknum as u8,
|
||||||
|
blknum: blk.blkno,
|
||||||
|
};
|
||||||
|
|
||||||
|
let rec = page_cache::WALRecord {
|
||||||
|
lsn: lsn,
|
||||||
|
will_init: blk.will_init || blk.apply_image,
|
||||||
|
rec: recdata.clone(),
|
||||||
|
main_data_offset: decoded.main_data_offset,
|
||||||
|
};
|
||||||
|
|
||||||
|
pcache.put_wal_record(tag, rec);
|
||||||
|
}
|
||||||
|
// Now that this record has been handled, let the page cache know that
|
||||||
|
// it is up-to-date to this LSN
|
||||||
|
pcache.advance_last_valid_lsn(lsn);
|
||||||
|
last_lsn = lsn;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
nrecords += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
info!("restored {} records from WAL file {}", nrecords, filename);
|
||||||
|
|
||||||
|
segno += 1;
|
||||||
|
offset = 0;
|
||||||
|
}
|
||||||
|
info!(
|
||||||
|
"reached end of WAL at {:X}/{:X}",
|
||||||
|
last_lsn >> 32,
|
||||||
|
last_lsn & 0xffffffff
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: copied from xlog_utils.rs
|
||||||
|
/// Length of a WAL segment file name: three 8-hex-digit fields.
pub const XLOG_FNAME_LEN: usize = 24;
/// A WAL position (LSN), as in PostgreSQL.
pub type XLogRecPtr = u64;
/// A WAL segment number.
pub type XLogSegNo = u64;
/// A PostgreSQL timeline ID (not a Zenith timeline id).
pub type TimeLineID = u32;
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogSegmentOffset(xlogptr: XLogRecPtr, wal_segsz_bytes: usize) -> u32 {
|
||||||
|
return (xlogptr as u32) & (wal_segsz_bytes as u32 - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLByteToSeg(xlogptr: XLogRecPtr, wal_segsz_bytes: usize) -> XLogSegNo {
|
||||||
|
return xlogptr / wal_segsz_bytes as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
|
||||||
|
return format!(
|
||||||
|
"{:>08X}{:>08X}{:>08X}",
|
||||||
|
tli,
|
||||||
|
logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
|
||||||
|
logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {
|
||||||
|
return (0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) {
|
||||||
|
let tli = u32::from_str_radix(&fname[0..8], 16).unwrap();
|
||||||
|
let log = u32::from_str_radix(&fname[8..16], 16).unwrap() as XLogSegNo;
|
||||||
|
let seg = u32::from_str_radix(&fname[16..24], 16).unwrap() as XLogSegNo;
|
||||||
|
return (log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn IsXLogFileName(fname: &str) -> bool {
|
||||||
|
return fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn IsPartialXLogFileName(fname: &str) -> bool {
|
||||||
|
if let Some(basefname) = fname.strip_suffix(".partial") {
|
||||||
|
IsXLogFileName(basefname)
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Error type for malformed repository file and directory names.
#[derive(Debug, Clone)]
struct FilePathError {
    // Human-readable description of what was wrong with the name.
    msg: String,
}
|
||||||
|
|
||||||
|
impl Error for FilePathError {
|
||||||
|
fn description(&self) -> &str {
|
||||||
|
&self.msg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl FilePathError {
|
||||||
|
fn new(msg: &str) -> FilePathError {
|
||||||
|
FilePathError {
|
||||||
|
msg: msg.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<core::num::ParseIntError> for FilePathError {
|
||||||
|
fn from(e: core::num::ParseIntError) -> Self {
|
||||||
|
return FilePathError {
|
||||||
|
msg: format!("invalid filename: {}", e),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for FilePathError {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "invalid filename")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn forkname_to_forknum(forkname: Option<&str>) -> Result<u32, FilePathError> {
|
||||||
|
match forkname {
|
||||||
|
// "main" is not in filenames, it's implicit if the fork name is not present
|
||||||
|
None => Ok(0),
|
||||||
|
Some("fsm") => Ok(1),
|
||||||
|
Some("vm") => Ok(2),
|
||||||
|
Some("init") => Ok(3),
|
||||||
|
Some(_) => Err(FilePathError::new("invalid forkname")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Components parsed out of a base-image file name, plus the snapshot LSN
/// the image belongs to.
#[derive(Debug)]
struct ParsedBaseImageFileName {
    pub spcnode: u32, // tablespace OID
    pub dbnode: u32,  // database OID
    pub relnode: u32, // relation OID
    pub forknum: u32, // fork number (0 = main)
    pub segno: u32,   // 1 GB segment number

    pub lsn: u64, // LSN of the snapshot this image was taken at
}
|
||||||
|
|
||||||
|
// formats:
|
||||||
|
// <oid>
|
||||||
|
// <oid>_<fork name>
|
||||||
|
// <oid>.<segment number>
|
||||||
|
// <oid>_<fork name>.<segment number>
|
||||||
|
|
||||||
|
fn parse_relfilename(fname: &str) -> Result<(u32, u32, u32), FilePathError> {
|
||||||
|
let re = Regex::new(r"^(?P<relnode>\d+)(_(?P<forkname>[a-z]+))?(\.(?P<segno>\d+))?$").unwrap();
|
||||||
|
|
||||||
|
let caps = re
|
||||||
|
.captures(fname)
|
||||||
|
.ok_or_else(|| FilePathError::new("invalid relation data file name"))?;
|
||||||
|
|
||||||
|
let relnode_str = caps.name("relnode").unwrap().as_str();
|
||||||
|
let relnode = u32::from_str_radix(relnode_str, 10)?;
|
||||||
|
|
||||||
|
let forkname_match = caps.name("forkname");
|
||||||
|
let forkname = if forkname_match.is_none() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(forkname_match.unwrap().as_str())
|
||||||
|
};
|
||||||
|
let forknum = forkname_to_forknum(forkname)?;
|
||||||
|
|
||||||
|
let segno_match = caps.name("segno");
|
||||||
|
let segno = if segno_match.is_none() {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
u32::from_str_radix(segno_match.unwrap().as_str(), 10)?
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok((relnode, forknum, segno));
|
||||||
|
}
|
||||||
@@ -60,8 +60,8 @@ pub fn restore_main(conf: &PageServerConf) {
|
|||||||
async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
|
async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
|
||||||
let backend = Storage {
|
let backend = Storage {
|
||||||
region: Region::Custom {
|
region: Region::Custom {
|
||||||
region: env::var("S3_REGION").unwrap().into(),
|
region: env::var("S3_REGION").unwrap(),
|
||||||
endpoint: env::var("S3_ENDPOINT").unwrap().into(),
|
endpoint: env::var("S3_ENDPOINT").unwrap(),
|
||||||
},
|
},
|
||||||
credentials: Credentials::new(
|
credentials: Credentials::new(
|
||||||
Some(&env::var("S3_ACCESSKEY").unwrap()),
|
Some(&env::var("S3_ACCESSKEY").unwrap()),
|
||||||
@@ -119,7 +119,7 @@ async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
|
|||||||
panic!("no base backup found");
|
panic!("no base backup found");
|
||||||
}
|
}
|
||||||
|
|
||||||
let pcache = page_cache::get_pagecache(conf.clone(), sys_id);
|
let pcache = page_cache::get_pagecache(conf, sys_id);
|
||||||
pcache.init_valid_lsn(oldest_lsn);
|
pcache.init_valid_lsn(oldest_lsn);
|
||||||
|
|
||||||
info!("{} files to restore...", slurp_futures.len());
|
info!("{} files to restore...", slurp_futures.len());
|
||||||
@@ -305,7 +305,7 @@ async fn slurp_base_file(
|
|||||||
// FIXME: use constants (BLCKSZ)
|
// FIXME: use constants (BLCKSZ)
|
||||||
let mut blknum: u32 = parsed.segno * (1024 * 1024 * 1024 / 8192);
|
let mut blknum: u32 = parsed.segno * (1024 * 1024 * 1024 / 8192);
|
||||||
|
|
||||||
let pcache = page_cache::get_pagecache(conf.clone(), sys_id);
|
let pcache = page_cache::get_pagecache(conf, sys_id);
|
||||||
|
|
||||||
while bytes.remaining() >= 8192 {
|
while bytes.remaining() >= 8192 {
|
||||||
let tag = page_cache::BufferTag {
|
let tag = page_cache::BufferTag {
|
||||||
@@ -313,7 +313,7 @@ async fn slurp_base_file(
|
|||||||
dbnode: parsed.dbnode,
|
dbnode: parsed.dbnode,
|
||||||
relnode: parsed.relnode,
|
relnode: parsed.relnode,
|
||||||
forknum: parsed.forknum as u8,
|
forknum: parsed.forknum as u8,
|
||||||
blknum: blknum,
|
blknum,
|
||||||
};
|
};
|
||||||
|
|
||||||
pcache.put_page_image(tag, parsed.lsn, bytes.copy_to_bytes(8192));
|
pcache.put_page_image(tag, parsed.lsn, bytes.copy_to_bytes(8192));
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ use tui::text::{Span, Spans, Text};
|
|||||||
use tui::widgets::{Block, BorderType, Borders, Paragraph, Widget};
|
use tui::widgets::{Block, BorderType, Borders, Paragraph, Widget};
|
||||||
use tui::Terminal;
|
use tui::Terminal;
|
||||||
|
|
||||||
use slog;
|
|
||||||
use slog::Drain;
|
use slog::Drain;
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
@@ -92,7 +91,7 @@ pub fn init_logging() -> slog_scope::GlobalLoggerGuard {
|
|||||||
return slog_scope::set_global_logger(logger);
|
return slog_scope::set_global_logger(logger);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn ui_main<'b>() -> Result<(), Box<dyn Error>> {
|
pub fn ui_main() -> Result<(), Box<dyn Error>> {
|
||||||
// Terminal initialization
|
// Terminal initialization
|
||||||
let stdout = io::stdout().into_raw_mode()?;
|
let stdout = io::stdout().into_raw_mode()?;
|
||||||
let stdout = MouseTerminal::from(stdout);
|
let stdout = MouseTerminal::from(stdout);
|
||||||
@@ -188,6 +187,7 @@ pub fn ui_main<'b>() -> Result<(), Box<dyn Error>> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
struct LogWidget<'a> {
|
struct LogWidget<'a> {
|
||||||
logger: &'a TuiLogger,
|
logger: &'a TuiLogger,
|
||||||
title: &'a str,
|
title: &'a str,
|
||||||
@@ -229,7 +229,7 @@ impl<'a> Widget for LogWidget<'a> {
|
|||||||
// Render a widget to show some metrics
|
// Render a widget to show some metrics
|
||||||
struct MetricsWidget {}
|
struct MetricsWidget {}
|
||||||
|
|
||||||
fn get_metric_u64<'a>(title: &'a str, value: u64) -> Spans<'a> {
|
fn get_metric_u64(title: &str, value: u64) -> Spans {
|
||||||
Spans::from(vec![
|
Spans::from(vec![
|
||||||
Span::styled(format!("{:<20}", title), Style::default()),
|
Span::styled(format!("{:<20}", title), Style::default()),
|
||||||
Span::raw(": "),
|
Span::raw(": "),
|
||||||
@@ -240,7 +240,7 @@ fn get_metric_u64<'a>(title: &'a str, value: u64) -> Spans<'a> {
|
|||||||
])
|
])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_metric_str<'a>(title: &'a str, value: &'a str) -> Spans<'a> {
|
fn get_metric_str<'a>(title: &str, value: &'a str) -> Spans<'a> {
|
||||||
Spans::from(vec![
|
Spans::from(vec![
|
||||||
Span::styled(format!("{:<20}", title), Style::default()),
|
Span::styled(format!("{:<20}", title), Style::default()),
|
||||||
Span::raw(": "),
|
Span::raw(": "),
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ use std::time::Duration;
|
|||||||
use termion::event::Key;
|
use termion::event::Key;
|
||||||
use termion::input::TermRead;
|
use termion::input::TermRead;
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub enum Event<I> {
|
pub enum Event<I> {
|
||||||
Input(I),
|
Input(I),
|
||||||
Tick,
|
Tick,
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
//
|
//
|
||||||
use chrono::offset::Local;
|
use chrono::offset::Local;
|
||||||
use chrono::DateTime;
|
use chrono::DateTime;
|
||||||
use slog;
|
|
||||||
use slog::{Drain, Level, OwnedKVList, Record};
|
use slog::{Drain, Level, OwnedKVList, Record};
|
||||||
use slog_async::AsyncRecord;
|
use slog_async::AsyncRecord;
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
@@ -81,7 +80,7 @@ impl<'b> TuiLoggerWidget<'b> {
|
|||||||
style_trace: None,
|
style_trace: None,
|
||||||
style_info: None,
|
style_info: None,
|
||||||
show_module: true,
|
show_module: true,
|
||||||
logger: logger,
|
logger,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -168,7 +167,7 @@ impl<'b> Widget for TuiLoggerWidget<'b> {
|
|||||||
Level::Debug => (self.style_debug, "DEBUG", true),
|
Level::Debug => (self.style_debug, "DEBUG", true),
|
||||||
Level::Trace => (self.style_trace, "TRACE", true),
|
Level::Trace => (self.style_trace, "TRACE", true),
|
||||||
};
|
};
|
||||||
line.push(Span::styled(txt, lvl_style.unwrap_or(Style::default())));
|
line.push(Span::styled(txt, lvl_style.unwrap_or_default()));
|
||||||
|
|
||||||
if self.show_module {
|
if self.show_module {
|
||||||
line.push(Span::raw(" "));
|
line.push(Span::raw(" "));
|
||||||
|
|||||||
@@ -1,14 +1,8 @@
|
|||||||
//#![allow(non_upper_case_globals)]
|
use crate::pg_constants;
|
||||||
//#![allow(non_camel_case_types)]
|
|
||||||
//#![allow(non_snake_case)]
|
|
||||||
//#![allow(dead_code)]
|
|
||||||
//include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
|
||||||
|
|
||||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||||
|
|
||||||
use std::cmp::min;
|
|
||||||
|
|
||||||
use log::*;
|
use log::*;
|
||||||
|
use std::cmp::min;
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
const XLOG_BLCKSZ: u32 = 8192;
|
const XLOG_BLCKSZ: u32 = 8192;
|
||||||
|
|
||||||
@@ -19,7 +13,7 @@ const WAL_SEGMENT_SIZE: u64 = 16 * 1024 * 1024;
|
|||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct XLogPageHeaderData {
|
pub struct XLogPageHeaderData {
|
||||||
xlp_magic: u16, /* magic value for correctness checks */
|
xlp_magic: u16, /* magic value for correctness checks */
|
||||||
xlp_info: u16, /* flag bits, see below */
|
xlp_info: u16, /* flag bits, see below */
|
||||||
xlp_tli: u32, /* TimeLineID of first record on page */
|
xlp_tli: u32, /* TimeLineID of first record on page */
|
||||||
@@ -33,7 +27,7 @@ const SizeOfXLogShortPHD: usize = 2 + 2 + 4 + 8 + 4 + 4;
|
|||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct XLogLongPageHeaderData {
|
pub struct XLogLongPageHeaderData {
|
||||||
std: XLogPageHeaderData, /* standard header fields */
|
std: XLogPageHeaderData, /* standard header fields */
|
||||||
xlp_sysid: u64, /* system identifier from pg_control */
|
xlp_sysid: u64, /* system identifier from pg_control */
|
||||||
xlp_seg_size: u32, /* just as a cross-check */
|
xlp_seg_size: u32, /* just as a cross-check */
|
||||||
@@ -44,6 +38,7 @@ struct XLogLongPageHeaderData {
|
|||||||
#[allow(non_upper_case_globals)]
|
#[allow(non_upper_case_globals)]
|
||||||
const SizeOfXLogLongPHD: usize = (2 + 2 + 4 + 8 + 4) + 4 + 8 + 4 + 4;
|
const SizeOfXLogLongPHD: usize = (2 + 2 + 4 + 8 + 4) + 4 + 8 + 4 + 4;
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
pub struct WalStreamDecoder {
|
pub struct WalStreamDecoder {
|
||||||
lsn: u64,
|
lsn: u64,
|
||||||
|
|
||||||
@@ -56,6 +51,13 @@ pub struct WalStreamDecoder {
|
|||||||
recordbuf: BytesMut,
|
recordbuf: BytesMut,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Error, Debug, Clone)]
|
||||||
|
#[error("{msg} at {lsn}")]
|
||||||
|
pub struct WalDecodeError {
|
||||||
|
msg: String,
|
||||||
|
lsn: u64,
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// WalRecordStream is a Stream that returns a stream of WAL records
|
// WalRecordStream is a Stream that returns a stream of WAL records
|
||||||
// FIXME: This isn't a proper rust stream
|
// FIXME: This isn't a proper rust stream
|
||||||
@@ -63,7 +65,7 @@ pub struct WalStreamDecoder {
|
|||||||
impl WalStreamDecoder {
|
impl WalStreamDecoder {
|
||||||
pub fn new(lsn: u64) -> WalStreamDecoder {
|
pub fn new(lsn: u64) -> WalStreamDecoder {
|
||||||
WalStreamDecoder {
|
WalStreamDecoder {
|
||||||
lsn: lsn,
|
lsn,
|
||||||
|
|
||||||
startlsn: 0,
|
startlsn: 0,
|
||||||
contlen: 0,
|
contlen: 0,
|
||||||
@@ -78,40 +80,56 @@ impl WalStreamDecoder {
|
|||||||
self.inputbuf.extend_from_slice(buf);
|
self.inputbuf.extend_from_slice(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a tuple:
|
/// Attempt to decode another WAL record from the input that has been fed to the
|
||||||
// (end LSN, record)
|
/// decoder so far.
|
||||||
pub fn poll_decode(&mut self) -> Option<(u64, Bytes)> {
|
///
|
||||||
|
/// Returns one of the following:
|
||||||
|
/// Ok((u64, Bytes)): a tuple containing the LSN of next record, and the record itself
|
||||||
|
/// Ok(None): there is not enough data in the input buffer. Feed more by calling the `feed_bytes` function
|
||||||
|
/// Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
|
||||||
|
///
|
||||||
|
pub fn poll_decode(&mut self) -> Result<Option<(u64, Bytes)>, WalDecodeError> {
|
||||||
loop {
|
loop {
|
||||||
// parse and verify page boundaries as we go
|
// parse and verify page boundaries as we go
|
||||||
if self.lsn % WAL_SEGMENT_SIZE == 0 {
|
if self.lsn % WAL_SEGMENT_SIZE == 0 {
|
||||||
// parse long header
|
// parse long header
|
||||||
|
|
||||||
if self.inputbuf.remaining() < SizeOfXLogLongPHD {
|
if self.inputbuf.remaining() < SizeOfXLogLongPHD {
|
||||||
return None;
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.decode_XLogLongPageHeaderData();
|
let hdr = self.decode_XLogLongPageHeaderData();
|
||||||
|
if hdr.std.xlp_pageaddr != self.lsn {
|
||||||
|
return Err(WalDecodeError {
|
||||||
|
msg: "invalid xlog segment header".into(),
|
||||||
|
lsn: self.lsn,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// TODO: verify the remaining fields in the header
|
||||||
|
|
||||||
self.lsn += SizeOfXLogLongPHD as u64;
|
self.lsn += SizeOfXLogLongPHD as u64;
|
||||||
|
|
||||||
// TODO: verify the fields in the header
|
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
} else if self.lsn % (XLOG_BLCKSZ as u64) == 0 {
|
} else if self.lsn % (XLOG_BLCKSZ as u64) == 0 {
|
||||||
// parse page header
|
// parse page header
|
||||||
|
|
||||||
if self.inputbuf.remaining() < SizeOfXLogShortPHD {
|
if self.inputbuf.remaining() < SizeOfXLogShortPHD {
|
||||||
return None;
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.decode_XLogPageHeaderData();
|
let hdr = self.decode_XLogPageHeaderData();
|
||||||
|
if hdr.xlp_pageaddr != self.lsn {
|
||||||
|
return Err(WalDecodeError {
|
||||||
|
msg: "invalid xlog page header".into(),
|
||||||
|
lsn: self.lsn,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// TODO: verify the remaining fields in the header
|
||||||
|
|
||||||
self.lsn += SizeOfXLogShortPHD as u64;
|
self.lsn += SizeOfXLogShortPHD as u64;
|
||||||
|
|
||||||
// TODO: verify the fields in the header
|
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
} else if self.padlen > 0 {
|
} else if self.padlen > 0 {
|
||||||
if self.inputbuf.remaining() < self.padlen as usize {
|
if self.inputbuf.remaining() < self.padlen as usize {
|
||||||
return None;
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
// skip padding
|
// skip padding
|
||||||
@@ -122,20 +140,17 @@ impl WalStreamDecoder {
|
|||||||
// need to have at least the xl_tot_len field
|
// need to have at least the xl_tot_len field
|
||||||
|
|
||||||
if self.inputbuf.remaining() < 4 {
|
if self.inputbuf.remaining() < 4 {
|
||||||
return None;
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
// read xl_tot_len FIXME: assumes little-endian
|
// read xl_tot_len FIXME: assumes little-endian
|
||||||
self.startlsn = self.lsn;
|
self.startlsn = self.lsn;
|
||||||
let xl_tot_len = self.inputbuf.get_u32_le();
|
let xl_tot_len = self.inputbuf.get_u32_le();
|
||||||
if xl_tot_len < SizeOfXLogRecord {
|
if xl_tot_len < SizeOfXLogRecord {
|
||||||
error!(
|
return Err(WalDecodeError {
|
||||||
"invalid xl_tot_len {} at {:X}/{:X}",
|
msg: format!("invalid xl_tot_len {}", xl_tot_len),
|
||||||
xl_tot_len,
|
lsn: self.lsn,
|
||||||
self.lsn >> 32,
|
});
|
||||||
self.lsn & 0xffffffff
|
|
||||||
);
|
|
||||||
panic!();
|
|
||||||
}
|
}
|
||||||
self.lsn += 4;
|
self.lsn += 4;
|
||||||
|
|
||||||
@@ -153,7 +168,7 @@ impl WalStreamDecoder {
|
|||||||
let n = min(self.contlen, pageleft) as usize;
|
let n = min(self.contlen, pageleft) as usize;
|
||||||
|
|
||||||
if self.inputbuf.remaining() < n {
|
if self.inputbuf.remaining() < n {
|
||||||
return None;
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.recordbuf.put(self.inputbuf.split_to(n));
|
self.recordbuf.put(self.inputbuf.split_to(n));
|
||||||
@@ -181,7 +196,7 @@ impl WalStreamDecoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let result = (self.lsn, recordbuf);
|
let result = (self.lsn, recordbuf);
|
||||||
return Some(result);
|
return Ok(Some(result));
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -234,6 +249,7 @@ const BLCKSZ: u16 = 8192;
|
|||||||
//
|
//
|
||||||
// Constants from xlogrecord.h
|
// Constants from xlogrecord.h
|
||||||
//
|
//
|
||||||
|
|
||||||
const XLR_MAX_BLOCK_ID: u8 = 32;
|
const XLR_MAX_BLOCK_ID: u8 = 32;
|
||||||
|
|
||||||
const XLR_BLOCK_ID_DATA_SHORT: u8 = 255;
|
const XLR_BLOCK_ID_DATA_SHORT: u8 = 255;
|
||||||
@@ -253,6 +269,7 @@ const BKPIMAGE_HAS_HOLE: u8 = 0x01; /* page image has "hole" */
|
|||||||
const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
|
const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
|
||||||
const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
|
const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
pub struct DecodedBkpBlock {
|
pub struct DecodedBkpBlock {
|
||||||
/* Is this block ref in use? */
|
/* Is this block ref in use? */
|
||||||
//in_use: bool,
|
//in_use: bool,
|
||||||
@@ -261,6 +278,7 @@ pub struct DecodedBkpBlock {
|
|||||||
pub rnode_spcnode: u32,
|
pub rnode_spcnode: u32,
|
||||||
pub rnode_dbnode: u32,
|
pub rnode_dbnode: u32,
|
||||||
pub rnode_relnode: u32,
|
pub rnode_relnode: u32,
|
||||||
|
// Note that we have a few special forknum values for non-rel files.
|
||||||
pub forknum: u8,
|
pub forknum: u8,
|
||||||
pub blkno: u32,
|
pub blkno: u32,
|
||||||
|
|
||||||
@@ -279,24 +297,43 @@ pub struct DecodedBkpBlock {
|
|||||||
|
|
||||||
/* Buffer holding the rmgr-specific data associated with this block */
|
/* Buffer holding the rmgr-specific data associated with this block */
|
||||||
has_data: bool,
|
has_data: bool,
|
||||||
//char *data;
|
|
||||||
data_len: u16,
|
data_len: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl DecodedBkpBlock {
|
||||||
|
pub fn new() -> DecodedBkpBlock {
|
||||||
|
DecodedBkpBlock {
|
||||||
|
rnode_spcnode: 0,
|
||||||
|
rnode_dbnode: 0,
|
||||||
|
rnode_relnode: 0,
|
||||||
|
forknum: 0,
|
||||||
|
blkno: 0,
|
||||||
|
|
||||||
|
flags: 0,
|
||||||
|
has_image: false,
|
||||||
|
apply_image: false,
|
||||||
|
will_init: false,
|
||||||
|
hole_offset: 0,
|
||||||
|
hole_length: 0,
|
||||||
|
bimg_len: 0,
|
||||||
|
bimg_info: 0,
|
||||||
|
|
||||||
|
has_data: false,
|
||||||
|
data_len: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(non_upper_case_globals)]
|
#[allow(non_upper_case_globals)]
|
||||||
const SizeOfXLogRecord: u32 = 24;
|
const SizeOfXLogRecord: u32 = 24;
|
||||||
|
|
||||||
pub struct DecodedWALRecord {
|
pub struct DecodedWALRecord {
|
||||||
pub lsn: u64, // LSN at the *end* of the record
|
|
||||||
pub record: Bytes, // raw XLogRecord
|
pub record: Bytes, // raw XLogRecord
|
||||||
|
|
||||||
pub blocks: Vec<DecodedBkpBlock>,
|
pub blocks: Vec<DecodedBkpBlock>,
|
||||||
|
pub main_data_offset: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
// From pg_control.h and rmgrlist.h
|
|
||||||
const XLOG_SWITCH: u8 = 0x40;
|
|
||||||
const RM_XLOG_ID: u8 = 0;
|
|
||||||
|
|
||||||
// Is this record an XLOG_SWITCH record? They need some special processing,
|
// Is this record an XLOG_SWITCH record? They need some special processing,
|
||||||
// so we need to check for that before the rest of the parsing.
|
// so we need to check for that before the rest of the parsing.
|
||||||
//
|
//
|
||||||
@@ -313,62 +350,88 @@ fn is_xlog_switch_record(rec: &Bytes) -> bool {
|
|||||||
buf.advance(2); // 2 bytes of padding
|
buf.advance(2); // 2 bytes of padding
|
||||||
let _xl_crc = buf.get_u32_le();
|
let _xl_crc = buf.get_u32_le();
|
||||||
|
|
||||||
return xl_info == XLOG_SWITCH && xl_rmid == RM_XLOG_ID;
|
return xl_info == pg_constants::XLOG_SWITCH && xl_rmid == pg_constants::RM_XLOG_ID;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub struct RelFileNode {
|
||||||
|
pub spcnode: u32,
|
||||||
|
pub dbnode: u32,
|
||||||
|
pub relnode: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Routines to decode a WAL record and figure out which blocks are modified
|
// Routines to decode a WAL record and figure out which blocks are modified
|
||||||
//
|
//
|
||||||
pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
|
// See xlogrecord.h for details
|
||||||
trace!(
|
// The overall layout of an XLOG record is:
|
||||||
"decoding record with LSN {:08X}/{:08X} ({} bytes)",
|
// Fixed-size header (XLogRecord struct)
|
||||||
lsn >> 32,
|
// XLogRecordBlockHeader struct
|
||||||
lsn & 0xffff_ffff,
|
// If BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows
|
||||||
rec.remaining()
|
// If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an
|
||||||
);
|
// XLogRecordBlockCompressHeader struct follows.
|
||||||
|
// If BKPBLOCK_SAME_REL is not set, a RelFileNode follows
|
||||||
|
// BlockNumber follows
|
||||||
|
// XLogRecordBlockHeader struct
|
||||||
|
// ...
|
||||||
|
// XLogRecordDataHeader[Short|Long] struct
|
||||||
|
// block data
|
||||||
|
// block data
|
||||||
|
// ...
|
||||||
|
// main data
|
||||||
|
pub fn decode_wal_record(rec: Bytes) -> DecodedWALRecord {
|
||||||
|
let mut rnode_spcnode: u32 = 0;
|
||||||
|
let mut rnode_dbnode: u32 = 0;
|
||||||
|
let mut rnode_relnode: u32 = 0;
|
||||||
|
let mut got_rnode = false;
|
||||||
|
|
||||||
let mut buf = rec.clone();
|
let mut buf = rec.clone();
|
||||||
|
|
||||||
|
// 1. Parse XLogRecord struct
|
||||||
|
|
||||||
// FIXME: assume little-endian here
|
// FIXME: assume little-endian here
|
||||||
let xl_tot_len = buf.get_u32_le();
|
let xl_tot_len = buf.get_u32_le();
|
||||||
let _xl_xid = buf.get_u32_le();
|
let xl_xid = buf.get_u32_le();
|
||||||
let _xl_prev = buf.get_u64_le();
|
let xl_prev = buf.get_u64_le();
|
||||||
let _xl_info = buf.get_u8();
|
let xl_info = buf.get_u8();
|
||||||
let _xl_rmid = buf.get_u8();
|
let xl_rmid = buf.get_u8();
|
||||||
buf.advance(2); // 2 bytes of padding
|
buf.advance(2); // 2 bytes of padding
|
||||||
let _xl_crc = buf.get_u32_le();
|
let _xl_crc = buf.get_u32_le();
|
||||||
|
|
||||||
|
trace!(
|
||||||
|
"decode_wal_record xl_rmid = {} xl_info = {}",
|
||||||
|
xl_rmid,
|
||||||
|
xl_info
|
||||||
|
);
|
||||||
|
|
||||||
let remaining = xl_tot_len - SizeOfXLogRecord;
|
let remaining = xl_tot_len - SizeOfXLogRecord;
|
||||||
|
|
||||||
if buf.remaining() != remaining as usize {
|
if buf.remaining() != remaining as usize {
|
||||||
//TODO error
|
//TODO error
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut rnode_spcnode: u32 = 0;
|
|
||||||
let mut rnode_dbnode: u32 = 0;
|
|
||||||
let mut rnode_relnode: u32 = 0;
|
|
||||||
let mut got_rnode = false;
|
|
||||||
|
|
||||||
// Decode the headers
|
|
||||||
|
|
||||||
let mut max_block_id = 0;
|
let mut max_block_id = 0;
|
||||||
|
let mut blocks_total_len: u32 = 0;
|
||||||
|
let mut main_data_len = 0;
|
||||||
let mut datatotal: u32 = 0;
|
let mut datatotal: u32 = 0;
|
||||||
let mut blocks: Vec<DecodedBkpBlock> = Vec::new();
|
let mut blocks: Vec<DecodedBkpBlock> = Vec::new();
|
||||||
|
|
||||||
|
// 2. Decode the headers.
|
||||||
|
// XLogRecordBlockHeaders if any,
|
||||||
|
// XLogRecordDataHeader[Short|Long]
|
||||||
while buf.remaining() > datatotal as usize {
|
while buf.remaining() > datatotal as usize {
|
||||||
let block_id = buf.get_u8();
|
let block_id = buf.get_u8();
|
||||||
|
|
||||||
match block_id {
|
match block_id {
|
||||||
XLR_BLOCK_ID_DATA_SHORT => {
|
XLR_BLOCK_ID_DATA_SHORT => {
|
||||||
/* XLogRecordDataHeaderShort */
|
/* XLogRecordDataHeaderShort */
|
||||||
let main_data_len = buf.get_u8() as u32;
|
main_data_len = buf.get_u8() as u32;
|
||||||
|
|
||||||
datatotal += main_data_len;
|
datatotal += main_data_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
XLR_BLOCK_ID_DATA_LONG => {
|
XLR_BLOCK_ID_DATA_LONG => {
|
||||||
/* XLogRecordDataHeaderShort */
|
/* XLogRecordDataHeaderLong */
|
||||||
let main_data_len = buf.get_u32();
|
main_data_len = buf.get_u32_le();
|
||||||
|
|
||||||
datatotal += main_data_len;
|
datatotal += main_data_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -384,25 +447,7 @@ pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
|
|||||||
|
|
||||||
0..=XLR_MAX_BLOCK_ID => {
|
0..=XLR_MAX_BLOCK_ID => {
|
||||||
/* XLogRecordBlockHeader */
|
/* XLogRecordBlockHeader */
|
||||||
let mut blk = DecodedBkpBlock {
|
let mut blk = DecodedBkpBlock::new();
|
||||||
rnode_spcnode: 0,
|
|
||||||
rnode_dbnode: 0,
|
|
||||||
rnode_relnode: 0,
|
|
||||||
forknum: 0,
|
|
||||||
blkno: 0,
|
|
||||||
|
|
||||||
flags: 0,
|
|
||||||
has_image: false,
|
|
||||||
apply_image: false,
|
|
||||||
will_init: false,
|
|
||||||
hole_offset: 0,
|
|
||||||
hole_length: 0,
|
|
||||||
bimg_len: 0,
|
|
||||||
bimg_info: 0,
|
|
||||||
|
|
||||||
has_data: false,
|
|
||||||
data_len: 0,
|
|
||||||
};
|
|
||||||
let fork_flags: u8;
|
let fork_flags: u8;
|
||||||
|
|
||||||
if block_id <= max_block_id {
|
if block_id <= max_block_id {
|
||||||
@@ -422,28 +467,12 @@ pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
|
|||||||
blk.has_image = (fork_flags & BKPBLOCK_HAS_IMAGE) != 0;
|
blk.has_image = (fork_flags & BKPBLOCK_HAS_IMAGE) != 0;
|
||||||
blk.has_data = (fork_flags & BKPBLOCK_HAS_DATA) != 0;
|
blk.has_data = (fork_flags & BKPBLOCK_HAS_DATA) != 0;
|
||||||
blk.will_init = (fork_flags & BKPBLOCK_WILL_INIT) != 0;
|
blk.will_init = (fork_flags & BKPBLOCK_WILL_INIT) != 0;
|
||||||
|
|
||||||
blk.data_len = buf.get_u16_le();
|
blk.data_len = buf.get_u16_le();
|
||||||
/* cross-check that the HAS_DATA flag is set iff data_length > 0 */
|
|
||||||
// TODO
|
/* TODO cross-check that the HAS_DATA flag is set iff data_length > 0 */
|
||||||
/*
|
|
||||||
if (blk->has_data && blk->data_len == 0)
|
|
||||||
{
|
|
||||||
report_invalid_record(state,
|
|
||||||
"BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
|
|
||||||
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
if (!blk->has_data && blk->data_len != 0)
|
|
||||||
{
|
|
||||||
report_invalid_record(state,
|
|
||||||
"BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
|
|
||||||
(unsigned int) blk->data_len,
|
|
||||||
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
datatotal += blk.data_len as u32;
|
datatotal += blk.data_len as u32;
|
||||||
|
blocks_total_len += blk.data_len as u32;
|
||||||
|
|
||||||
if blk.has_image {
|
if blk.has_image {
|
||||||
blk.bimg_len = buf.get_u16_le();
|
blk.bimg_len = buf.get_u16_le();
|
||||||
@@ -462,6 +491,7 @@ pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
|
|||||||
blk.hole_length = BLCKSZ - blk.bimg_len;
|
blk.hole_length = BLCKSZ - blk.bimg_len;
|
||||||
}
|
}
|
||||||
datatotal += blk.bimg_len as u32;
|
datatotal += blk.bimg_len as u32;
|
||||||
|
blocks_total_len += blk.bimg_len as u32;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* cross-check that hole_offset > 0, hole_length > 0 and
|
* cross-check that hole_offset > 0, hole_length > 0 and
|
||||||
@@ -537,28 +567,28 @@ pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
|
|||||||
rnode_spcnode = buf.get_u32_le();
|
rnode_spcnode = buf.get_u32_le();
|
||||||
rnode_dbnode = buf.get_u32_le();
|
rnode_dbnode = buf.get_u32_le();
|
||||||
rnode_relnode = buf.get_u32_le();
|
rnode_relnode = buf.get_u32_le();
|
||||||
//rnode = &blk->rnode;
|
|
||||||
got_rnode = true;
|
got_rnode = true;
|
||||||
} else {
|
} else if !got_rnode {
|
||||||
if !got_rnode {
|
// TODO
|
||||||
// TODO
|
/*
|
||||||
/*
|
report_invalid_record(state,
|
||||||
report_invalid_record(state,
|
"BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
|
||||||
"BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
|
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
|
||||||
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
|
goto err; */
|
||||||
goto err;
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
//blk->rnode = *rnode;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
blk.rnode_spcnode = rnode_spcnode;
|
blk.rnode_spcnode = rnode_spcnode;
|
||||||
blk.rnode_dbnode = rnode_dbnode;
|
blk.rnode_dbnode = rnode_dbnode;
|
||||||
blk.rnode_relnode = rnode_relnode;
|
blk.rnode_relnode = rnode_relnode;
|
||||||
|
|
||||||
blk.blkno = buf.get_u32_le();
|
blk.blkno = buf.get_u32_le();
|
||||||
|
trace!(
|
||||||
//println!("this record affects {}/{}/{} blk {}",rnode_spcnode, rnode_dbnode, rnode_relnode, blk.blkno);
|
"this record affects {}/{}/{} blk {}",
|
||||||
|
rnode_spcnode,
|
||||||
|
rnode_dbnode,
|
||||||
|
rnode_relnode,
|
||||||
|
blk.blkno
|
||||||
|
);
|
||||||
|
|
||||||
blocks.push(blk);
|
blocks.push(blk);
|
||||||
}
|
}
|
||||||
@@ -569,21 +599,58 @@ pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
// 3. Decode blocks.
|
||||||
* Ok, we've parsed the fragment headers, and verified that the total
|
// We don't need them, so just skip blocks_total_len bytes
|
||||||
* length of the payload in the fragments is equal to the amount of data
|
buf.advance(blocks_total_len as usize);
|
||||||
* left. Copy the data of each fragment to a separate buffer.
|
|
||||||
*
|
|
||||||
* We could just set up pointers into readRecordBuf, but we want to align
|
|
||||||
* the data for the convenience of the callers. Backup images are not
|
|
||||||
* copied, however; they don't need alignment.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Since we don't care about the data payloads here, we're done.
|
let main_data_offset = (xl_tot_len - main_data_len) as usize;
|
||||||
|
|
||||||
return DecodedWALRecord {
|
// 4. Decode main_data
|
||||||
lsn: lsn,
|
if main_data_len > 0 {
|
||||||
|
assert_eq!(buf.remaining(), main_data_len as usize);
|
||||||
|
}
|
||||||
|
|
||||||
|
//5. Handle special CLOG and XACT records
|
||||||
|
if xl_rmid == pg_constants::RM_CLOG_ID {
|
||||||
|
let mut blk = DecodedBkpBlock::new();
|
||||||
|
blk.forknum = pg_constants::PG_XACT_FORKNUM as u8;
|
||||||
|
blk.blkno = buf.get_i32_le() as u32;
|
||||||
|
trace!("RM_CLOG_ID updates block {}", blk.blkno);
|
||||||
|
blocks.push(blk);
|
||||||
|
} else if xl_rmid == pg_constants::RM_XACT_ID {
|
||||||
|
let info = xl_info & pg_constants::XLOG_XACT_OPMASK;
|
||||||
|
if info == pg_constants::XLOG_XACT_COMMIT {
|
||||||
|
let mut blk = DecodedBkpBlock::new();
|
||||||
|
blk.forknum = pg_constants::PG_XACT_FORKNUM as u8;
|
||||||
|
blk.blkno = xl_xid / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||||
|
trace!(
|
||||||
|
"XLOG_XACT_COMMIT xl_prev {:X}/{:X} xid {} updates block {}",
|
||||||
|
(xl_prev >> 32),
|
||||||
|
xl_prev & 0xffffffff,
|
||||||
|
xl_xid,
|
||||||
|
blk.blkno
|
||||||
|
);
|
||||||
|
blocks.push(blk);
|
||||||
|
//TODO parse commit record to extract subtrans entries
|
||||||
|
} else if info == pg_constants::XLOG_XACT_ABORT {
|
||||||
|
let mut blk = DecodedBkpBlock::new();
|
||||||
|
blk.forknum = pg_constants::PG_XACT_FORKNUM as u8;
|
||||||
|
blk.blkno = xl_xid / pg_constants::CLOG_XACTS_PER_PAGE;
|
||||||
|
trace!(
|
||||||
|
"XLOG_XACT_ABORT xl_prev {:X}/{:X} xid {} updates block {}",
|
||||||
|
(xl_prev >> 32),
|
||||||
|
xl_prev & 0xffffffff,
|
||||||
|
xl_xid,
|
||||||
|
blk.blkno
|
||||||
|
);
|
||||||
|
blocks.push(blk);
|
||||||
|
//TODO parse abort record to extract subtrans entries
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DecodedWALRecord {
|
||||||
record: rec,
|
record: rec,
|
||||||
blocks: blocks,
|
blocks,
|
||||||
};
|
main_data_offset: main_data_offset,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,29 +1,96 @@
|
|||||||
//
|
//!
|
||||||
// WAL receiver
|
//! WAL receiver
|
||||||
//
|
//!
|
||||||
// The WAL receiver connects to the WAL safekeeper service, and streams WAL.
|
//! The WAL receiver connects to the WAL safekeeper service, and streams WAL.
|
||||||
// For each WAL record, it decodes the record to figure out which data blocks
|
//! For each WAL record, it decodes the record to figure out which data blocks
|
||||||
// the record affects, and adds the records to the page cache.
|
//! the record affects, and adds the records to the page cache.
|
||||||
//
|
//!
|
||||||
use log::*;
|
|
||||||
|
|
||||||
use tokio::runtime;
|
|
||||||
use tokio::time::{sleep, Duration};
|
|
||||||
use tokio_stream::StreamExt;
|
|
||||||
|
|
||||||
use crate::page_cache;
|
use crate::page_cache;
|
||||||
use crate::page_cache::BufferTag;
|
use crate::page_cache::BufferTag;
|
||||||
use crate::waldecoder::WalStreamDecoder;
|
use crate::waldecoder::{decode_wal_record, WalStreamDecoder};
|
||||||
use crate::PageServerConf;
|
use crate::PageServerConf;
|
||||||
|
use crate::ZTimelineId;
|
||||||
|
use anyhow::Error;
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use log::*;
|
||||||
use postgres_protocol::message::backend::ReplicationMessage;
|
use postgres_protocol::message::backend::ReplicationMessage;
|
||||||
use tokio_postgres::{connect_replication, Error, NoTls, ReplicationMode};
|
use postgres_types::PgLsn;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fs;
|
||||||
|
use std::fs::{File, OpenOptions};
|
||||||
|
use std::io::{Seek, SeekFrom, Write};
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
use std::thread;
|
||||||
|
use tokio::runtime;
|
||||||
|
use tokio::time::{sleep, Duration};
|
||||||
|
use tokio_postgres::replication::{PgTimestamp, ReplicationStream};
|
||||||
|
use tokio_postgres::{NoTls, SimpleQueryMessage, SimpleQueryRow};
|
||||||
|
use tokio_stream::StreamExt;
|
||||||
|
|
||||||
|
//
|
||||||
|
// We keep one WAL Receiver active per timeline.
|
||||||
|
//
|
||||||
|
struct WalReceiverEntry {
|
||||||
|
wal_producer_connstr: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
static ref WAL_RECEIVERS: Mutex<HashMap<ZTimelineId, WalReceiverEntry>> =
|
||||||
|
Mutex::new(HashMap::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Launch a new WAL receiver, or tell one that's running about change in connection string
|
||||||
|
pub fn launch_wal_receiver(
|
||||||
|
conf: &PageServerConf,
|
||||||
|
timelineid: ZTimelineId,
|
||||||
|
wal_producer_connstr: &str,
|
||||||
|
) {
|
||||||
|
let mut receivers = WAL_RECEIVERS.lock().unwrap();
|
||||||
|
|
||||||
|
match receivers.get_mut(&timelineid) {
|
||||||
|
Some(receiver) => {
|
||||||
|
receiver.wal_producer_connstr = wal_producer_connstr.into();
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let receiver = WalReceiverEntry {
|
||||||
|
wal_producer_connstr: wal_producer_connstr.into(),
|
||||||
|
};
|
||||||
|
receivers.insert(timelineid, receiver);
|
||||||
|
|
||||||
|
// Also launch a new thread to handle this connection
|
||||||
|
let conf_copy = conf.clone();
|
||||||
|
let _walreceiver_thread = thread::Builder::new()
|
||||||
|
.name("WAL receiver thread".into())
|
||||||
|
.spawn(move || {
|
||||||
|
thread_main(&conf_copy, timelineid);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Look up current WAL producer connection string in the hash table
|
||||||
|
fn get_wal_producer_connstr(timelineid: ZTimelineId) -> String {
|
||||||
|
let receivers = WAL_RECEIVERS.lock().unwrap();
|
||||||
|
|
||||||
|
receivers
|
||||||
|
.get(&timelineid)
|
||||||
|
.unwrap()
|
||||||
|
.wal_producer_connstr
|
||||||
|
.clone()
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// This is the entry point for the WAL receiver thread.
|
// This is the entry point for the WAL receiver thread.
|
||||||
//
|
//
|
||||||
pub fn thread_main(conf: PageServerConf, wal_producer_connstr: &String) {
|
fn thread_main(conf: &PageServerConf, timelineid: ZTimelineId) {
|
||||||
info!("WAL receiver thread started: '{}'", wal_producer_connstr);
|
info!(
|
||||||
|
"WAL receiver thread started for timeline : '{}'",
|
||||||
|
timelineid
|
||||||
|
);
|
||||||
|
|
||||||
let runtime = runtime::Builder::new_current_thread()
|
let runtime = runtime::Builder::new_current_thread()
|
||||||
.enable_all()
|
.enable_all()
|
||||||
@@ -32,31 +99,32 @@ pub fn thread_main(conf: PageServerConf, wal_producer_connstr: &String) {
|
|||||||
|
|
||||||
runtime.block_on(async {
|
runtime.block_on(async {
|
||||||
loop {
|
loop {
|
||||||
let _res = walreceiver_main(conf.clone(), wal_producer_connstr).await;
|
// Look up the current WAL producer address
|
||||||
|
let wal_producer_connstr = get_wal_producer_connstr(timelineid);
|
||||||
|
|
||||||
// TODO: print/log the error
|
let res = walreceiver_main(conf, timelineid, &wal_producer_connstr).await;
|
||||||
info!(
|
|
||||||
"WAL streaming connection failed, retrying in 1 second...: {:?}",
|
if let Err(e) = res {
|
||||||
_res
|
info!(
|
||||||
);
|
"WAL streaming connection failed ({}), retrying in 1 second",
|
||||||
sleep(Duration::from_secs(1)).await;
|
e
|
||||||
|
);
|
||||||
|
sleep(Duration::from_secs(1)).await;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn walreceiver_main(
|
async fn walreceiver_main(
|
||||||
conf: PageServerConf,
|
conf: &PageServerConf,
|
||||||
wal_producer_connstr: &String,
|
timelineid: ZTimelineId,
|
||||||
|
wal_producer_connstr: &str,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
// Connect to the database in replication mode.
|
// Connect to the database in replication mode.
|
||||||
debug!("connecting to {}...", wal_producer_connstr);
|
info!("connecting to {:?}", wal_producer_connstr);
|
||||||
let (mut rclient, connection) = connect_replication(
|
let connect_cfg = format!("{} replication=true", wal_producer_connstr);
|
||||||
wal_producer_connstr.as_str(),
|
let (rclient, connection) = tokio_postgres::connect(&connect_cfg, NoTls).await?;
|
||||||
NoTls,
|
info!("connected!");
|
||||||
ReplicationMode::Physical,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
debug!("connected!");
|
|
||||||
|
|
||||||
// The connection object performs the actual communication with the database,
|
// The connection object performs the actual communication with the database,
|
||||||
// so spawn it off to run on its own.
|
// so spawn it off to run on its own.
|
||||||
@@ -66,28 +134,29 @@ async fn walreceiver_main(
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
let identify_system = rclient.identify_system().await?;
|
let identify = identify_system(&rclient).await?;
|
||||||
let end_of_wal = u64::from(identify_system.xlogpos());
|
info!("{:?}", identify);
|
||||||
|
let end_of_wal = u64::from(identify.xlogpos);
|
||||||
let mut caught_up = false;
|
let mut caught_up = false;
|
||||||
|
|
||||||
let sysid: u64 = identify_system.systemid().parse().unwrap();
|
let pcache = page_cache::get_pagecache(&conf, timelineid).unwrap();
|
||||||
let pcache = page_cache::get_pagecache(conf, sysid);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Start streaming the WAL, from where we left off previously.
|
// Start streaming the WAL, from where we left off previously.
|
||||||
//
|
//
|
||||||
let mut startpoint = pcache.get_last_valid_lsn();
|
let mut startpoint = pcache.get_last_valid_lsn();
|
||||||
|
let last_valid_lsn = pcache.get_last_valid_lsn();
|
||||||
if startpoint == 0 {
|
if startpoint == 0 {
|
||||||
// If we start here with identify_system.xlogpos() we will have race condition with
|
// If we start here with identify.xlogpos we will have race condition with
|
||||||
// postgres start: insert into postgres may request page that was modified with lsn
|
// postgres start: insert into postgres may request page that was modified with lsn
|
||||||
// smaller than identify_system.xlogpos().
|
// smaller than identify.xlogpos.
|
||||||
//
|
//
|
||||||
// Current procedure for starting postgres will anyway be changed to something
|
// Current procedure for starting postgres will anyway be changed to something
|
||||||
// different like having 'initdb' method on a pageserver (or importing some shared
|
// different like having 'initdb' method on a pageserver (or importing some shared
|
||||||
// empty database snapshot), so for now I just put start of first segment which
|
// empty database snapshot), so for now I just put start of first segment which
|
||||||
// seems to be a valid record.
|
// seems to be a valid record.
|
||||||
pcache.init_valid_lsn(0x_1_000_000_u64);
|
pcache.init_valid_lsn(0x_1_000_000_u64);
|
||||||
startpoint = u64::from(0x_1_000_000_u64);
|
startpoint = 0x_1_000_000_u64;
|
||||||
} else {
|
} else {
|
||||||
// There might be some padding after the last full record, skip it.
|
// There might be some padding after the last full record, skip it.
|
||||||
//
|
//
|
||||||
@@ -99,16 +168,23 @@ async fn walreceiver_main(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
debug!(
|
debug!(
|
||||||
"starting replication from {:X}/{:X}, server is at {:X}/{:X}...",
|
"last_valid_lsn {:X}/{:X} starting replication from {:X}/{:X} for timeline {}, server is at {:X}/{:X}...",
|
||||||
|
(last_valid_lsn >> 32),
|
||||||
|
(last_valid_lsn & 0xffffffff),
|
||||||
(startpoint >> 32),
|
(startpoint >> 32),
|
||||||
(startpoint & 0xffffffff),
|
(startpoint & 0xffffffff),
|
||||||
|
timelineid,
|
||||||
(end_of_wal >> 32),
|
(end_of_wal >> 32),
|
||||||
(end_of_wal & 0xffffffff)
|
(end_of_wal & 0xffffffff)
|
||||||
);
|
);
|
||||||
let startpoint = tokio_postgres::types::Lsn::from(startpoint);
|
|
||||||
let mut physical_stream = rclient
|
let startpoint = PgLsn::from(startpoint);
|
||||||
.start_physical_replication(None, startpoint, None)
|
let query = format!("START_REPLICATION PHYSICAL {}", startpoint);
|
||||||
.await?;
|
let copy_stream = rclient.copy_both_simple::<bytes::Bytes>(&query).await?;
|
||||||
|
|
||||||
|
let physical_stream = ReplicationStream::new(copy_stream);
|
||||||
|
tokio::pin!(physical_stream);
|
||||||
|
|
||||||
let mut waldecoder = WalStreamDecoder::new(u64::from(startpoint));
|
let mut waldecoder = WalStreamDecoder::new(u64::from(startpoint));
|
||||||
|
|
||||||
while let Some(replication_message) = physical_stream.next().await {
|
while let Some(replication_message) = physical_stream.next().await {
|
||||||
@@ -120,6 +196,13 @@ async fn walreceiver_main(
|
|||||||
let startlsn = xlog_data.wal_start();
|
let startlsn = xlog_data.wal_start();
|
||||||
let endlsn = startlsn + data.len() as u64;
|
let endlsn = startlsn + data.len() as u64;
|
||||||
|
|
||||||
|
write_wal_file(
|
||||||
|
startlsn,
|
||||||
|
timelineid,
|
||||||
|
16 * 1024 * 1024, // FIXME
|
||||||
|
data,
|
||||||
|
)?;
|
||||||
|
|
||||||
trace!(
|
trace!(
|
||||||
"received XLogData between {:X}/{:X} and {:X}/{:X}",
|
"received XLogData between {:X}/{:X} and {:X}/{:X}",
|
||||||
(startlsn >> 32),
|
(startlsn >> 32),
|
||||||
@@ -131,10 +214,8 @@ async fn walreceiver_main(
|
|||||||
waldecoder.feed_bytes(data);
|
waldecoder.feed_bytes(data);
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
if let Some((lsn, recdata)) = waldecoder.poll_decode() {
|
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
|
||||||
let decoded =
|
let decoded = decode_wal_record(recdata.clone());
|
||||||
crate::waldecoder::decode_wal_record(startlsn, recdata.clone());
|
|
||||||
|
|
||||||
// Put the WAL record to the page cache. We make a separate copy of
|
// Put the WAL record to the page cache. We make a separate copy of
|
||||||
// it for every block it modifies. (The actual WAL record is kept in
|
// it for every block it modifies. (The actual WAL record is kept in
|
||||||
// a Bytes, which uses a reference counter for the underlying buffer,
|
// a Bytes, which uses a reference counter for the underlying buffer,
|
||||||
@@ -149,17 +230,17 @@ async fn walreceiver_main(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let rec = page_cache::WALRecord {
|
let rec = page_cache::WALRecord {
|
||||||
lsn: lsn,
|
lsn,
|
||||||
will_init: blk.will_init || blk.apply_image,
|
will_init: blk.will_init || blk.apply_image,
|
||||||
rec: recdata.clone(),
|
rec: recdata.clone(),
|
||||||
|
main_data_offset: decoded.main_data_offset,
|
||||||
};
|
};
|
||||||
|
|
||||||
pcache.put_wal_record(tag, rec);
|
pcache.put_wal_record(tag, rec);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now that this record has been handled, let the page cache know that
|
// Now that this record has been handled, let the page cache know that
|
||||||
// it is up-to-date to this LSN
|
// it is up-to-date to this LSN
|
||||||
pcache.advance_last_valid_lsn(lsn);
|
pcache.advance_last_record_lsn(lsn);
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -183,12 +264,230 @@ async fn walreceiver_main(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ReplicationMessage::PrimaryKeepAlive(_keepalive) => {
|
ReplicationMessage::PrimaryKeepAlive(keepalive) => {
|
||||||
trace!("received PrimaryKeepAlive");
|
let wal_end = keepalive.wal_end();
|
||||||
// FIXME: Reply, or the connection will time out
|
let timestamp = keepalive.timestamp();
|
||||||
|
let reply_requested: bool = keepalive.reply() != 0;
|
||||||
|
|
||||||
|
trace!(
|
||||||
|
"received PrimaryKeepAlive(wal_end: {}, timestamp: {} reply: {})",
|
||||||
|
wal_end,
|
||||||
|
timestamp,
|
||||||
|
reply_requested,
|
||||||
|
);
|
||||||
|
if reply_requested {
|
||||||
|
// TODO: More thought should go into what values are sent here.
|
||||||
|
let last_lsn = PgLsn::from(pcache.get_last_valid_lsn());
|
||||||
|
let write_lsn = last_lsn;
|
||||||
|
let flush_lsn = last_lsn;
|
||||||
|
let apply_lsn = PgLsn::INVALID;
|
||||||
|
let ts = PgTimestamp::now()?;
|
||||||
|
const NO_REPLY: u8 = 0u8;
|
||||||
|
|
||||||
|
physical_stream
|
||||||
|
.as_mut()
|
||||||
|
.standby_status_update(write_lsn, flush_lsn, apply_lsn, ts, NO_REPLY)
|
||||||
|
.await?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Data returned from the postgres `IDENTIFY_SYSTEM` command
|
||||||
|
///
|
||||||
|
/// See the [postgres docs] for more details.
|
||||||
|
///
|
||||||
|
/// [postgres docs]: https://www.postgresql.org/docs/current/protocol-replication.html
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct IdentifySystem {
|
||||||
|
systemid: u64,
|
||||||
|
timeline: u32,
|
||||||
|
xlogpos: PgLsn,
|
||||||
|
dbname: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// There was a problem parsing the response to
|
||||||
|
/// a postgres IDENTIFY_SYSTEM command.
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
#[error("IDENTIFY_SYSTEM parse error")]
|
||||||
|
pub struct IdentifyError;
|
||||||
|
|
||||||
|
/// Run the postgres `IDENTIFY_SYSTEM` command
|
||||||
|
pub async fn identify_system(client: &tokio_postgres::Client) -> Result<IdentifySystem, Error> {
|
||||||
|
let query_str = "IDENTIFY_SYSTEM";
|
||||||
|
let response = client.simple_query(query_str).await?;
|
||||||
|
|
||||||
|
// get(N) from row, then parse it as some destination type.
|
||||||
|
fn get_parse<T>(row: &SimpleQueryRow, idx: usize) -> Result<T, IdentifyError>
|
||||||
|
where
|
||||||
|
T: FromStr,
|
||||||
|
{
|
||||||
|
let val = row.get(idx).ok_or(IdentifyError)?;
|
||||||
|
val.parse::<T>().or(Err(IdentifyError))
|
||||||
|
}
|
||||||
|
|
||||||
|
// extract the row contents into an IdentifySystem struct.
|
||||||
|
// written as a closure so I can use ? for Option here.
|
||||||
|
if let Some(SimpleQueryMessage::Row(first_row)) = response.get(0) {
|
||||||
|
Ok(IdentifySystem {
|
||||||
|
systemid: get_parse(first_row, 0)?,
|
||||||
|
timeline: get_parse(first_row, 1)?,
|
||||||
|
xlogpos: get_parse(first_row, 2)?,
|
||||||
|
dbname: get_parse(first_row, 3).ok(),
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
Err(IdentifyError)?
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const XLOG_FNAME_LEN: usize = 24;
|
||||||
|
pub const XLOG_BLCKSZ: usize = 8192;
|
||||||
|
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
|
||||||
|
pub const XLOG_PAGE_MAGIC: u16 = 0xD109;
|
||||||
|
pub const XLP_REM_LEN_OFFS: usize = 2 + 2 + 4 + 8;
|
||||||
|
pub const XLOG_SIZE_OF_XLOG_SHORT_PHD: usize = XLP_REM_LEN_OFFS + 4 + 4;
|
||||||
|
pub const XLOG_SIZE_OF_XLOG_LONG_PHD: usize = XLOG_SIZE_OF_XLOG_SHORT_PHD + 8 + 4 + 4;
|
||||||
|
pub const XLOG_RECORD_CRC_OFFS: usize = 4 + 4 + 8 + 1 + 1 + 2;
|
||||||
|
pub const XLOG_SIZE_OF_XLOG_RECORD: usize = XLOG_RECORD_CRC_OFFS + 4;
|
||||||
|
pub type XLogRecPtr = u64;
|
||||||
|
pub type TimeLineID = u32;
|
||||||
|
pub type TimestampTz = u64;
|
||||||
|
pub type XLogSegNo = u64;
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogSegmentOffset(xlogptr: XLogRecPtr, wal_segsz_bytes: usize) -> u32 {
|
||||||
|
return (xlogptr as u32) & (wal_segsz_bytes as u32 - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogSegmentsPerXLogId(wal_segsz_bytes: usize) -> XLogSegNo {
|
||||||
|
return (0x100000000u64 / wal_segsz_bytes as u64) as XLogSegNo;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLByteToSeg(xlogptr: XLogRecPtr, wal_segsz_bytes: usize) -> XLogSegNo {
|
||||||
|
return xlogptr / wal_segsz_bytes as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogSegNoOffsetToRecPtr(
|
||||||
|
segno: XLogSegNo,
|
||||||
|
offset: u32,
|
||||||
|
wal_segsz_bytes: usize,
|
||||||
|
) -> XLogRecPtr {
|
||||||
|
return segno * (wal_segsz_bytes as u64) + (offset as u64);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
|
||||||
|
return format!(
|
||||||
|
"{:>08X}{:>08X}{:>08X}",
|
||||||
|
tli,
|
||||||
|
logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
|
||||||
|
logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) {
|
||||||
|
let tli = u32::from_str_radix(&fname[0..8], 16).unwrap();
|
||||||
|
let log = u32::from_str_radix(&fname[8..16], 16).unwrap() as XLogSegNo;
|
||||||
|
let seg = u32::from_str_radix(&fname[16..24], 16).unwrap() as XLogSegNo;
|
||||||
|
return (log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_wal_file(
|
||||||
|
startpos: XLogRecPtr,
|
||||||
|
timeline: ZTimelineId,
|
||||||
|
wal_seg_size: usize,
|
||||||
|
buf: &[u8],
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let mut bytes_left: usize = buf.len();
|
||||||
|
let mut bytes_written: usize = 0;
|
||||||
|
let mut partial;
|
||||||
|
let mut start_pos = startpos;
|
||||||
|
const ZERO_BLOCK: &'static [u8] = &[0u8; XLOG_BLCKSZ];
|
||||||
|
|
||||||
|
let wal_dir = PathBuf::from(format!("timelines/{}/wal", timeline));
|
||||||
|
|
||||||
|
/* Extract WAL location for this block */
|
||||||
|
let mut xlogoff = XLogSegmentOffset(start_pos, wal_seg_size) as usize;
|
||||||
|
|
||||||
|
while bytes_left != 0 {
|
||||||
|
let bytes_to_write;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If crossing a WAL boundary, only write up until we reach wal
|
||||||
|
* segment size.
|
||||||
|
*/
|
||||||
|
if xlogoff + bytes_left > wal_seg_size {
|
||||||
|
bytes_to_write = wal_seg_size - xlogoff;
|
||||||
|
} else {
|
||||||
|
bytes_to_write = bytes_left;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Open file */
|
||||||
|
let segno = XLByteToSeg(start_pos, wal_seg_size);
|
||||||
|
let wal_file_name = XLogFileName(
|
||||||
|
1, // FIXME: always use Postgres timeline 1
|
||||||
|
segno,
|
||||||
|
wal_seg_size,
|
||||||
|
);
|
||||||
|
let wal_file_path = wal_dir.join(wal_file_name.clone());
|
||||||
|
let wal_file_partial_path = wal_dir.join(wal_file_name.clone() + ".partial");
|
||||||
|
|
||||||
|
{
|
||||||
|
let mut wal_file: File;
|
||||||
|
/* Try to open already completed segment */
|
||||||
|
if let Ok(file) = OpenOptions::new().write(true).open(&wal_file_path) {
|
||||||
|
wal_file = file;
|
||||||
|
partial = false;
|
||||||
|
} else if let Ok(file) = OpenOptions::new().write(true).open(&wal_file_partial_path) {
|
||||||
|
/* Try to open existed partial file */
|
||||||
|
wal_file = file;
|
||||||
|
partial = true;
|
||||||
|
} else {
|
||||||
|
/* Create and fill new partial file */
|
||||||
|
partial = true;
|
||||||
|
match OpenOptions::new()
|
||||||
|
.create(true)
|
||||||
|
.write(true)
|
||||||
|
.open(&wal_file_partial_path)
|
||||||
|
{
|
||||||
|
Ok(mut file) => {
|
||||||
|
for _ in 0..(wal_seg_size / XLOG_BLCKSZ) {
|
||||||
|
file.write_all(&ZERO_BLOCK)?;
|
||||||
|
}
|
||||||
|
wal_file = file;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
error!("Failed to open log file {:?}: {}", &wal_file_path, e);
|
||||||
|
return Err(e.into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wal_file.seek(SeekFrom::Start(xlogoff as u64))?;
|
||||||
|
wal_file.write_all(&buf[bytes_written..(bytes_written + bytes_to_write)])?;
|
||||||
|
|
||||||
|
// FIXME: Flush the file
|
||||||
|
//wal_file.sync_all()?;
|
||||||
|
}
|
||||||
|
/* Write was successful, advance our position */
|
||||||
|
bytes_written += bytes_to_write;
|
||||||
|
bytes_left -= bytes_to_write;
|
||||||
|
start_pos += bytes_to_write as u64;
|
||||||
|
xlogoff += bytes_to_write;
|
||||||
|
|
||||||
|
/* Did we reach the end of a WAL segment? */
|
||||||
|
if XLogSegmentOffset(start_pos, wal_seg_size) == 0 {
|
||||||
|
xlogoff = 0;
|
||||||
|
if partial {
|
||||||
|
fs::rename(&wal_file_partial_path, &wal_file_path)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|||||||
@@ -19,30 +19,31 @@ use std::assert;
|
|||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::io::Error;
|
use std::io::Error;
|
||||||
|
use std::process::Stdio;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
use std::{path::PathBuf, process::Stdio};
|
|
||||||
use tokio::io::AsyncBufReadExt;
|
use tokio::io::AsyncBufReadExt;
|
||||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||||
use tokio::process::{Child, ChildStdin, ChildStdout, Command};
|
use tokio::process::{Child, ChildStdin, ChildStdout, Command};
|
||||||
use tokio::runtime::Runtime;
|
use tokio::runtime::Runtime;
|
||||||
use tokio::time::timeout;
|
use tokio::time::timeout;
|
||||||
|
|
||||||
use bytes::{BufMut, Bytes, BytesMut};
|
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||||
|
|
||||||
use crate::page_cache;
|
use crate::page_cache;
|
||||||
use crate::page_cache::CacheEntry;
|
use crate::page_cache::CacheEntry;
|
||||||
use crate::page_cache::WALRecord;
|
use crate::page_cache::WALRecord;
|
||||||
use crate::{page_cache::BufferTag, PageServerConf};
|
use crate::ZTimelineId;
|
||||||
|
use crate::{page_cache::BufferTag, pg_constants, PageServerConf};
|
||||||
|
|
||||||
static TIMEOUT: Duration = Duration::from_secs(20);
|
static TIMEOUT: Duration = Duration::from_secs(20);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Main entry point for the WAL applicator thread.
|
// Main entry point for the WAL applicator thread.
|
||||||
//
|
//
|
||||||
pub fn wal_redo_main(conf: PageServerConf, sys_id: u64) {
|
pub fn wal_redo_main(conf: &PageServerConf, timelineid: ZTimelineId) {
|
||||||
info!("WAL redo thread started {}", sys_id);
|
info!("WAL redo thread started {}", timelineid);
|
||||||
|
|
||||||
// We block on waiting for requests on the walredo request channel, but
|
// We block on waiting for requests on the walredo request channel, but
|
||||||
// use async I/O to communicate with the child process. Initialize the
|
// use async I/O to communicate with the child process. Initialize the
|
||||||
@@ -52,15 +53,15 @@ pub fn wal_redo_main(conf: PageServerConf, sys_id: u64) {
|
|||||||
.build()
|
.build()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let pcache = page_cache::get_pagecache(conf.clone(), sys_id);
|
let pcache = page_cache::get_pagecache(conf, timelineid).unwrap();
|
||||||
|
|
||||||
// Loop forever, handling requests as they come.
|
// Loop forever, handling requests as they come.
|
||||||
let walredo_channel_receiver = &pcache.walredo_receiver;
|
let walredo_channel_receiver = &pcache.walredo_receiver;
|
||||||
loop {
|
loop {
|
||||||
let mut process: WalRedoProcess;
|
let mut process: WalRedoProcess;
|
||||||
let datadir = conf.data_dir.join(format!("wal-redo/{}", sys_id));
|
let datadir = format!("wal-redo/{}", timelineid);
|
||||||
|
|
||||||
info!("launching WAL redo postgres process {}", sys_id);
|
info!("launching WAL redo postgres process {}", timelineid);
|
||||||
{
|
{
|
||||||
let _guard = runtime.enter();
|
let _guard = runtime.enter();
|
||||||
process = WalRedoProcess::launch(&datadir, &runtime).unwrap();
|
process = WalRedoProcess::launch(&datadir, &runtime).unwrap();
|
||||||
@@ -88,6 +89,59 @@ pub fn wal_redo_main(conf: PageServerConf, sys_id: u64) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn transaction_id_set_status_bit(
|
||||||
|
xl_info: u8,
|
||||||
|
xl_rmid: u8,
|
||||||
|
xl_xid: u32,
|
||||||
|
record: WALRecord,
|
||||||
|
page: &mut BytesMut,
|
||||||
|
) {
|
||||||
|
let info = xl_info & pg_constants::XLOG_XACT_OPMASK;
|
||||||
|
let mut status = 0;
|
||||||
|
if info == pg_constants::XLOG_XACT_COMMIT {
|
||||||
|
status = pg_constants::TRANSACTION_STATUS_COMMITTED;
|
||||||
|
} else if info == pg_constants::XLOG_XACT_ABORT {
|
||||||
|
status = pg_constants::TRANSACTION_STATUS_ABORTED;
|
||||||
|
} else {
|
||||||
|
trace!("handle_apply_request for RM_XACT_ID-{} NOT SUPPORTED YET. RETURN. lsn {:X}/{:X} main_data_offset {}, rec.len {}",
|
||||||
|
status,
|
||||||
|
record.lsn >> 32,
|
||||||
|
record.lsn & 0xffffffff,
|
||||||
|
record.main_data_offset, record.rec.len());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
trace!("handle_apply_request for RM_XACT_ID-{} (1-commit, 2-abort) lsn {:X}/{:X} main_data_offset {}, rec.len {}",
|
||||||
|
status,
|
||||||
|
record.lsn >> 32,
|
||||||
|
record.lsn & 0xffffffff,
|
||||||
|
record.main_data_offset, record.rec.len());
|
||||||
|
|
||||||
|
let byteno: usize = ((xl_rmid as u32 % pg_constants::CLOG_XACTS_PER_PAGE as u32)
|
||||||
|
/ pg_constants::CLOG_XACTS_PER_BYTE) as usize;
|
||||||
|
|
||||||
|
let byteptr = &mut page[byteno..byteno + 1];
|
||||||
|
let bshift: u8 = ((xl_xid % pg_constants::CLOG_XACTS_PER_BYTE)
|
||||||
|
* pg_constants::CLOG_BITS_PER_XACT as u32) as u8;
|
||||||
|
|
||||||
|
let mut curval = byteptr[0];
|
||||||
|
curval = (curval >> bshift) & pg_constants::CLOG_XACT_BITMASK;
|
||||||
|
|
||||||
|
let mut byteval = [0];
|
||||||
|
byteval[0] = curval;
|
||||||
|
byteval[0] &= !(((1 << pg_constants::CLOG_BITS_PER_XACT as u8) - 1) << bshift);
|
||||||
|
byteval[0] |= status << bshift;
|
||||||
|
|
||||||
|
byteptr.copy_from_slice(&byteval);
|
||||||
|
trace!(
|
||||||
|
"xl_xid {} byteno {} curval {} byteval {}",
|
||||||
|
xl_xid,
|
||||||
|
byteno,
|
||||||
|
curval,
|
||||||
|
byteval[0]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn handle_apply_request(
|
fn handle_apply_request(
|
||||||
pcache: &page_cache::PageCache,
|
pcache: &page_cache::PageCache,
|
||||||
process: &WalRedoProcess,
|
process: &WalRedoProcess,
|
||||||
@@ -104,7 +158,46 @@ fn handle_apply_request(
|
|||||||
let nrecords = records.len();
|
let nrecords = records.len();
|
||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
let apply_result = process.apply_wal_records(runtime, tag, base_img, records);
|
|
||||||
|
let apply_result: Result<Bytes, Error>;
|
||||||
|
if tag.forknum == pg_constants::PG_XACT_FORKNUM as u8 {
|
||||||
|
//TODO use base image if any
|
||||||
|
static ZERO_PAGE: [u8; 8192] = [0u8; 8192];
|
||||||
|
let zero_page_bytes: &[u8] = &ZERO_PAGE;
|
||||||
|
let mut page = BytesMut::from(zero_page_bytes);
|
||||||
|
|
||||||
|
for record in records {
|
||||||
|
let mut buf = record.rec.clone();
|
||||||
|
|
||||||
|
// 1. Parse XLogRecord struct
|
||||||
|
// FIXME: refactor to avoid code duplication.
|
||||||
|
let _xl_tot_len = buf.get_u32_le();
|
||||||
|
let xl_xid = buf.get_u32_le();
|
||||||
|
let _xl_prev = buf.get_u64_le();
|
||||||
|
let xl_info = buf.get_u8();
|
||||||
|
let xl_rmid = buf.get_u8();
|
||||||
|
buf.advance(2); // 2 bytes of padding
|
||||||
|
let _xl_crc = buf.get_u32_le();
|
||||||
|
|
||||||
|
if xl_rmid == pg_constants::RM_CLOG_ID {
|
||||||
|
let info = xl_info & !pg_constants::XLR_INFO_MASK;
|
||||||
|
if info == pg_constants::CLOG_ZEROPAGE {
|
||||||
|
page.clone_from_slice(zero_page_bytes);
|
||||||
|
trace!("handle_apply_request for RM_CLOG_ID-CLOG_ZEROPAGE lsn {:X}/{:X} main_data_offset {}, rec.len {}",
|
||||||
|
record.lsn >> 32,
|
||||||
|
record.lsn & 0xffffffff,
|
||||||
|
record.main_data_offset, record.rec.len());
|
||||||
|
}
|
||||||
|
} else if xl_rmid == pg_constants::RM_XACT_ID {
|
||||||
|
transaction_id_set_status_bit(xl_info, xl_rmid, xl_xid, record, &mut page);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
apply_result = Ok::<Bytes, Error>(page.freeze());
|
||||||
|
} else {
|
||||||
|
apply_result = process.apply_wal_records(runtime, tag, base_img, records);
|
||||||
|
}
|
||||||
|
|
||||||
let duration = start.elapsed();
|
let duration = start.elapsed();
|
||||||
|
|
||||||
let result;
|
let result;
|
||||||
@@ -147,22 +240,24 @@ impl WalRedoProcess {
|
|||||||
// Tests who run pageserver binary are setting proper PG_BIN_DIR
|
// Tests who run pageserver binary are setting proper PG_BIN_DIR
|
||||||
// and PG_LIB_DIR so that WalRedo would start right postgres. We may later
|
// and PG_LIB_DIR so that WalRedo would start right postgres. We may later
|
||||||
// switch to setting same things in pageserver config file.
|
// switch to setting same things in pageserver config file.
|
||||||
fn launch(datadir: &PathBuf, runtime: &Runtime) -> Result<WalRedoProcess, Error> {
|
fn launch(datadir: &str, runtime: &Runtime) -> Result<WalRedoProcess, Error> {
|
||||||
// Create empty data directory for wal-redo postgres deleting old one.
|
// Create empty data directory for wal-redo postgres deleting old one.
|
||||||
fs::remove_dir_all(datadir.to_str().unwrap()).ok();
|
fs::remove_dir_all(datadir).ok();
|
||||||
let initdb = runtime
|
let initdb = runtime
|
||||||
.block_on(
|
.block_on(
|
||||||
Command::new("initdb")
|
Command::new("initdb")
|
||||||
.args(&["-D", datadir.to_str().unwrap()])
|
.args(&["-D", datadir])
|
||||||
.arg("-N")
|
.arg("-N")
|
||||||
.output(),
|
.output(),
|
||||||
)
|
)
|
||||||
.expect("failed to execute initdb");
|
.expect("failed to execute initdb");
|
||||||
|
|
||||||
if !initdb.status.success() {
|
if !initdb.status.success() {
|
||||||
panic!("initdb failed: {}\nstderr:\n{}",
|
panic!(
|
||||||
std::str::from_utf8(&initdb.stdout).unwrap(),
|
"initdb failed: {}\nstderr:\n{}",
|
||||||
std::str::from_utf8(&initdb.stderr).unwrap());
|
std::str::from_utf8(&initdb.stdout).unwrap(),
|
||||||
|
std::str::from_utf8(&initdb.stderr).unwrap()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start postgres itself
|
// Start postgres itself
|
||||||
@@ -171,14 +266,11 @@ impl WalRedoProcess {
|
|||||||
.stdin(Stdio::piped())
|
.stdin(Stdio::piped())
|
||||||
.stderr(Stdio::piped())
|
.stderr(Stdio::piped())
|
||||||
.stdout(Stdio::piped())
|
.stdout(Stdio::piped())
|
||||||
.env("PGDATA", datadir.to_str().unwrap())
|
.env("PGDATA", datadir)
|
||||||
.spawn()
|
.spawn()
|
||||||
.expect("postgres --wal-redo command failed to start");
|
.expect("postgres --wal-redo command failed to start");
|
||||||
|
|
||||||
info!(
|
info!("launched WAL redo postgres process on {}", datadir);
|
||||||
"launched WAL redo postgres process on {}",
|
|
||||||
datadir.to_str().unwrap()
|
|
||||||
);
|
|
||||||
|
|
||||||
let stdin = child.stdin.take().expect("failed to open child's stdin");
|
let stdin = child.stdin.take().expect("failed to open child's stdin");
|
||||||
let stderr = child.stderr.take().expect("failed to open child's stderr");
|
let stderr = child.stderr.take().expect("failed to open child's stderr");
|
||||||
@@ -206,7 +298,7 @@ impl WalRedoProcess {
|
|||||||
tokio::spawn(f_stderr);
|
tokio::spawn(f_stderr);
|
||||||
|
|
||||||
Ok(WalRedoProcess {
|
Ok(WalRedoProcess {
|
||||||
child: child,
|
child,
|
||||||
stdin: RefCell::new(stdin),
|
stdin: RefCell::new(stdin),
|
||||||
stdout: RefCell::new(stdout),
|
stdout: RefCell::new(stdout),
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -10,6 +10,10 @@
|
|||||||
#
|
#
|
||||||
# 2) installs postgres to REPO_ROOT/tmp_install/
|
# 2) installs postgres to REPO_ROOT/tmp_install/
|
||||||
#
|
#
|
||||||
|
|
||||||
|
# Halt immediately if any command fails
|
||||||
|
set -e
|
||||||
|
|
||||||
REPO_ROOT=$(dirname "$0")
|
REPO_ROOT=$(dirname "$0")
|
||||||
REPO_ROOT="`( cd \"$REPO_ROOT\" && pwd )`"
|
REPO_ROOT="`( cd \"$REPO_ROOT\" && pwd )`"
|
||||||
|
|
||||||
|
|||||||
19
postgres_ffi/Cargo.toml
Normal file
19
postgres_ffi/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[package]
|
||||||
|
name = "postgres_ffi"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Heikki Linnakangas <heikki@zenith.tech>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
chrono = "0.4.19"
|
||||||
|
rand = "0.8.3"
|
||||||
|
bytes = "1.0.1"
|
||||||
|
byteorder = "1.4.3"
|
||||||
|
anyhow = "1.0"
|
||||||
|
crc32c = "0.6.0"
|
||||||
|
hex = "0.4.3"
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
bindgen = "0.53.1"
|
||||||
42
postgres_ffi/build.rs
Normal file
42
postgres_ffi/build.rs
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
extern crate bindgen;
|
||||||
|
|
||||||
|
use std::env;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
||||||
|
println!("cargo:rerun-if-changed=pg_control_ffi.h");
|
||||||
|
|
||||||
|
// The bindgen::Builder is the main entry point
|
||||||
|
// to bindgen, and lets you build up options for
|
||||||
|
// the resulting bindings.
|
||||||
|
let bindings = bindgen::Builder::default()
|
||||||
|
// The input header we would like to generate
|
||||||
|
// bindings for.
|
||||||
|
.header("pg_control_ffi.h")
|
||||||
|
// Tell cargo to invalidate the built crate whenever any of the
|
||||||
|
// included header files changed.
|
||||||
|
.parse_callbacks(Box::new(bindgen::CargoCallbacks))
|
||||||
|
.whitelist_type("ControlFileData")
|
||||||
|
.whitelist_var("PG_CONTROL_FILE_SIZE")
|
||||||
|
.whitelist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC")
|
||||||
|
.whitelist_type("DBState")
|
||||||
|
// Path the server include dir. It is in tmp_install/include/server, if you did
|
||||||
|
// "configure --prefix=<path to tmp_install>". But if you used "configure --prefix=/",
|
||||||
|
// and used DESTDIR to move it into tmp_install, then it's in
|
||||||
|
// tmp_install/include/postgres/server (that's how the pgbuild.sh script does it).
|
||||||
|
// 'pg_config --includedir-server' would perhaps be the more proper way to find it,
|
||||||
|
// but this will do for now.
|
||||||
|
.clang_arg("-I../tmp_install/include/server")
|
||||||
|
.clang_arg("-I../tmp_install/include/postgresql/server")
|
||||||
|
// Finish the builder and generate the bindings.
|
||||||
|
.generate()
|
||||||
|
// Unwrap the Result and panic on failure.
|
||||||
|
.expect("Unable to generate bindings");
|
||||||
|
|
||||||
|
// Write the bindings to the $OUT_DIR/bindings.rs file.
|
||||||
|
let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
|
||||||
|
bindings
|
||||||
|
.write_to_file(out_path.join("bindings.rs"))
|
||||||
|
.expect("Couldn't write bindings!");
|
||||||
|
}
|
||||||
4
postgres_ffi/pg_control_ffi.h
Normal file
4
postgres_ffi/pg_control_ffi.h
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
#include "c.h"
|
||||||
|
#include "catalog/pg_control.h"
|
||||||
|
|
||||||
|
const uint32 PG_CONTROLFILEDATA_OFFSETOF_CRC = offsetof(ControlFileData, crc);
|
||||||
67
postgres_ffi/src/lib.rs
Normal file
67
postgres_ffi/src/lib.rs
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
#![allow(non_upper_case_globals)]
|
||||||
|
#![allow(non_camel_case_types)]
|
||||||
|
#![allow(non_snake_case)]
|
||||||
|
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||||
|
|
||||||
|
use bytes::{Buf, Bytes, BytesMut};
|
||||||
|
|
||||||
|
// sizeof(ControlFileData)
|
||||||
|
const SIZEOF_CONTROLDATA: usize = std::mem::size_of::<ControlFileData>();
|
||||||
|
const OFFSETOF_CRC: usize = PG_CONTROLFILEDATA_OFFSETOF_CRC as usize;
|
||||||
|
|
||||||
|
impl ControlFileData {
|
||||||
|
// Initialize an all-zeros ControlFileData struct
|
||||||
|
pub fn new() -> ControlFileData {
|
||||||
|
let controlfile: ControlFileData;
|
||||||
|
|
||||||
|
let b = [0u8; SIZEOF_CONTROLDATA];
|
||||||
|
controlfile =
|
||||||
|
unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };
|
||||||
|
|
||||||
|
return controlfile;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn decode_pg_control(buf: Bytes) -> Result<ControlFileData, anyhow::Error> {
|
||||||
|
let mut b: [u8; SIZEOF_CONTROLDATA] = [0u8; SIZEOF_CONTROLDATA];
|
||||||
|
buf.clone().copy_to_slice(&mut b);
|
||||||
|
|
||||||
|
let controlfile: ControlFileData;
|
||||||
|
|
||||||
|
// TODO: verify CRC
|
||||||
|
let mut data_without_crc: [u8; OFFSETOF_CRC] = [0u8; OFFSETOF_CRC];
|
||||||
|
data_without_crc.copy_from_slice(&b[0..OFFSETOF_CRC]);
|
||||||
|
let expectedcrc = crc32c::crc32c(&data_without_crc);
|
||||||
|
|
||||||
|
controlfile = unsafe { std::mem::transmute::<[u8; SIZEOF_CONTROLDATA], ControlFileData>(b) };
|
||||||
|
|
||||||
|
if expectedcrc != controlfile.crc {
|
||||||
|
anyhow::bail!(
|
||||||
|
"invalid CRC in control file: expected {:08X}, was {:08X}",
|
||||||
|
expectedcrc,
|
||||||
|
controlfile.crc
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(controlfile)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn encode_pg_control(controlfile: ControlFileData) -> Bytes {
|
||||||
|
let b: [u8; SIZEOF_CONTROLDATA];
|
||||||
|
|
||||||
|
b = unsafe { std::mem::transmute::<ControlFileData, [u8; SIZEOF_CONTROLDATA]>(controlfile) };
|
||||||
|
|
||||||
|
// Recompute the CRC
|
||||||
|
let mut data_without_crc: [u8; OFFSETOF_CRC] = [0u8; OFFSETOF_CRC];
|
||||||
|
data_without_crc.copy_from_slice(&b[0..OFFSETOF_CRC]);
|
||||||
|
let newcrc = crc32c::crc32c(&data_without_crc);
|
||||||
|
|
||||||
|
let mut buf = BytesMut::with_capacity(PG_CONTROL_FILE_SIZE as usize);
|
||||||
|
|
||||||
|
buf.extend_from_slice(&b[0..OFFSETOF_CRC]);
|
||||||
|
buf.extend_from_slice(&newcrc.to_ne_bytes());
|
||||||
|
// Fill the rest of the control file with zeros.
|
||||||
|
buf.resize(PG_CONTROL_FILE_SIZE as usize, 0);
|
||||||
|
|
||||||
|
return buf.into();
|
||||||
|
}
|
||||||
2
vendor/postgres
vendored
2
vendor/postgres
vendored
Submodule vendor/postgres updated: b1f5a5ec14...67da6b1df6
@@ -7,14 +7,10 @@ edition = "2018"
|
|||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
chrono = "0.4.19"
|
|
||||||
crossbeam-channel = "0.5.0"
|
|
||||||
rand = "0.8.3"
|
|
||||||
regex = "1.4.5"
|
regex = "1.4.5"
|
||||||
bytes = "1.0.1"
|
bytes = "1.0.1"
|
||||||
byteorder = "1.4.3"
|
byteorder = "1.4.3"
|
||||||
fs2 = "0.4.3"
|
fs2 = "0.4.3"
|
||||||
futures = "0.3.13"
|
|
||||||
lazy_static = "1.4.0"
|
lazy_static = "1.4.0"
|
||||||
slog-stdlog = "4.1.0"
|
slog-stdlog = "4.1.0"
|
||||||
slog-async = "2.6.0"
|
slog-async = "2.6.0"
|
||||||
@@ -23,16 +19,14 @@ slog-term = "2.8.0"
|
|||||||
slog = "2.7.0"
|
slog = "2.7.0"
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
clap = "2.33.0"
|
clap = "2.33.0"
|
||||||
termion = "1.5.6"
|
|
||||||
tui = "0.14.0"
|
|
||||||
daemonize = "0.4.1"
|
daemonize = "0.4.1"
|
||||||
rust-s3 = { git = "https://github.com/hlinnaka/rust-s3", features = ["no-verify-ssl"] }
|
|
||||||
tokio = { version = "1.3.0", features = ["full"] }
|
tokio = { version = "1.3.0", features = ["full"] }
|
||||||
tokio-stream = { version = "0.1.4" }
|
tokio-stream = { version = "0.1.4" }
|
||||||
tokio-postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
|
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
postgres-protocol = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
|
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
postgres = { git = "https://github.com/kelvich/rust-postgres", branch = "replication_rebase" }
|
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="a0d067b66447951d1276a53fb09886539c3fa094" }
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
crc32c = "0.6.0"
|
crc32c = "0.6.0"
|
||||||
|
|
||||||
|
# FIXME: 'pageserver' is needed for ZTimelineId. Refactor
|
||||||
pageserver = { path = "../pageserver" }
|
pageserver = { path = "../pageserver" }
|
||||||
|
|||||||
@@ -9,17 +9,15 @@ use std::path::PathBuf;
|
|||||||
use std::thread;
|
use std::thread;
|
||||||
use std::{fs::File, fs::OpenOptions};
|
use std::{fs::File, fs::OpenOptions};
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
use clap::{App, Arg};
|
use clap::{App, Arg};
|
||||||
|
|
||||||
use slog;
|
|
||||||
use slog::Drain;
|
use slog::Drain;
|
||||||
use slog_scope;
|
|
||||||
use slog_stdlog;
|
|
||||||
|
|
||||||
use walkeeper::wal_service;
|
use walkeeper::wal_service;
|
||||||
use walkeeper::WalAcceptorConf;
|
use walkeeper::WalAcceptorConf;
|
||||||
|
|
||||||
fn main() -> Result<(), io::Error> {
|
fn main() -> Result<()> {
|
||||||
let arg_matches = App::new("Zenith wal_acceptor")
|
let arg_matches = App::new("Zenith wal_acceptor")
|
||||||
.about("Store WAL stream to local file system and push it to WAL receivers")
|
.about("Store WAL stream to local file system and push it to WAL receivers")
|
||||||
.arg(
|
.arg(
|
||||||
@@ -29,6 +27,13 @@ fn main() -> Result<(), io::Error> {
|
|||||||
.takes_value(true)
|
.takes_value(true)
|
||||||
.help("Path to the WAL acceptor data directory"),
|
.help("Path to the WAL acceptor data directory"),
|
||||||
)
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("systemid")
|
||||||
|
.long("systemid")
|
||||||
|
.takes_value(true)
|
||||||
|
.required(true)
|
||||||
|
.help("PostgreSQL system id, from pg_control"),
|
||||||
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("listen")
|
Arg::with_name("listen")
|
||||||
.short("l")
|
.short("l")
|
||||||
@@ -59,16 +64,23 @@ fn main() -> Result<(), io::Error> {
|
|||||||
)
|
)
|
||||||
.get_matches();
|
.get_matches();
|
||||||
|
|
||||||
|
let systemid_str = arg_matches.value_of("systemid").unwrap();
|
||||||
|
let systemid: u64 = systemid_str.parse()?;
|
||||||
|
|
||||||
let mut conf = WalAcceptorConf {
|
let mut conf = WalAcceptorConf {
|
||||||
data_dir: PathBuf::from("./"),
|
data_dir: PathBuf::from("./"),
|
||||||
|
systemid: systemid,
|
||||||
daemonize: false,
|
daemonize: false,
|
||||||
no_sync: false,
|
no_sync: false,
|
||||||
pageserver_addr: None,
|
pageserver_addr: None,
|
||||||
listen_addr: "127.0.0.1:5454".parse().unwrap(),
|
listen_addr: "127.0.0.1:5454".parse()?,
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(dir) = arg_matches.value_of("datadir") {
|
if let Some(dir) = arg_matches.value_of("datadir") {
|
||||||
conf.data_dir = PathBuf::from(dir);
|
conf.data_dir = PathBuf::from(dir);
|
||||||
|
|
||||||
|
// change into the data directory.
|
||||||
|
std::env::set_current_dir(&conf.data_dir)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if arg_matches.is_present("no-sync") {
|
if arg_matches.is_present("no-sync") {
|
||||||
@@ -90,9 +102,9 @@ fn main() -> Result<(), io::Error> {
|
|||||||
start_wal_acceptor(conf)
|
start_wal_acceptor(conf)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<(), io::Error> {
|
fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
|
||||||
// Initialize logger
|
// Initialize logger
|
||||||
let _scope_guard = init_logging(&conf);
|
let _scope_guard = init_logging(&conf)?;
|
||||||
let _log_guard = slog_stdlog::init().unwrap();
|
let _log_guard = slog_stdlog::init().unwrap();
|
||||||
// Note: this `info!(...)` macro comes from `log` crate
|
// Note: this `info!(...)` macro comes from `log` crate
|
||||||
info!("standard logging redirected to slog");
|
info!("standard logging redirected to slog");
|
||||||
@@ -101,20 +113,20 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<(), io::Error> {
|
|||||||
info!("daemonizing...");
|
info!("daemonizing...");
|
||||||
|
|
||||||
// There should'n be any logging to stdin/stdout. Redirect it to the main log so
|
// There should'n be any logging to stdin/stdout. Redirect it to the main log so
|
||||||
// that we will see any accidental manual fpritf's or backtraces.
|
// that we will see any accidental manual fprintf's or backtraces.
|
||||||
let stdout = OpenOptions::new()
|
let stdout = OpenOptions::new()
|
||||||
.create(true)
|
.create(true)
|
||||||
.append(true)
|
.append(true)
|
||||||
.open(conf.data_dir.join("wal_acceptor.log"))
|
.open("wal_acceptor.log")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let stderr = OpenOptions::new()
|
let stderr = OpenOptions::new()
|
||||||
.create(true)
|
.create(true)
|
||||||
.append(true)
|
.append(true)
|
||||||
.open(conf.data_dir.join("wal_acceptor.log"))
|
.open("wal_acceptor.log")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let daemonize = Daemonize::new()
|
let daemonize = Daemonize::new()
|
||||||
.pid_file(conf.data_dir.join("wal_acceptor.pid"))
|
.pid_file("wal_acceptor.pid")
|
||||||
.working_directory(Path::new("."))
|
.working_directory(Path::new("."))
|
||||||
.stdout(stdout)
|
.stdout(stdout)
|
||||||
.stderr(stderr);
|
.stderr(stderr);
|
||||||
@@ -141,20 +153,24 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<(), io::Error> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn init_logging(conf: &WalAcceptorConf) -> slog_scope::GlobalLoggerGuard {
|
fn init_logging(conf: &WalAcceptorConf) -> Result<slog_scope::GlobalLoggerGuard, io::Error> {
|
||||||
if conf.daemonize {
|
if conf.daemonize {
|
||||||
let log = conf.data_dir.join("wal_acceptor.log");
|
let log = conf.data_dir.join("wal_acceptor.log");
|
||||||
let log_file = File::create(log).unwrap_or_else(|_| panic!("Could not create log file"));
|
let log_file = File::create(&log).map_err(|err| {
|
||||||
|
// We failed to initialize logging, so we can't log this message with error!
|
||||||
|
eprintln!("Could not create log file {:?}: {}", log, err);
|
||||||
|
err
|
||||||
|
})?;
|
||||||
let decorator = slog_term::PlainSyncDecorator::new(log_file);
|
let decorator = slog_term::PlainSyncDecorator::new(log_file);
|
||||||
let drain = slog_term::CompactFormat::new(decorator).build();
|
let drain = slog_term::CompactFormat::new(decorator).build();
|
||||||
let drain = std::sync::Mutex::new(drain).fuse();
|
let drain = std::sync::Mutex::new(drain).fuse();
|
||||||
let logger = slog::Logger::root(drain, slog::o!());
|
let logger = slog::Logger::root(drain, slog::o!());
|
||||||
slog_scope::set_global_logger(logger)
|
Ok(slog_scope::set_global_logger(logger))
|
||||||
} else {
|
} else {
|
||||||
let decorator = slog_term::TermDecorator::new().build();
|
let decorator = slog_term::TermDecorator::new().build();
|
||||||
let drain = slog_term::FullFormat::new(decorator).build().fuse();
|
let drain = slog_term::FullFormat::new(decorator).build().fuse();
|
||||||
let drain = slog_async::Async::new(drain).chan_size(1000).build().fuse();
|
let drain = slog_async::Async::new(drain).chan_size(1000).build().fuse();
|
||||||
let logger = slog::Logger::root(drain, slog::o!());
|
let logger = slog::Logger::root(drain, slog::o!());
|
||||||
return slog_scope::set_global_logger(logger);
|
Ok(slog_scope::set_global_logger(logger))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,10 +6,12 @@ mod pq_protocol;
|
|||||||
pub mod wal_service;
|
pub mod wal_service;
|
||||||
pub mod xlog_utils;
|
pub mod xlog_utils;
|
||||||
|
|
||||||
#[allow(dead_code)]
|
use crate::pq_protocol::SystemId;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct WalAcceptorConf {
|
pub struct WalAcceptorConf {
|
||||||
pub data_dir: PathBuf,
|
pub data_dir: PathBuf,
|
||||||
|
pub systemid: SystemId,
|
||||||
pub daemonize: bool,
|
pub daemonize: bool,
|
||||||
pub no_sync: bool,
|
pub no_sync: bool,
|
||||||
pub listen_addr: SocketAddr,
|
pub listen_addr: SocketAddr,
|
||||||
|
|||||||
@@ -1,7 +1,9 @@
|
|||||||
use byteorder::{BigEndian, ByteOrder};
|
use byteorder::{BigEndian, ByteOrder};
|
||||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||||
|
use pageserver::ZTimelineId;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
pub type Oid = u32;
|
pub type Oid = u32;
|
||||||
pub type SystemId = u64;
|
pub type SystemId = u64;
|
||||||
@@ -37,7 +39,7 @@ pub enum BeMessage<'a> {
|
|||||||
pub struct FeStartupMessage {
|
pub struct FeStartupMessage {
|
||||||
pub version: u32,
|
pub version: u32,
|
||||||
pub kind: StartupRequestCode,
|
pub kind: StartupRequestCode,
|
||||||
pub system_id: SystemId,
|
pub timelineid: ZTimelineId,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@@ -83,26 +85,33 @@ impl FeStartupMessage {
|
|||||||
let params_str = str::from_utf8(¶ms_bytes).unwrap();
|
let params_str = str::from_utf8(¶ms_bytes).unwrap();
|
||||||
let params = params_str.split('\0');
|
let params = params_str.split('\0');
|
||||||
let mut options = false;
|
let mut options = false;
|
||||||
let mut system_id: u64 = 0;
|
let mut timelineid: Option<ZTimelineId> = None;
|
||||||
for p in params {
|
for p in params {
|
||||||
if p == "options" {
|
if p == "options" {
|
||||||
options = true;
|
options = true;
|
||||||
} else if options {
|
} else if options {
|
||||||
for opt in p.split(' ') {
|
for opt in p.split(' ') {
|
||||||
if opt.starts_with("system.id=") {
|
if opt.starts_with("ztimelineid=") {
|
||||||
system_id = opt[10..].parse::<u64>().unwrap();
|
// FIXME: rethrow parsing error, don't unwrap
|
||||||
|
timelineid = Some(ZTimelineId::from_str(&opt[12..]).unwrap());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if timelineid.is_none() {
|
||||||
|
return Err(io::Error::new(
|
||||||
|
io::ErrorKind::InvalidInput,
|
||||||
|
"timelineid is required",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
buf.advance(len as usize);
|
buf.advance(len as usize);
|
||||||
Ok(Some(FeMessage::StartupMessage(FeStartupMessage {
|
Ok(Some(FeMessage::StartupMessage(FeStartupMessage {
|
||||||
version,
|
version,
|
||||||
kind,
|
kind,
|
||||||
system_id,
|
timelineid: timelineid.unwrap(),
|
||||||
})))
|
})))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -146,20 +155,20 @@ impl<'a> BeMessage<'a> {
|
|||||||
|
|
||||||
BeMessage::RowDescription(rows) => {
|
BeMessage::RowDescription(rows) => {
|
||||||
buf.put_u8(b'T');
|
buf.put_u8(b'T');
|
||||||
let total_len: u32 = rows
|
|
||||||
.iter()
|
let mut body = BytesMut::new();
|
||||||
.fold(0, |acc, row| acc + row.name.len() as u32 + 3 * (4 + 2));
|
body.put_i16(rows.len() as i16); // # of fields
|
||||||
buf.put_u32(4 + 2 + total_len);
|
|
||||||
for row in rows.iter() {
|
for row in rows.iter() {
|
||||||
buf.put_i16(row.name.len() as i16);
|
body.put_slice(row.name);
|
||||||
buf.put_slice(row.name);
|
body.put_i32(0); /* table oid */
|
||||||
buf.put_i32(0); /* table oid */
|
body.put_i16(0); /* attnum */
|
||||||
buf.put_i16(0); /* attnum */
|
body.put_u32(row.typoid);
|
||||||
buf.put_u32(row.typoid);
|
body.put_i16(row.typlen);
|
||||||
buf.put_i16(row.typlen);
|
body.put_i32(-1); /* typmod */
|
||||||
buf.put_i32(-1); /* typmod */
|
body.put_i16(0); /* format code */
|
||||||
buf.put_i16(0); /* format code */
|
|
||||||
}
|
}
|
||||||
|
buf.put_i32((4 + body.len()) as i32); // # of bytes, including len field itself
|
||||||
|
buf.put(body);
|
||||||
}
|
}
|
||||||
|
|
||||||
BeMessage::DataRow(vals) => {
|
BeMessage::DataRow(vals) => {
|
||||||
|
|||||||
@@ -3,8 +3,6 @@
|
|||||||
// receive WAL from wal_proposer and send it to WAL receivers
|
// receive WAL from wal_proposer and send it to WAL receivers
|
||||||
//
|
//
|
||||||
|
|
||||||
extern crate fs2;
|
|
||||||
|
|
||||||
use byteorder::{BigEndian, ByteOrder};
|
use byteorder::{BigEndian, ByteOrder};
|
||||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||||
use fs2::FileExt;
|
use fs2::FileExt;
|
||||||
@@ -33,6 +31,7 @@ use tokio_postgres::{connect, Error, NoTls};
|
|||||||
use crate::pq_protocol::*;
|
use crate::pq_protocol::*;
|
||||||
use crate::xlog_utils::*;
|
use crate::xlog_utils::*;
|
||||||
use crate::WalAcceptorConf;
|
use crate::WalAcceptorConf;
|
||||||
|
use pageserver::ZTimelineId;
|
||||||
|
|
||||||
type FullTransactionId = u64;
|
type FullTransactionId = u64;
|
||||||
|
|
||||||
@@ -64,7 +63,8 @@ struct ServerInfo {
|
|||||||
protocol_version: u32, /* proxy-safekeeper protocol version */
|
protocol_version: u32, /* proxy-safekeeper protocol version */
|
||||||
pg_version: u32, /* Postgres server version */
|
pg_version: u32, /* Postgres server version */
|
||||||
node_id: NodeId,
|
node_id: NodeId,
|
||||||
system_id: SystemId, /* Postgres system identifier */
|
system_id: SystemId,
|
||||||
|
timeline_id: ZTimelineId, /* Zenith timelineid */
|
||||||
wal_end: XLogRecPtr,
|
wal_end: XLogRecPtr,
|
||||||
timeline: TimeLineID,
|
timeline: TimeLineID,
|
||||||
wal_seg_size: u32,
|
wal_seg_size: u32,
|
||||||
@@ -146,8 +146,8 @@ struct SharedState {
|
|||||||
* Database instance (tenant)
|
* Database instance (tenant)
|
||||||
*/
|
*/
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct System {
|
pub struct Timeline {
|
||||||
id: SystemId,
|
timelineid: ZTimelineId,
|
||||||
mutex: Mutex<SharedState>,
|
mutex: Mutex<SharedState>,
|
||||||
cond: Notify, /* conditional variable used to notify wal senders */
|
cond: Notify, /* conditional variable used to notify wal senders */
|
||||||
}
|
}
|
||||||
@@ -157,7 +157,7 @@ pub struct System {
|
|||||||
*/
|
*/
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct Connection {
|
struct Connection {
|
||||||
system: Option<Arc<System>>,
|
timeline: Option<Arc<Timeline>>,
|
||||||
stream: TcpStream, /* Postgres connection */
|
stream: TcpStream, /* Postgres connection */
|
||||||
inbuf: BytesMut, /* input buffer */
|
inbuf: BytesMut, /* input buffer */
|
||||||
outbuf: BytesMut, /* output buffer */
|
outbuf: BytesMut, /* output buffer */
|
||||||
@@ -211,6 +211,7 @@ impl Serializer for ServerInfo {
|
|||||||
buf.put_u32_le(self.pg_version);
|
buf.put_u32_le(self.pg_version);
|
||||||
self.node_id.pack(buf);
|
self.node_id.pack(buf);
|
||||||
buf.put_u64_le(self.system_id);
|
buf.put_u64_le(self.system_id);
|
||||||
|
buf.put_slice(&self.timeline_id.as_arr());
|
||||||
buf.put_u64_le(self.wal_end);
|
buf.put_u64_le(self.wal_end);
|
||||||
buf.put_u32_le(self.timeline);
|
buf.put_u32_le(self.timeline);
|
||||||
buf.put_u32_le(self.wal_seg_size);
|
buf.put_u32_le(self.wal_seg_size);
|
||||||
@@ -221,6 +222,7 @@ impl Serializer for ServerInfo {
|
|||||||
pg_version: buf.get_u32_le(),
|
pg_version: buf.get_u32_le(),
|
||||||
node_id: NodeId::unpack(buf),
|
node_id: NodeId::unpack(buf),
|
||||||
system_id: buf.get_u64_le(),
|
system_id: buf.get_u64_le(),
|
||||||
|
timeline_id: ZTimelineId::get_from_buf(buf),
|
||||||
wal_end: buf.get_u64_le(),
|
wal_end: buf.get_u64_le(),
|
||||||
timeline: buf.get_u32_le(),
|
timeline: buf.get_u32_le(),
|
||||||
wal_seg_size: buf.get_u32_le(),
|
wal_seg_size: buf.get_u32_le(),
|
||||||
@@ -278,6 +280,7 @@ impl SafeKeeperInfo {
|
|||||||
pg_version: UNKNOWN_SERVER_VERSION, /* Postgres server version */
|
pg_version: UNKNOWN_SERVER_VERSION, /* Postgres server version */
|
||||||
node_id: NodeId { term: 0, uuid: 0 },
|
node_id: NodeId { term: 0, uuid: 0 },
|
||||||
system_id: 0, /* Postgres system identifier */
|
system_id: 0, /* Postgres system identifier */
|
||||||
|
timeline_id: ZTimelineId::from([0u8; 16]),
|
||||||
wal_end: 0,
|
wal_end: 0,
|
||||||
timeline: 0,
|
timeline: 0,
|
||||||
wal_seg_size: 0,
|
wal_seg_size: 0,
|
||||||
@@ -349,7 +352,8 @@ impl Serializer for SafeKeeperResponse {
|
|||||||
}
|
}
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
pub static ref SYSTEMS: Mutex<HashMap<SystemId, Arc<System>>> = Mutex::new(HashMap::new());
|
pub static ref TIMELINES: Mutex<HashMap<ZTimelineId, Arc<Timeline>>> =
|
||||||
|
Mutex::new(HashMap::new());
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn thread_main(conf: WalAcceptorConf) {
|
pub fn thread_main(conf: WalAcceptorConf) {
|
||||||
@@ -366,7 +370,7 @@ pub fn thread_main(conf: WalAcceptorConf) {
|
|||||||
info!("Starting wal acceptor on {}", conf.listen_addr);
|
info!("Starting wal acceptor on {}", conf.listen_addr);
|
||||||
|
|
||||||
runtime.block_on(async {
|
runtime.block_on(async {
|
||||||
let _unused = main_loop(&conf).await;
|
main_loop(&conf).await.unwrap();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -389,8 +393,8 @@ async fn main_loop(conf: &WalAcceptorConf) -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl System {
|
impl Timeline {
|
||||||
pub fn new(id: SystemId) -> System {
|
pub fn new(timelineid: ZTimelineId) -> Timeline {
|
||||||
let shared_state = SharedState {
|
let shared_state = SharedState {
|
||||||
commit_lsn: 0,
|
commit_lsn: 0,
|
||||||
info: SafeKeeperInfo::new(),
|
info: SafeKeeperInfo::new(),
|
||||||
@@ -401,8 +405,8 @@ impl System {
|
|||||||
catalog_xmin: u64::MAX,
|
catalog_xmin: u64::MAX,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
System {
|
Timeline {
|
||||||
id: id,
|
timelineid,
|
||||||
mutex: Mutex::new(shared_state),
|
mutex: Mutex::new(shared_state),
|
||||||
cond: Notify::new(),
|
cond: Notify::new(),
|
||||||
}
|
}
|
||||||
@@ -443,12 +447,23 @@ impl System {
|
|||||||
return shared_state.hs_feedback;
|
return shared_state.hs_feedback;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load and lock control file (prevent running more than one instance of safekeeper
|
// Load and lock control file (prevent running more than one instance of safekeeper)
|
||||||
fn load_control_file(&self, conf: &WalAcceptorConf) {
|
fn load_control_file(&self, conf: &WalAcceptorConf) -> Result<()> {
|
||||||
|
let mut shared_state = self.mutex.lock().unwrap();
|
||||||
|
|
||||||
|
if shared_state.control_file.is_some() {
|
||||||
|
info!(
|
||||||
|
"control file for timeline {} is already open",
|
||||||
|
self.timelineid
|
||||||
|
);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
let control_file_path = conf
|
let control_file_path = conf
|
||||||
.data_dir
|
.data_dir
|
||||||
.join(self.id.to_string())
|
.join(self.timelineid.to_string())
|
||||||
.join(CONTROL_FILE_NAME);
|
.join(CONTROL_FILE_NAME);
|
||||||
|
info!("loading control file {}", control_file_path.display());
|
||||||
match OpenOptions::new()
|
match OpenOptions::new()
|
||||||
.read(true)
|
.read(true)
|
||||||
.write(true)
|
.write(true)
|
||||||
@@ -460,13 +475,13 @@ impl System {
|
|||||||
match file.try_lock_exclusive() {
|
match file.try_lock_exclusive() {
|
||||||
Ok(()) => {}
|
Ok(()) => {}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
panic!(
|
io_error!(
|
||||||
"Control file {:?} is locked by some other process: {}",
|
"Control file {:?} is locked by some other process: {}",
|
||||||
&control_file_path, e
|
&control_file_path,
|
||||||
|
e
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut shared_state = self.mutex.lock().unwrap();
|
|
||||||
shared_state.control_file = Some(file);
|
shared_state.control_file = Some(file);
|
||||||
|
|
||||||
const SIZE: usize = mem::size_of::<SafeKeeperInfo>();
|
const SIZE: usize = mem::size_of::<SafeKeeperInfo>();
|
||||||
@@ -483,12 +498,13 @@ impl System {
|
|||||||
let my_info = SafeKeeperInfo::unpack(&mut input);
|
let my_info = SafeKeeperInfo::unpack(&mut input);
|
||||||
|
|
||||||
if my_info.magic != SK_MAGIC {
|
if my_info.magic != SK_MAGIC {
|
||||||
panic!("Invalid control file magic: {}", my_info.magic);
|
io_error!("Invalid control file magic: {}", my_info.magic);
|
||||||
}
|
}
|
||||||
if my_info.format_version != SK_FORMAT_VERSION {
|
if my_info.format_version != SK_FORMAT_VERSION {
|
||||||
panic!(
|
io_error!(
|
||||||
"Incompatible format version: {} vs. {}",
|
"Incompatible format version: {} vs. {}",
|
||||||
my_info.format_version, SK_FORMAT_VERSION
|
my_info.format_version,
|
||||||
|
SK_FORMAT_VERSION
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
shared_state.info = my_info;
|
shared_state.info = my_info;
|
||||||
@@ -501,6 +517,7 @@ impl System {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn save_control_file(&self, sync: bool) -> Result<()> {
|
fn save_control_file(&self, sync: bool) -> Result<()> {
|
||||||
@@ -521,7 +538,7 @@ impl System {
|
|||||||
impl Connection {
|
impl Connection {
|
||||||
pub fn new(socket: TcpStream, conf: &WalAcceptorConf) -> Connection {
|
pub fn new(socket: TcpStream, conf: &WalAcceptorConf) -> Connection {
|
||||||
Connection {
|
Connection {
|
||||||
system: None,
|
timeline: None,
|
||||||
stream: socket,
|
stream: socket,
|
||||||
inbuf: BytesMut::with_capacity(10 * 1024),
|
inbuf: BytesMut::with_capacity(10 * 1024),
|
||||||
outbuf: BytesMut::with_capacity(10 * 1024),
|
outbuf: BytesMut::with_capacity(10 * 1024),
|
||||||
@@ -530,8 +547,8 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn system(&self) -> Arc<System> {
|
fn timeline(&self) -> Arc<Timeline> {
|
||||||
self.system.as_ref().unwrap().clone()
|
self.timeline.as_ref().unwrap().clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run(&mut self) -> Result<()> {
|
async fn run(&mut self) -> Result<()> {
|
||||||
@@ -563,10 +580,15 @@ impl Connection {
|
|||||||
"no_user",
|
"no_user",
|
||||||
);
|
);
|
||||||
let callme = format!(
|
let callme = format!(
|
||||||
"callmemaybe host={} port={} replication=1 options='-c system.id={}'",
|
"callmemaybe {} host={} port={} options='-c ztimelineid={}'",
|
||||||
|
self.timeline().timelineid,
|
||||||
self.conf.listen_addr.ip(),
|
self.conf.listen_addr.ip(),
|
||||||
self.conf.listen_addr.port(),
|
self.conf.listen_addr.port(),
|
||||||
self.system().get_info().server.system_id,
|
self.timeline().timelineid
|
||||||
|
);
|
||||||
|
info!(
|
||||||
|
"requesting page server to connect to us: start {} {}",
|
||||||
|
ps_connstr, callme
|
||||||
);
|
);
|
||||||
let (client, connection) = connect(&ps_connstr, NoTls).await?;
|
let (client, connection) = connect(&ps_connstr, NoTls).await?;
|
||||||
|
|
||||||
@@ -582,22 +604,14 @@ impl Connection {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_system(&mut self, id: SystemId) -> Result<()> {
|
fn set_timeline(&mut self, timelineid: ZTimelineId) -> Result<()> {
|
||||||
let mut systems = SYSTEMS.lock().unwrap();
|
let mut timelines = TIMELINES.lock().unwrap();
|
||||||
if id == 0 {
|
if !timelines.contains_key(&timelineid) {
|
||||||
// non-multitenant configuration: just a single instance
|
info!("creating timeline dir {}", timelineid);
|
||||||
if let Some(system) = systems.values().next() {
|
fs::create_dir_all(timelineid.to_string())?;
|
||||||
self.system = Some(system.clone());
|
timelines.insert(timelineid, Arc::new(Timeline::new(timelineid)));
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
io_error!("No active instances");
|
|
||||||
}
|
}
|
||||||
if !systems.contains_key(&id) {
|
self.timeline = Some(timelines.get(&timelineid).unwrap().clone());
|
||||||
let system_dir = self.conf.data_dir.join(id.to_string());
|
|
||||||
fs::create_dir_all(system_dir)?;
|
|
||||||
systems.insert(id, Arc::new(System::new(id)));
|
|
||||||
}
|
|
||||||
self.system = Some(systems.get(&id).unwrap().clone());
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -606,14 +620,16 @@ impl Connection {
|
|||||||
// Receive information about server
|
// Receive information about server
|
||||||
let server_info = self.read_req::<ServerInfo>().await?;
|
let server_info = self.read_req::<ServerInfo>().await?;
|
||||||
info!(
|
info!(
|
||||||
"Start handshake with wal_proposer {} sysid {}",
|
"Start handshake with wal_proposer {} sysid {} timeline {}",
|
||||||
self.stream.peer_addr()?,
|
self.stream.peer_addr()?,
|
||||||
server_info.system_id
|
server_info.system_id,
|
||||||
|
server_info.timeline_id,
|
||||||
);
|
);
|
||||||
self.set_system(server_info.system_id)?;
|
// FIXME: also check that the system identifier matches
|
||||||
self.system().load_control_file(&self.conf);
|
self.set_timeline(server_info.timeline_id)?;
|
||||||
|
self.timeline().load_control_file(&self.conf)?;
|
||||||
|
|
||||||
let mut my_info = self.system().get_info();
|
let mut my_info = self.timeline().get_info();
|
||||||
|
|
||||||
/* Check protocol compatibility */
|
/* Check protocol compatibility */
|
||||||
if server_info.protocol_version != SK_PROTOCOL_VERSION {
|
if server_info.protocol_version != SK_PROTOCOL_VERSION {
|
||||||
@@ -662,9 +678,9 @@ impl Connection {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
my_info.server.node_id = prop.node_id;
|
my_info.server.node_id = prop.node_id;
|
||||||
self.system().set_info(&my_info);
|
self.timeline().set_info(&my_info);
|
||||||
/* Need to persist our vote first */
|
/* Need to persist our vote first */
|
||||||
self.system().save_control_file(true)?;
|
self.timeline().save_control_file(true)?;
|
||||||
|
|
||||||
let mut flushed_restart_lsn: XLogRecPtr = 0;
|
let mut flushed_restart_lsn: XLogRecPtr = 0;
|
||||||
let wal_seg_size = server_info.wal_seg_size as usize;
|
let wal_seg_size = server_info.wal_seg_size as usize;
|
||||||
@@ -678,12 +694,13 @@ impl Connection {
|
|||||||
// Add far as replication in postgres is initiated by receiver, we should use callme mechanism
|
// Add far as replication in postgres is initiated by receiver, we should use callme mechanism
|
||||||
if let Err(e) = self.request_callback().await {
|
if let Err(e) = self.request_callback().await {
|
||||||
// Do not treate it as fatal error and continue work
|
// Do not treate it as fatal error and continue work
|
||||||
|
// FIXME: we should retry after a while...
|
||||||
error!("Failed to send callme request to pageserver: {}", e);
|
error!("Failed to send callme request to pageserver: {}", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"Start streaming from server {} address {:?}",
|
"Start streaming from timeline {} address {:?}",
|
||||||
server_info.system_id,
|
server_info.timeline_id,
|
||||||
self.stream.peer_addr()?
|
self.stream.peer_addr()?
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -705,6 +722,15 @@ impl Connection {
|
|||||||
let rec_size = (end_pos - start_pos) as usize;
|
let rec_size = (end_pos - start_pos) as usize;
|
||||||
assert!(rec_size <= MAX_SEND_SIZE);
|
assert!(rec_size <= MAX_SEND_SIZE);
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"received for {} bytes between {:X}/{:X} and {:X}/{:X}",
|
||||||
|
rec_size,
|
||||||
|
start_pos >> 32,
|
||||||
|
start_pos & 0xffffffff,
|
||||||
|
end_pos >> 32,
|
||||||
|
end_pos & 0xffffffff
|
||||||
|
);
|
||||||
|
|
||||||
/* Receive message body */
|
/* Receive message body */
|
||||||
self.inbuf.resize(rec_size, 0u8);
|
self.inbuf.resize(rec_size, 0u8);
|
||||||
self.stream.read_exact(&mut self.inbuf[0..rec_size]).await?;
|
self.stream.read_exact(&mut self.inbuf[0..rec_size]).await?;
|
||||||
@@ -735,7 +761,7 @@ impl Connection {
|
|||||||
* when restart_lsn delta exceeds WAL segment size.
|
* when restart_lsn delta exceeds WAL segment size.
|
||||||
*/
|
*/
|
||||||
sync_control_file |= flushed_restart_lsn + (wal_seg_size as u64) < my_info.restart_lsn;
|
sync_control_file |= flushed_restart_lsn + (wal_seg_size as u64) < my_info.restart_lsn;
|
||||||
self.system().save_control_file(sync_control_file)?;
|
self.timeline().save_control_file(sync_control_file)?;
|
||||||
|
|
||||||
if sync_control_file {
|
if sync_control_file {
|
||||||
flushed_restart_lsn = my_info.restart_lsn;
|
flushed_restart_lsn = my_info.restart_lsn;
|
||||||
@@ -746,7 +772,7 @@ impl Connection {
|
|||||||
let resp = SafeKeeperResponse {
|
let resp = SafeKeeperResponse {
|
||||||
epoch: my_info.epoch,
|
epoch: my_info.epoch,
|
||||||
flush_lsn: end_pos,
|
flush_lsn: end_pos,
|
||||||
hs_feedback: self.system().get_hs_feedback(),
|
hs_feedback: self.timeline().get_hs_feedback(),
|
||||||
};
|
};
|
||||||
self.start_sending();
|
self.start_sending();
|
||||||
resp.pack(&mut self.outbuf);
|
resp.pack(&mut self.outbuf);
|
||||||
@@ -756,7 +782,7 @@ impl Connection {
|
|||||||
* Ping wal sender that new data is available.
|
* Ping wal sender that new data is available.
|
||||||
* FlushLSN (end_pos) can be smaller than commitLSN in case we are at catching-up safekeeper.
|
* FlushLSN (end_pos) can be smaller than commitLSN in case we are at catching-up safekeeper.
|
||||||
*/
|
*/
|
||||||
self.system()
|
self.timeline()
|
||||||
.notify_wal_senders(min(req.commit_lsn, end_pos));
|
.notify_wal_senders(min(req.commit_lsn, end_pos));
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -807,7 +833,7 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// Send WAL to replica or WAL sender using standard libpq replication protocol
|
// Send WAL to replica or WAL receiver using standard libpq replication protocol
|
||||||
//
|
//
|
||||||
async fn send_wal(&mut self) -> Result<()> {
|
async fn send_wal(&mut self) -> Result<()> {
|
||||||
info!("WAL sender to {:?} is started", self.stream.peer_addr()?);
|
info!("WAL sender to {:?} is started", self.stream.peer_addr()?);
|
||||||
@@ -828,7 +854,7 @@ impl Connection {
|
|||||||
BeMessage::write(&mut self.outbuf, &BeMessage::ReadyForQuery);
|
BeMessage::write(&mut self.outbuf, &BeMessage::ReadyForQuery);
|
||||||
self.send().await?;
|
self.send().await?;
|
||||||
self.init_done = true;
|
self.init_done = true;
|
||||||
self.set_system(m.system_id)?;
|
self.set_timeline(m.timelineid)?;
|
||||||
}
|
}
|
||||||
StartupRequestCode::Cancel => return Ok(()),
|
StartupRequestCode::Cancel => return Ok(()),
|
||||||
}
|
}
|
||||||
@@ -861,7 +887,7 @@ impl Connection {
|
|||||||
let (start_pos, timeline) = self.find_end_of_wal(false);
|
let (start_pos, timeline) = self.find_end_of_wal(false);
|
||||||
let lsn = format!("{:X}/{:>08X}", (start_pos >> 32) as u32, start_pos as u32);
|
let lsn = format!("{:X}/{:>08X}", (start_pos >> 32) as u32, start_pos as u32);
|
||||||
let tli = timeline.to_string();
|
let tli = timeline.to_string();
|
||||||
let sysid = self.system().get_info().server.system_id.to_string();
|
let sysid = self.timeline().get_info().server.system_id.to_string();
|
||||||
let lsn_bytes = lsn.as_bytes();
|
let lsn_bytes = lsn.as_bytes();
|
||||||
let tli_bytes = tli.as_bytes();
|
let tli_bytes = tli.as_bytes();
|
||||||
let sysid_bytes = sysid.as_bytes();
|
let sysid_bytes = sysid.as_bytes();
|
||||||
@@ -893,11 +919,11 @@ impl Connection {
|
|||||||
);
|
);
|
||||||
BeMessage::write(
|
BeMessage::write(
|
||||||
&mut self.outbuf,
|
&mut self.outbuf,
|
||||||
&BeMessage::DataRow(&[Some(lsn_bytes), Some(tli_bytes), Some(sysid_bytes), None]),
|
&BeMessage::DataRow(&[Some(sysid_bytes), Some(tli_bytes), Some(lsn_bytes), None]),
|
||||||
);
|
);
|
||||||
BeMessage::write(
|
BeMessage::write(
|
||||||
&mut self.outbuf,
|
&mut self.outbuf,
|
||||||
&BeMessage::CommandComplete(b"IDENTIFY_SYSTEM"),
|
&BeMessage::CommandComplete(b"IDENTIFY_SYSTEM\0"),
|
||||||
);
|
);
|
||||||
BeMessage::write(&mut self.outbuf, &BeMessage::ReadyForQuery);
|
BeMessage::write(&mut self.outbuf, &BeMessage::ReadyForQuery);
|
||||||
self.send().await?;
|
self.send().await?;
|
||||||
@@ -917,7 +943,7 @@ impl Connection {
|
|||||||
} else {
|
} else {
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
let wal_seg_size = self.system().get_info().server.wal_seg_size as usize;
|
let wal_seg_size = self.timeline().get_info().server.wal_seg_size as usize;
|
||||||
if wal_seg_size == 0 {
|
if wal_seg_size == 0 {
|
||||||
io_error!("Can not start replication before connecting to wal_proposer");
|
io_error!("Can not start replication before connecting to wal_proposer");
|
||||||
}
|
}
|
||||||
@@ -935,15 +961,6 @@ impl Connection {
|
|||||||
BeMessage::write(&mut self.outbuf, &BeMessage::Copy);
|
BeMessage::write(&mut self.outbuf, &BeMessage::Copy);
|
||||||
self.send().await?;
|
self.send().await?;
|
||||||
|
|
||||||
/*
|
|
||||||
* Always start streaming at the beginning of a segment
|
|
||||||
*
|
|
||||||
* FIXME: It is common practice to start streaming at the beginning of
|
|
||||||
* the segment, but it should be up to the client to decide that. We
|
|
||||||
* shouldn't enforce that here.
|
|
||||||
*/
|
|
||||||
start_pos -= XLogSegmentOffset(start_pos, wal_seg_size) as u64;
|
|
||||||
|
|
||||||
let mut end_pos: XLogRecPtr;
|
let mut end_pos: XLogRecPtr;
|
||||||
let mut commit_lsn: XLogRecPtr;
|
let mut commit_lsn: XLogRecPtr;
|
||||||
let mut wal_file: Option<File> = None;
|
let mut wal_file: Option<File> = None;
|
||||||
@@ -960,19 +977,18 @@ impl Connection {
|
|||||||
end_pos = stop_pos;
|
end_pos = stop_pos;
|
||||||
} else {
|
} else {
|
||||||
/* normal mode */
|
/* normal mode */
|
||||||
|
let timeline = self.timeline();
|
||||||
loop {
|
loop {
|
||||||
// Rust doesn't allow to grab async result from mutex scope
|
// Rust doesn't allow to grab async result from mutex scope
|
||||||
let system = self.system();
|
|
||||||
let notified = system.cond.notified();
|
|
||||||
{
|
{
|
||||||
let shared_state = system.mutex.lock().unwrap();
|
let shared_state = timeline.mutex.lock().unwrap();
|
||||||
commit_lsn = shared_state.commit_lsn;
|
commit_lsn = shared_state.commit_lsn;
|
||||||
if start_pos < commit_lsn {
|
if start_pos < commit_lsn {
|
||||||
end_pos = commit_lsn;
|
end_pos = commit_lsn;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
notified.await;
|
timeline.cond.notified().await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if end_pos == END_REPLICATION_MARKER {
|
if end_pos == END_REPLICATION_MARKER {
|
||||||
@@ -983,13 +999,13 @@ impl Connection {
|
|||||||
Ok(0) => break,
|
Ok(0) => break,
|
||||||
Ok(_) => match self.parse_message()? {
|
Ok(_) => match self.parse_message()? {
|
||||||
Some(FeMessage::CopyData(m)) => self
|
Some(FeMessage::CopyData(m)) => self
|
||||||
.system()
|
.timeline()
|
||||||
.add_hs_feedback(HotStandbyFeedback::parse(&m.body)),
|
.add_hs_feedback(HotStandbyFeedback::parse(&m.body)),
|
||||||
_ => {}
|
_ => {}
|
||||||
},
|
},
|
||||||
Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {}
|
Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => {}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
return Err(e.into());
|
return Err(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1004,7 +1020,7 @@ impl Connection {
|
|||||||
let wal_file_path = self
|
let wal_file_path = self
|
||||||
.conf
|
.conf
|
||||||
.data_dir
|
.data_dir
|
||||||
.join(self.system().id.to_string())
|
.join(self.timeline().timelineid.to_string())
|
||||||
.join(wal_file_name.clone() + ".partial");
|
.join(wal_file_name.clone() + ".partial");
|
||||||
if let Ok(opened_file) = File::open(&wal_file_path) {
|
if let Ok(opened_file) = File::open(&wal_file_path) {
|
||||||
file = opened_file;
|
file = opened_file;
|
||||||
@@ -1012,21 +1028,30 @@ impl Connection {
|
|||||||
let wal_file_path = self
|
let wal_file_path = self
|
||||||
.conf
|
.conf
|
||||||
.data_dir
|
.data_dir
|
||||||
.join(self.system().id.to_string())
|
.join(self.timeline().timelineid.to_string())
|
||||||
.join(wal_file_name);
|
.join(wal_file_name);
|
||||||
match File::open(&wal_file_path) {
|
match File::open(&wal_file_path) {
|
||||||
Ok(opened_file) => file = opened_file,
|
Ok(opened_file) => file = opened_file,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Failed to open log file {:?}: {}", &wal_file_path, e);
|
error!("Failed to open log file {:?}: {}", &wal_file_path, e);
|
||||||
return Err(e.into());
|
return Err(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let send_size = min((end_pos - start_pos) as usize, MAX_SEND_SIZE);
|
let xlogoff = XLogSegmentOffset(start_pos, wal_seg_size) as usize;
|
||||||
|
|
||||||
|
// How much to read and send in message? We cannot cross the WAL file
|
||||||
|
// boundary, and we don't want send more than MAX_SEND_SIZE.
|
||||||
|
let send_size = (end_pos - start_pos) as usize;
|
||||||
|
let send_size = min(send_size, wal_seg_size - xlogoff);
|
||||||
|
let send_size = min(send_size, MAX_SEND_SIZE);
|
||||||
|
|
||||||
let msg_size = LIBPQ_HDR_SIZE + XLOG_HDR_SIZE + send_size;
|
let msg_size = LIBPQ_HDR_SIZE + XLOG_HDR_SIZE + send_size;
|
||||||
let data_start = LIBPQ_HDR_SIZE + XLOG_HDR_SIZE;
|
let data_start = LIBPQ_HDR_SIZE + XLOG_HDR_SIZE;
|
||||||
let data_end = data_start + send_size;
|
let data_end = data_start + send_size;
|
||||||
|
|
||||||
|
file.seek(SeekFrom::Start(xlogoff as u64))?;
|
||||||
file.read_exact(&mut self.outbuf[data_start..data_end])?;
|
file.read_exact(&mut self.outbuf[data_start..data_end])?;
|
||||||
self.outbuf[0] = b'd';
|
self.outbuf[0] = b'd';
|
||||||
BigEndian::write_u32(
|
BigEndian::write_u32(
|
||||||
@@ -1041,6 +1066,12 @@ impl Connection {
|
|||||||
self.stream.write_all(&self.outbuf[0..msg_size]).await?;
|
self.stream.write_all(&self.outbuf[0..msg_size]).await?;
|
||||||
start_pos += send_size as u64;
|
start_pos += send_size as u64;
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"Sent WAL to page server up to {:X}/{:>08X}",
|
||||||
|
(end_pos >> 32) as u32,
|
||||||
|
end_pos as u32
|
||||||
|
);
|
||||||
|
|
||||||
if XLogSegmentOffset(start_pos, wal_seg_size) != 0 {
|
if XLogSegmentOffset(start_pos, wal_seg_size) != 0 {
|
||||||
wal_file = Some(file);
|
wal_file = Some(file);
|
||||||
}
|
}
|
||||||
@@ -1095,12 +1126,12 @@ impl Connection {
|
|||||||
let wal_file_path = self
|
let wal_file_path = self
|
||||||
.conf
|
.conf
|
||||||
.data_dir
|
.data_dir
|
||||||
.join(self.system().id.to_string())
|
.join(self.timeline().timelineid.to_string())
|
||||||
.join(wal_file_name.clone());
|
.join(wal_file_name.clone());
|
||||||
let wal_file_partial_path = self
|
let wal_file_partial_path = self
|
||||||
.conf
|
.conf
|
||||||
.data_dir
|
.data_dir
|
||||||
.join(self.system().id.to_string())
|
.join(self.timeline().timelineid.to_string())
|
||||||
.join(wal_file_name.clone() + ".partial");
|
.join(wal_file_name.clone() + ".partial");
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -1130,7 +1161,7 @@ impl Connection {
|
|||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Failed to open log file {:?}: {}", &wal_file_path, e);
|
error!("Failed to open log file {:?}: {}", &wal_file_path, e);
|
||||||
return Err(e.into());
|
return Err(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1163,7 +1194,7 @@ impl Connection {
|
|||||||
fn find_end_of_wal(&self, precise: bool) -> (XLogRecPtr, TimeLineID) {
|
fn find_end_of_wal(&self, precise: bool) -> (XLogRecPtr, TimeLineID) {
|
||||||
find_end_of_wal(
|
find_end_of_wal(
|
||||||
&self.conf.data_dir,
|
&self.conf.data_dir,
|
||||||
self.system().get_info().server.wal_seg_size as usize,
|
self.timeline().get_info().server.wal_seg_size as usize,
|
||||||
precise,
|
precise,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use log::*;
|
|||||||
use std::cmp::min;
|
use std::cmp::min;
|
||||||
use std::fs::{self, File};
|
use std::fs::{self, File};
|
||||||
use std::io::prelude::*;
|
use std::io::prelude::*;
|
||||||
use std::path::PathBuf;
|
use std::path::{Path, PathBuf};
|
||||||
use std::time::SystemTime;
|
use std::time::SystemTime;
|
||||||
|
|
||||||
pub const XLOG_FNAME_LEN: usize = 24;
|
pub const XLOG_FNAME_LEN: usize = 24;
|
||||||
@@ -89,7 +89,7 @@ pub fn get_current_timestamp() -> TimestampTz {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn find_end_of_wal_segment(
|
fn find_end_of_wal_segment(
|
||||||
data_dir: &PathBuf,
|
data_dir: &Path,
|
||||||
segno: XLogSegNo,
|
segno: XLogSegNo,
|
||||||
tli: TimeLineID,
|
tli: TimeLineID,
|
||||||
wal_seg_size: usize,
|
wal_seg_size: usize,
|
||||||
@@ -185,7 +185,7 @@ fn find_end_of_wal_segment(
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn find_end_of_wal(
|
pub fn find_end_of_wal(
|
||||||
data_dir: &PathBuf,
|
data_dir: &Path,
|
||||||
wal_seg_size: usize,
|
wal_seg_size: usize,
|
||||||
precise: bool,
|
precise: bool,
|
||||||
) -> (XLogRecPtr, TimeLineID) {
|
) -> (XLogRecPtr, TimeLineID) {
|
||||||
|
|||||||
16
zenith/Cargo.toml
Normal file
16
zenith/Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
[package]
|
||||||
|
name = "zenith"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Stas Kelvich <stas@zenith.tech>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
clap = "2.33.0"
|
||||||
|
anyhow = "1.0"
|
||||||
|
|
||||||
|
# FIXME: 'pageserver' is needed for ZTimelineId. Refactor
|
||||||
|
pageserver = { path = "../pageserver" }
|
||||||
|
control_plane = { path = "../control_plane" }
|
||||||
|
postgres_ffi = { path = "../postgres_ffi" }
|
||||||
336
zenith/src/main.rs
Normal file
336
zenith/src/main.rs
Normal file
@@ -0,0 +1,336 @@
|
|||||||
|
use std::fs;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::process::exit;
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use anyhow::{anyhow, bail};
|
||||||
|
use clap::{App, Arg, ArgMatches, SubCommand};
|
||||||
|
|
||||||
|
use control_plane::local_env::LocalEnv;
|
||||||
|
use control_plane::storage::PageServerNode;
|
||||||
|
use control_plane::{compute::ComputeControlPlane, local_env, storage};
|
||||||
|
|
||||||
|
use pageserver::ZTimelineId;
|
||||||
|
|
||||||
|
fn zenith_repo_dir() -> PathBuf {
|
||||||
|
// Find repository path
|
||||||
|
match std::env::var_os("ZENITH_REPO_DIR") {
|
||||||
|
Some(val) => PathBuf::from(val.to_str().unwrap()),
|
||||||
|
None => ".zenith".into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Main entry point for the 'zenith' CLI utility
|
||||||
|
//
|
||||||
|
// This utility can used to work with a local zenith repository.
|
||||||
|
// In order to run queries in it, you need to launch the page server,
|
||||||
|
// and a compute node against the page server
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
let name_arg = Arg::with_name("NAME")
|
||||||
|
.short("n")
|
||||||
|
.index(1)
|
||||||
|
.help("name of this postgres instance")
|
||||||
|
.required(true);
|
||||||
|
let matches = App::new("zenith")
|
||||||
|
.about("Zenith CLI")
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("init")
|
||||||
|
.about("Initialize a new Zenith repository in current directory"),
|
||||||
|
)
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("branch")
|
||||||
|
.about("Create a new branch")
|
||||||
|
.arg(Arg::with_name("branchname").required(false).index(1))
|
||||||
|
.arg(Arg::with_name("start-point").required(false).index(2)),
|
||||||
|
)
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("pageserver")
|
||||||
|
.about("Manage pageserver instance")
|
||||||
|
.subcommand(SubCommand::with_name("status"))
|
||||||
|
.subcommand(SubCommand::with_name("start"))
|
||||||
|
.subcommand(SubCommand::with_name("stop")),
|
||||||
|
)
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("pg")
|
||||||
|
.about("Manage postgres instances")
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("create")
|
||||||
|
// .arg(name_arg.clone()
|
||||||
|
// .required(false)
|
||||||
|
// .help("name of this postgres instance (will be pgN if omitted)"))
|
||||||
|
.arg(Arg::with_name("timeline").required(false).index(1)),
|
||||||
|
)
|
||||||
|
.subcommand(SubCommand::with_name("list"))
|
||||||
|
.subcommand(SubCommand::with_name("start").arg(name_arg.clone()))
|
||||||
|
.subcommand(SubCommand::with_name("stop").arg(name_arg.clone()))
|
||||||
|
.subcommand(SubCommand::with_name("destroy").arg(name_arg.clone())),
|
||||||
|
)
|
||||||
|
.get_matches();
|
||||||
|
|
||||||
|
// handle init separately and exit
|
||||||
|
if let ("init", Some(sub_args)) = matches.subcommand() {
|
||||||
|
run_init_cmd(sub_args.clone())?;
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// all other commands would need config
|
||||||
|
|
||||||
|
let repopath = PathBuf::from(zenith_repo_dir());
|
||||||
|
if !repopath.exists() {
|
||||||
|
bail!(
|
||||||
|
"Zenith repository does not exists in {}.\n\
|
||||||
|
Set ZENITH_REPO_DIR or initialize a new repository with 'zenith init'",
|
||||||
|
repopath.display()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// TODO: check that it looks like a zenith repository
|
||||||
|
let env = match local_env::load_config(&repopath) {
|
||||||
|
Ok(conf) => conf,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Error loading config from {}: {}", repopath.display(), e);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
match matches.subcommand() {
|
||||||
|
("init", Some(_)) => {
|
||||||
|
panic!() /* Should not happen. Init was handled before */
|
||||||
|
}
|
||||||
|
|
||||||
|
("branch", Some(sub_args)) => run_branch_cmd(&env, sub_args.clone())?,
|
||||||
|
("pageserver", Some(sub_args)) => run_pageserver_cmd(&env, sub_args.clone())?,
|
||||||
|
|
||||||
|
("start", Some(_sub_m)) => {
|
||||||
|
let pageserver = storage::PageServerNode::from_env(&env);
|
||||||
|
|
||||||
|
if let Err(e) = pageserver.start() {
|
||||||
|
eprintln!("pageserver start: {}", e);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
("stop", Some(_sub_m)) => {
|
||||||
|
let pageserver = storage::PageServerNode::from_env(&env);
|
||||||
|
if let Err(e) = pageserver.stop() {
|
||||||
|
eprintln!("pageserver stop: {}", e);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
("status", Some(_sub_m)) => {}
|
||||||
|
|
||||||
|
("pg", Some(pg_match)) => {
|
||||||
|
if let Err(e) = handle_pg(pg_match, &env) {
|
||||||
|
eprintln!("pg operation failed: {}", e);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_pageserver_cmd(local_env: &LocalEnv, args: ArgMatches) -> Result<()> {
|
||||||
|
match args.subcommand() {
|
||||||
|
("status", Some(_sub_m)) => {
|
||||||
|
todo!();
|
||||||
|
}
|
||||||
|
("start", Some(_sub_m)) => {
|
||||||
|
let psnode = PageServerNode::from_env(local_env);
|
||||||
|
psnode.start()?;
|
||||||
|
println!("Page server started");
|
||||||
|
}
|
||||||
|
("stop", Some(_sub_m)) => {
|
||||||
|
todo!();
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// Peek into the repository, to grab the timeline ID of given branch
|
||||||
|
pub fn get_branch_timeline(repopath: &Path, branchname: &str) -> ZTimelineId {
|
||||||
|
let branchpath = repopath.join("refs/branches/".to_owned() + branchname);
|
||||||
|
|
||||||
|
ZTimelineId::from_str(&(fs::read_to_string(&branchpath).unwrap())).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||||
|
let mut cplane = ComputeControlPlane::load(env.clone())?;
|
||||||
|
|
||||||
|
match pg_match.subcommand() {
|
||||||
|
("create", Some(sub_m)) => {
|
||||||
|
// FIXME: cheat and resolve the timeline by peeking into the
|
||||||
|
// repository. In reality, when you're launching a compute node
|
||||||
|
// against a possibly-remote page server, we wouldn't know what
|
||||||
|
// branches exist in the remote repository. Or would we require
|
||||||
|
// that you "zenith fetch" them into a local repoitory first?
|
||||||
|
let timeline_arg = sub_m.value_of("timeline").unwrap_or("main");
|
||||||
|
let timeline = get_branch_timeline(&env.repo_path, timeline_arg);
|
||||||
|
|
||||||
|
println!("Initializing Postgres on timeline {}...", timeline);
|
||||||
|
|
||||||
|
cplane.new_node(timeline)?;
|
||||||
|
}
|
||||||
|
("list", Some(_sub_m)) => {
|
||||||
|
println!("NODE\tADDRESS\tSTATUS");
|
||||||
|
for (node_name, node) in cplane.nodes.iter() {
|
||||||
|
println!("{}\t{}\t{}", node_name, node.address, node.status());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
("start", Some(sub_m)) => {
|
||||||
|
let name = sub_m.value_of("NAME").unwrap();
|
||||||
|
let node = cplane
|
||||||
|
.nodes
|
||||||
|
.get(name)
|
||||||
|
.ok_or(anyhow!("postgres {} is not found", name))?;
|
||||||
|
node.start()?;
|
||||||
|
}
|
||||||
|
("stop", Some(sub_m)) => {
|
||||||
|
let name = sub_m.value_of("NAME").unwrap();
|
||||||
|
let node = cplane
|
||||||
|
.nodes
|
||||||
|
.get(name)
|
||||||
|
.ok_or(anyhow!("postgres {} is not found", name))?;
|
||||||
|
node.stop()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// "zenith init" - Initialize a new Zenith repository in current dir
|
||||||
|
fn run_init_cmd(_args: ArgMatches) -> Result<()> {
|
||||||
|
local_env::init()?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle "zenith branch" subcommand
|
||||||
|
fn run_branch_cmd(local_env: &LocalEnv, args: ArgMatches) -> Result<()> {
|
||||||
|
let repopath = local_env.repo_path.to_str().unwrap();
|
||||||
|
|
||||||
|
if let Some(branchname) = args.value_of("branchname") {
|
||||||
|
if PathBuf::from(format!("{}/refs/branches/{}", repopath, branchname)).exists() {
|
||||||
|
anyhow::bail!("branch {} already exists", branchname);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(startpoint_str) = args.value_of("start-point") {
|
||||||
|
let mut startpoint = parse_point_in_time(startpoint_str)?;
|
||||||
|
|
||||||
|
if startpoint.lsn == 0 {
|
||||||
|
// Find end of WAL on the old timeline
|
||||||
|
let end_of_wal = local_env::find_end_of_wal(local_env, startpoint.timelineid)?;
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"branching at end of WAL: {:X}/{:X}",
|
||||||
|
end_of_wal >> 32,
|
||||||
|
end_of_wal & 0xffffffff
|
||||||
|
);
|
||||||
|
|
||||||
|
startpoint.lsn = end_of_wal;
|
||||||
|
}
|
||||||
|
|
||||||
|
return local_env::create_branch(local_env, branchname, startpoint);
|
||||||
|
} else {
|
||||||
|
panic!("Missing start-point");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No arguments, list branches
|
||||||
|
list_branches()?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn list_branches() -> Result<()> {
|
||||||
|
// list branches
|
||||||
|
let paths = fs::read_dir(zenith_repo_dir().join("refs").join("branches"))?;
|
||||||
|
|
||||||
|
for path in paths {
|
||||||
|
println!(" {}", path?.file_name().to_str().unwrap());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Parse user-given string that represents a point-in-time.
|
||||||
|
//
|
||||||
|
// We support multiple variants:
|
||||||
|
//
|
||||||
|
// Raw timeline id in hex, meaning the end of that timeline:
|
||||||
|
// bc62e7d612d0e6fe8f99a6dd2f281f9d
|
||||||
|
//
|
||||||
|
// A specific LSN on a timeline:
|
||||||
|
// bc62e7d612d0e6fe8f99a6dd2f281f9d@2/15D3DD8
|
||||||
|
//
|
||||||
|
// Same, with a human-friendly branch name:
|
||||||
|
// main
|
||||||
|
// main@2/15D3DD8
|
||||||
|
//
|
||||||
|
// Human-friendly tag name:
|
||||||
|
// mytag
|
||||||
|
//
|
||||||
|
//
|
||||||
|
fn parse_point_in_time(s: &str) -> Result<local_env::PointInTime> {
|
||||||
|
let mut strings = s.split("@");
|
||||||
|
let name = strings.next().unwrap();
|
||||||
|
|
||||||
|
let lsn: Option<u64>;
|
||||||
|
if let Some(lsnstr) = strings.next() {
|
||||||
|
let mut s = lsnstr.split("/");
|
||||||
|
let lsn_hi: u64 = s
|
||||||
|
.next()
|
||||||
|
.ok_or(anyhow!("invalid LSN in point-in-time specification"))?
|
||||||
|
.parse()?;
|
||||||
|
let lsn_lo: u64 = s
|
||||||
|
.next()
|
||||||
|
.ok_or(anyhow!("invalid LSN in point-in-time specification"))?
|
||||||
|
.parse()?;
|
||||||
|
lsn = Some(lsn_hi << 32 | lsn_lo);
|
||||||
|
} else {
|
||||||
|
lsn = None
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if it's a tag
|
||||||
|
if lsn.is_none() {
|
||||||
|
let tagpath = zenith_repo_dir().join("refs").join("tags").join(name);
|
||||||
|
if tagpath.exists() {
|
||||||
|
let pointstr = fs::read_to_string(tagpath)?;
|
||||||
|
|
||||||
|
return parse_point_in_time(&pointstr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Check if it's a branch
|
||||||
|
// Check if it's branch @ LSN
|
||||||
|
let branchpath = zenith_repo_dir().join("refs").join("branches").join(name);
|
||||||
|
if branchpath.exists() {
|
||||||
|
let pointstr = fs::read_to_string(branchpath)?;
|
||||||
|
|
||||||
|
let mut result = parse_point_in_time(&pointstr)?;
|
||||||
|
if lsn.is_some() {
|
||||||
|
result.lsn = lsn.unwrap();
|
||||||
|
} else {
|
||||||
|
result.lsn = 0;
|
||||||
|
}
|
||||||
|
return Ok(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if it's a timelineid
|
||||||
|
// Check if it's timelineid @ LSN
|
||||||
|
let tlipath = zenith_repo_dir().join("timelines").join(name);
|
||||||
|
if tlipath.exists() {
|
||||||
|
let result = local_env::PointInTime {
|
||||||
|
timelineid: ZTimelineId::from_str(name)?,
|
||||||
|
lsn: lsn.unwrap_or(0),
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
panic!("could not parse point-in-time {}", s);
|
||||||
|
}
|
||||||
7
zenith_utils/Cargo.toml
Normal file
7
zenith_utils/Cargo.toml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
[package]
|
||||||
|
name = "zenith_utils"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Eric Seppanen <eric@zenith.tech>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
2
zenith_utils/src/lib.rs
Normal file
2
zenith_utils/src/lib.rs
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
//! zenith_utils is intended to be a place to put code that is shared
|
||||||
|
//! between other crates in this repository.
|
||||||
Reference in New Issue
Block a user