mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-24 13:50:37 +00:00
Compare commits
1 Commits
netstat-lo
...
ordered-bl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b3de51296a |
@@ -7,7 +7,7 @@ executors:
|
||||
zenith-build-executor:
|
||||
resource_class: xlarge
|
||||
docker:
|
||||
- image: cimg/rust:1.55.0
|
||||
- image: cimg/rust:1.52.1
|
||||
|
||||
jobs:
|
||||
check-codestyle:
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
# Require an exact match. While an out of date cache might speed up the build,
|
||||
# there's no way to clean out old packages, so the cache grows every time something
|
||||
# changes.
|
||||
- v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
- v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
|
||||
# Build the rust code, including test binaries
|
||||
- run:
|
||||
@@ -128,7 +128,7 @@ jobs:
|
||||
|
||||
- save_cache:
|
||||
name: Save rust cache
|
||||
key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
key: v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||
paths:
|
||||
- ~/.cargo/registry
|
||||
- ~/.cargo/git
|
||||
@@ -182,21 +182,6 @@ jobs:
|
||||
paths:
|
||||
- "*"
|
||||
|
||||
check-python:
|
||||
executor: python/default
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: Install pipenv & deps
|
||||
working_directory: test_runner
|
||||
command: |
|
||||
pip install pipenv
|
||||
pipenv install --dev
|
||||
- run:
|
||||
name: Run yapf to ensure code format
|
||||
working_directory: test_runner
|
||||
command: pipenv run yapf --recursive --diff .
|
||||
|
||||
run-pytest:
|
||||
#description: "Run pytest"
|
||||
executor: python/default
|
||||
@@ -256,21 +241,17 @@ jobs:
|
||||
if << parameters.run_in_parallel >>; then
|
||||
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
|
||||
fi;
|
||||
./netstat-script.sh &
|
||||
NS_PID=$!
|
||||
# Run the tests.
|
||||
#
|
||||
# The junit.xml file allows CircleCI to display more fine-grained test information
|
||||
# in its "Tests" tab in the results page.
|
||||
# -s prevents pytest from capturing output, which helps to see
|
||||
# what's going on if the test hangs
|
||||
# --verbose prints name of each test (helpful when there are
|
||||
# multiple tests in one file)
|
||||
# -rA prints summary in the end
|
||||
# -n4 uses four processes to run tests via pytest-xdist
|
||||
# -s is not used to prevent pytest from capturing output, because tests are running
|
||||
# in parallel and logs are mixed between different tests
|
||||
pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short --verbose -rA $TEST_SELECTION $EXTRA_PARAMS
|
||||
kill $NS_PID
|
||||
awk '/===/ {if (count) print count; print; count=0; next} {count++} END {print count}' $TEST_OUTPUT/netstat.stdout > $TEST_OUTPUT/netstat_stats.stdout
|
||||
pipenv run pytest --junitxml=$TEST_OUTPUT/junit.xml --tb=short -s --verbose -rA $TEST_SELECTION $EXTRA_PARAMS
|
||||
- run:
|
||||
# CircleCI artifacts are preserved one file at a time, so skipping
|
||||
# this step isn't a good idea. If you want to extract the
|
||||
@@ -279,7 +260,7 @@ jobs:
|
||||
when: always
|
||||
command: |
|
||||
du -sh /tmp/test_output/*
|
||||
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
|
||||
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "wal_acceptor.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
|
||||
du -sh /tmp/test_output/*
|
||||
- store_artifacts:
|
||||
path: /tmp/test_output
|
||||
@@ -344,7 +325,8 @@ jobs:
|
||||
\"inputs\": {
|
||||
\"ci_job_name\": \"zenith-remote-ci\",
|
||||
\"commit_hash\": \"$CIRCLE_SHA1\",
|
||||
\"remote_repo\": \"$LOCAL_REPO\"
|
||||
\"remote_repo\": \"$LOCAL_REPO\",
|
||||
\"zenith_image_branch\": \"$CIRCLE_BRANCH\"
|
||||
}
|
||||
}"
|
||||
|
||||
@@ -352,7 +334,6 @@ workflows:
|
||||
build_and_test:
|
||||
jobs:
|
||||
- check-codestyle
|
||||
- check-python
|
||||
- build-postgres:
|
||||
name: build-postgres-<< matrix.build_type >>
|
||||
matrix:
|
||||
|
||||
247
Cargo.lock
generated
247
Cargo.lock
generated
@@ -26,21 +26,18 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "595d3cfa7a60d4555cb5067b99f07142a08ea778de5cf993f7b75c7d8fabc486"
|
||||
|
||||
[[package]]
|
||||
name = "arc-swap"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e906254e445520903e7fc9da4f709886c84ae4bc4ddaf0e093188d66df4dc820"
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.50"
|
||||
@@ -301,7 +298,7 @@ version = "2.33.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
|
||||
dependencies = [
|
||||
"ansi_term 0.11.0",
|
||||
"ansi_term",
|
||||
"atty",
|
||||
"bitflags",
|
||||
"strsim",
|
||||
@@ -390,6 +387,26 @@ dependencies = [
|
||||
"rustc_version",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-mac"
|
||||
version = "0.10.0"
|
||||
@@ -428,6 +445,16 @@ dependencies = [
|
||||
"dirs-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-next"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"dirs-sys-next",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-sys"
|
||||
version = "0.3.6"
|
||||
@@ -439,6 +466,17 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-sys-next"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"redox_users",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dlv-list"
|
||||
version = "0.2.3"
|
||||
@@ -918,15 +956,6 @@ dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1"
|
||||
dependencies = [
|
||||
"regex-automata",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matches"
|
||||
version = "0.1.8"
|
||||
@@ -1191,12 +1220,10 @@ dependencies = [
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"signal-hook",
|
||||
"tar",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"toml",
|
||||
"tracing",
|
||||
"workspace_hack",
|
||||
"zenith_metrics",
|
||||
"zenith_utils",
|
||||
@@ -1504,15 +1531,6 @@ dependencies = [
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
dependencies = [
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.25"
|
||||
@@ -1671,6 +1689,12 @@ dependencies = [
|
||||
"webpki",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.5"
|
||||
@@ -1828,32 +1852,12 @@ dependencies = [
|
||||
"opaque-debug",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sharded-slab"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "740223c51853f3145fe7c90360d2d4232f2b62e3449489c207eccde818979982"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d"
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook"
|
||||
version = "0.3.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c98891d737e271a2954825ef19e46bd16bdb98e2746f2eec4f7a4ef7946efd1"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"signal-hook-registry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.4.0"
|
||||
@@ -1886,6 +1890,59 @@ version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f173ac3d1a7e3b28003f40de0b5ce7fe2710f9b9dc3fc38664cebee46b3b6527"
|
||||
|
||||
[[package]]
|
||||
name = "slog"
|
||||
version = "2.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8347046d4ebd943127157b94d63abb990fcf729dc4e9978927fdf4ac3c998d06"
|
||||
|
||||
[[package]]
|
||||
name = "slog-async"
|
||||
version = "2.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c60813879f820c85dbc4eabf3269befe374591289019775898d56a81a804fbdc"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"slog",
|
||||
"take_mut",
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slog-scope"
|
||||
version = "4.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2f95a4b4c3274cd2869549da82b57ccc930859bdbf5bcea0424bc5f140b3c786"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"lazy_static",
|
||||
"slog",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slog-stdlog"
|
||||
version = "4.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8228ab7302adbf4fcb37e66f3cda78003feb521e7fd9e3847ec117a7784d0f5a"
|
||||
dependencies = [
|
||||
"log",
|
||||
"slog",
|
||||
"slog-scope",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "slog-term"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95c1e7e5aab61ced6006149ea772770b84a0d16ce0f7885def313e4829946d76"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"chrono",
|
||||
"slog",
|
||||
"term",
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.6.1"
|
||||
@@ -1941,6 +1998,12 @@ dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "take_mut"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60"
|
||||
|
||||
[[package]]
|
||||
name = "tap"
|
||||
version = "1.0.1"
|
||||
@@ -1972,6 +2035,17 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "term"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
|
||||
dependencies = [
|
||||
"dirs-next",
|
||||
"rustversion",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.1.2"
|
||||
@@ -2149,79 +2223,24 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"
|
||||
|
||||
[[package]]
|
||||
name = "tracing"
|
||||
version = "0.1.29"
|
||||
version = "0.1.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105"
|
||||
checksum = "09adeb8c97449311ccd28a427f96fb563e7fd31aabf994189879d9da2394b89d"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"pin-project-lite",
|
||||
"tracing-attributes",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-attributes"
|
||||
version = "0.1.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-core"
|
||||
version = "0.1.21"
|
||||
version = "0.1.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4"
|
||||
checksum = "a9ff14f98b1a4b289c6248a023c1c2fa1491062964e9fed67ab29c4e4da4a052"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-log"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"log",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-serde"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb65ea441fbb84f9f6748fd496cf7f63ec9af5bca94dd86456978d055e8eb28b"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71"
|
||||
dependencies = [
|
||||
"ansi_term 0.12.1",
|
||||
"chrono",
|
||||
"lazy_static",
|
||||
"matchers",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sharded-slab",
|
||||
"smallvec",
|
||||
"thread_local",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
"tracing-serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "try-lock"
|
||||
version = "0.2.3"
|
||||
@@ -2326,7 +2345,6 @@ dependencies = [
|
||||
"fs2",
|
||||
"hex",
|
||||
"humantime",
|
||||
"hyper",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"pageserver",
|
||||
@@ -2334,7 +2352,6 @@ dependencies = [
|
||||
"postgres-protocol",
|
||||
"postgres_ffi",
|
||||
"regex",
|
||||
"routerify",
|
||||
"rust-s3",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -2588,12 +2605,14 @@ dependencies = [
|
||||
"rustls-split",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"slog",
|
||||
"slog-async",
|
||||
"slog-scope",
|
||||
"slog-stdlog",
|
||||
"slog-term",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"tracing-log",
|
||||
"tracing-subscriber",
|
||||
"webpki",
|
||||
"workspace_hack",
|
||||
"zenith_metrics",
|
||||
|
||||
@@ -37,7 +37,7 @@ RUN apt-get update && apt-get -yq install libreadline-dev libseccomp-dev openssl
|
||||
mkdir zenith_install
|
||||
|
||||
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/wal_acceptor /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/proxy /usr/local/bin
|
||||
COPY --from=pg-build /zenith/tmp_install postgres_install
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
|
||||
@@ -81,7 +81,7 @@ FROM alpine:3.13
|
||||
RUN apk add --update openssl build-base libseccomp-dev
|
||||
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
|
||||
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/wal_acceptor /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/proxy /usr/local/bin
|
||||
COPY --from=pg-build /zenith/tmp_install /usr/local
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
|
||||
46
Makefile
46
Makefile
@@ -10,43 +10,32 @@ endif
|
||||
# We differentiate between release / debug build types using the BUILD_TYPE
|
||||
# environment variable.
|
||||
#
|
||||
BUILD_TYPE ?= debug
|
||||
ifeq ($(BUILD_TYPE),release)
|
||||
PG_CONFIGURE_OPTS = --enable-debug
|
||||
PG_CFLAGS = -O2 -g3 $(CFLAGS)
|
||||
# Unfortunately, `--profile=...` is a nightly feature
|
||||
CARGO_BUILD_FLAGS += --release
|
||||
else ifeq ($(BUILD_TYPE),debug)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --enable-cassert --enable-depend
|
||||
PG_CFLAGS = -O0 -g3 $(CFLAGS)
|
||||
PG_CFLAGS = -O2 -g3 ${CFLAGS}
|
||||
else
|
||||
$(error Bad build type `$(BUILD_TYPE)', see Makefile for options)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --enable-cassert --enable-depend
|
||||
PG_CFLAGS = -O0 -g3 ${CFLAGS}
|
||||
endif
|
||||
|
||||
# Choose whether we should be silent or verbose
|
||||
CARGO_BUILD_FLAGS += --$(if $(filter s,$(MAKEFLAGS)),quiet,verbose)
|
||||
# Fix for a corner case when make doesn't pass a jobserver
|
||||
CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
|
||||
|
||||
# This option has a side effect of passing make jobserver to cargo.
|
||||
# However, we shouldn't do this if `make -n` (--dry-run) has been asked.
|
||||
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
|
||||
# Force cargo not to print progress bar
|
||||
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
|
||||
|
||||
#
|
||||
# Top level Makefile to build Zenith and PostgreSQL
|
||||
#
|
||||
.PHONY: all
|
||||
all: zenith postgres
|
||||
|
||||
# We don't want to run 'cargo build' in parallel with the postgres build,
|
||||
# because interleaving cargo build output with postgres build output looks
|
||||
# confusing. Also, 'cargo build' is parallel on its own, so it would be too
|
||||
# much parallelism. (Recursive invocation of postgres target still gets any
|
||||
# '-j' flag from the command line, so 'make -j' is still useful.)
|
||||
.NOTPARALLEL:
|
||||
|
||||
### Zenith Rust bits
|
||||
#
|
||||
# The 'postgres_ffi' depends on the Postgres headers.
|
||||
.PHONY: zenith
|
||||
zenith: postgres-headers
|
||||
+@echo "Compiling Zenith"
|
||||
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
|
||||
cargo build
|
||||
|
||||
### PostgreSQL parts
|
||||
tmp_install/build/config.status:
|
||||
@@ -68,10 +57,10 @@ postgres-headers: postgres-configure
|
||||
+@echo "Installing PostgreSQL headers"
|
||||
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
|
||||
|
||||
|
||||
# Compile and install PostgreSQL and contrib/zenith
|
||||
.PHONY: postgres
|
||||
postgres: postgres-configure \
|
||||
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
|
||||
postgres: postgres-configure
|
||||
+@echo "Compiling PostgreSQL"
|
||||
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
|
||||
+@echo "Compiling contrib/zenith"
|
||||
@@ -79,21 +68,18 @@ postgres: postgres-configure \
|
||||
+@echo "Compiling contrib/zenith_test_utils"
|
||||
$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
|
||||
|
||||
.PHONY: postgres-clean
|
||||
postgres-clean:
|
||||
$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
|
||||
|
||||
# This doesn't remove the effects of 'configure'.
|
||||
.PHONY: clean
|
||||
clean:
|
||||
cd tmp_install/build && $(MAKE) clean
|
||||
$(CARGO_CMD_PREFIX) cargo clean
|
||||
cd tmp_install/build && ${MAKE} clean
|
||||
cargo clean
|
||||
|
||||
# This removes everything
|
||||
.PHONY: distclean
|
||||
distclean:
|
||||
rm -rf tmp_install
|
||||
$(CARGO_CMD_PREFIX) cargo clean
|
||||
cargo clean
|
||||
|
||||
.PHONY: fmt
|
||||
fmt:
|
||||
|
||||
@@ -28,7 +28,7 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec
|
||||
libssl-dev clang pkg-config libpq-dev
|
||||
```
|
||||
|
||||
[Rust] 1.55 or later is also required.
|
||||
[Rust] 1.52 or later is also required.
|
||||
|
||||
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
|
||||
|
||||
|
||||
@@ -84,53 +84,25 @@ impl ComputeControlPlane {
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: see also parse_point_in_time in branches.rs.
|
||||
fn parse_point_in_time(
|
||||
&self,
|
||||
tenantid: ZTenantId,
|
||||
s: &str,
|
||||
) -> Result<(ZTimelineId, Option<Lsn>)> {
|
||||
let mut strings = s.split('@');
|
||||
let name = strings.next().unwrap();
|
||||
|
||||
let lsn: Option<Lsn>;
|
||||
if let Some(lsnstr) = strings.next() {
|
||||
lsn = Some(
|
||||
Lsn::from_str(lsnstr)
|
||||
.with_context(|| "invalid LSN in point-in-time specification")?,
|
||||
);
|
||||
} else {
|
||||
lsn = None
|
||||
}
|
||||
|
||||
// Resolve the timeline ID, given the human-readable branch name
|
||||
let timeline_id = self
|
||||
.pageserver
|
||||
.branch_get_by_name(&tenantid, name)?
|
||||
.timeline_id;
|
||||
|
||||
Ok((timeline_id, lsn))
|
||||
}
|
||||
|
||||
pub fn new_node(
|
||||
&mut self,
|
||||
tenantid: ZTenantId,
|
||||
name: &str,
|
||||
timeline_spec: &str,
|
||||
branch_name: &str,
|
||||
port: Option<u16>,
|
||||
) -> Result<Arc<PostgresNode>> {
|
||||
// Resolve the human-readable timeline spec into timeline ID and LSN
|
||||
let (timelineid, lsn) = self.parse_point_in_time(tenantid, timeline_spec)?;
|
||||
let timeline_id = self
|
||||
.pageserver
|
||||
.branch_get_by_name(&tenantid, branch_name)?
|
||||
.timeline_id;
|
||||
|
||||
let port = port.unwrap_or_else(|| self.get_port());
|
||||
let node = Arc::new(PostgresNode {
|
||||
name: name.to_owned(),
|
||||
name: branch_name.to_owned(),
|
||||
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
|
||||
env: self.env.clone(),
|
||||
pageserver: Arc::clone(&self.pageserver),
|
||||
is_test: false,
|
||||
timelineid,
|
||||
lsn,
|
||||
timelineid: timeline_id,
|
||||
tenantid,
|
||||
uses_wal_proposer: false,
|
||||
});
|
||||
@@ -155,7 +127,6 @@ pub struct PostgresNode {
|
||||
pageserver: Arc<PageServerNode>,
|
||||
is_test: bool,
|
||||
pub timelineid: ZTimelineId,
|
||||
pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
|
||||
pub tenantid: ZTenantId,
|
||||
uses_wal_proposer: bool,
|
||||
}
|
||||
@@ -190,11 +161,8 @@ impl PostgresNode {
|
||||
let port: u16 = conf.parse_field("port", &context)?;
|
||||
let timelineid: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
|
||||
let tenantid: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
|
||||
let uses_wal_proposer = conf.get("wal_acceptors").is_some();
|
||||
|
||||
// parse recovery_target_lsn, if any
|
||||
let recovery_target_lsn: Option<Lsn> =
|
||||
conf.parse_field_optional("recovery_target_lsn", &context)?;
|
||||
let uses_wal_proposer = conf.get("wal_acceptors").is_some();
|
||||
|
||||
// ok now
|
||||
Ok(PostgresNode {
|
||||
@@ -204,7 +172,6 @@ impl PostgresNode {
|
||||
pageserver: Arc::clone(pageserver),
|
||||
is_test: false,
|
||||
timelineid,
|
||||
lsn: recovery_target_lsn,
|
||||
tenantid,
|
||||
uses_wal_proposer,
|
||||
})
|
||||
@@ -266,7 +233,7 @@ impl PostgresNode {
|
||||
// Read the archive directly from the `CopyOutReader`
|
||||
tar::Archive::new(copyreader)
|
||||
.unpack(&self.pgdata())
|
||||
.with_context(|| "extracting base backup failed")?;
|
||||
.with_context(|| "extracting page backup failed")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -334,9 +301,6 @@ impl PostgresNode {
|
||||
conf.append("zenith.page_server_connstring", &pageserver_connstr);
|
||||
conf.append("zenith.zenith_tenant", &self.tenantid.to_string());
|
||||
conf.append("zenith.zenith_timeline", &self.timelineid.to_string());
|
||||
if let Some(lsn) = self.lsn {
|
||||
conf.append("recovery_target_lsn", &lsn.to_string());
|
||||
}
|
||||
conf.append_line("");
|
||||
|
||||
// Configure the node to stream WAL directly to the pageserver
|
||||
@@ -350,9 +314,7 @@ impl PostgresNode {
|
||||
}
|
||||
|
||||
fn load_basebackup(&self) -> Result<()> {
|
||||
let backup_lsn = if let Some(lsn) = self.lsn {
|
||||
Some(lsn)
|
||||
} else if self.uses_wal_proposer {
|
||||
let lsn = if self.uses_wal_proposer {
|
||||
// LSN 0 means that it is bootstrap and we need to download just
|
||||
// latest data from the pageserver. That is a bit clumsy but whole bootstrap
|
||||
// procedure evolves quite actively right now, so let's think about it again
|
||||
@@ -367,7 +329,7 @@ impl PostgresNode {
|
||||
None
|
||||
};
|
||||
|
||||
self.do_basebackup(backup_lsn)?;
|
||||
self.do_basebackup(lsn)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -444,10 +406,6 @@ impl PostgresNode {
|
||||
// 3. Load basebackup
|
||||
self.load_basebackup()?;
|
||||
|
||||
if self.lsn.is_some() {
|
||||
File::create(self.pgdata().join("standby.signal"))?;
|
||||
}
|
||||
|
||||
// 4. Finally start the compute node postgres
|
||||
println!("Starting postgres node at '{}'", self.connstr());
|
||||
self.pg_ctl(&["start"], auth_token)
|
||||
|
||||
@@ -83,22 +83,6 @@ impl PostgresConf {
|
||||
.with_context(|| format!("could not parse '{}' option {}", field_name, context))
|
||||
}
|
||||
|
||||
pub fn parse_field_optional<T>(&self, field_name: &str, context: &str) -> Result<Option<T>>
|
||||
where
|
||||
T: FromStr,
|
||||
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
|
||||
{
|
||||
if let Some(val) = self.get(field_name) {
|
||||
let result = val
|
||||
.parse::<T>()
|
||||
.with_context(|| format!("could not parse '{}' option {}", field_name, context))?;
|
||||
|
||||
Ok(Some(result))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Note: if you call this multiple times for the same option, the config
|
||||
/// file will a line for each call. It would be nice to have a function
|
||||
|
||||
@@ -199,45 +199,23 @@ impl PageServerNode {
|
||||
bail!("pageserver failed to start in {} seconds", RETRIES);
|
||||
}
|
||||
|
||||
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
|
||||
pub fn stop(&self) -> anyhow::Result<()> {
|
||||
let pid = read_pidfile(&self.pid_file())?;
|
||||
let pid = Pid::from_raw(pid);
|
||||
if immediate {
|
||||
println!("Stop pageserver immediately");
|
||||
if kill(pid, Signal::SIGQUIT).is_err() {
|
||||
bail!("Failed to kill pageserver with pid {}", pid);
|
||||
}
|
||||
} else {
|
||||
println!("Stop pageserver gracefully");
|
||||
if kill(pid, Signal::SIGTERM).is_err() {
|
||||
bail!("Failed to stop pageserver with pid {}", pid);
|
||||
}
|
||||
if kill(pid, Signal::SIGTERM).is_err() {
|
||||
bail!("Failed to kill pageserver with pid {}", pid);
|
||||
}
|
||||
|
||||
// wait for pageserver stop
|
||||
let address = connection_address(&self.pg_connection_config);
|
||||
|
||||
// TODO Remove this "timeout" and handle it on caller side instead.
|
||||
// Shutting down may take a long time,
|
||||
// if pageserver checkpoints a lot of data
|
||||
for _ in 0..100 {
|
||||
if let Err(_e) = TcpStream::connect(&address) {
|
||||
println!("Pageserver stopped receiving connections");
|
||||
|
||||
//Now check status
|
||||
match self.check_status() {
|
||||
Ok(_) => {
|
||||
println!("Pageserver status is OK. Wait a bit.");
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
Err(err) => {
|
||||
println!("Pageserver status is: {}", err);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("Pageserver still receives connections");
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
for _ in 0..5 {
|
||||
let stream = TcpStream::connect(&address);
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
if let Err(_e) = stream {
|
||||
println!("Pageserver stopped");
|
||||
return Ok(());
|
||||
}
|
||||
println!("Stopping pageserver on {}", address);
|
||||
}
|
||||
|
||||
bail!("Failed to stop pageserver with pid {}", pid);
|
||||
@@ -335,9 +313,8 @@ impl PageServerNode {
|
||||
|
||||
impl Drop for PageServerNode {
|
||||
fn drop(&mut self) {
|
||||
// TODO Looks like this flag is never set
|
||||
if self.kill_on_exit {
|
||||
let _ = self.stop(true);
|
||||
let _ = self.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ if [ "$1" = 'pageserver' ]; then
|
||||
pageserver --init -D /data --postgres-distrib /usr/local
|
||||
fi
|
||||
echo "Staring pageserver at 0.0.0.0:6400"
|
||||
pageserver -l 0.0.0.0:6400 --listen-http 0.0.0.0:9898 -D /data
|
||||
pageserver -l 0.0.0.0:6400 -D /data
|
||||
else
|
||||
"$@"
|
||||
fi
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
Currently we build two main images:
|
||||
|
||||
- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `wal_acceptor` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [zenithdb/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [zenithdb/postgres](https://github.com/zenithdb/postgres).
|
||||
|
||||
And two intermediate images used either to reduce build time or to deliver some additional binary tools from other repos:
|
||||
|
||||
@@ -56,4 +56,4 @@ Tenant id is passed to postgres via GUC the same way as the timeline. Tenant id
|
||||
|
||||
### Safety
|
||||
|
||||
For now particular tenant can only appear on a particular pageserver. Set of safekeepers are also pinned to particular (tenantid, timeline) pair so there can only be one writer for particular (tenantid, timeline).
|
||||
For now particular tenant can only appear on a particular pageserver. Set of WAL acceptors are also pinned to particular (tenantid, timeline) pair so there can only be one writer for particular (tenantid, timeline).
|
||||
|
||||
@@ -17,7 +17,7 @@ lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
clap = "2.33.0"
|
||||
daemonize = "0.4.1"
|
||||
tokio = { version = "1.11", features = ["process", "macros", "fs", "rt"] }
|
||||
tokio = { version = "1.11", features = ["process", "macros", "fs"] }
|
||||
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
@@ -35,8 +35,6 @@ scopeguard = "1.1.0"
|
||||
rust-s3 = { version = "0.27.0-rc4", features = ["no-verify-ssl"] }
|
||||
async-trait = "0.1"
|
||||
const_format = "0.2.21"
|
||||
tracing = "0.1.27"
|
||||
signal-hook = {version = "0.3.10", features = ["extended-siginfo"] }
|
||||
|
||||
postgres_ffi = { path = "../postgres_ffi" }
|
||||
zenith_metrics = { path = "../zenith_metrics" }
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
use anyhow::Result;
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use log::*;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::sync::Arc;
|
||||
@@ -32,7 +31,7 @@ use zenith_utils::lsn::Lsn;
|
||||
pub struct Basebackup<'a> {
|
||||
ar: Builder<&'a mut dyn Write>,
|
||||
timeline: &'a Arc<dyn Timeline>,
|
||||
pub lsn: Lsn,
|
||||
lsn: Lsn,
|
||||
prev_record_lsn: Lsn,
|
||||
}
|
||||
|
||||
@@ -84,7 +83,7 @@ impl<'a> Basebackup<'a> {
|
||||
|
||||
info!(
|
||||
"taking basebackup lsn={}, prev_lsn={}",
|
||||
backup_lsn, backup_prev
|
||||
backup_prev, backup_lsn
|
||||
);
|
||||
|
||||
Ok(Basebackup {
|
||||
@@ -98,6 +97,7 @@ impl<'a> Basebackup<'a> {
|
||||
pub fn send_tarball(&mut self) -> anyhow::Result<()> {
|
||||
// Create pgdata subdirs structure
|
||||
for dir in pg_constants::PGDATA_SUBDIRS.iter() {
|
||||
info!("send subdir {:?}", *dir);
|
||||
let header = new_tar_header_dir(*dir)?;
|
||||
self.ar.append(&header, &mut io::empty())?;
|
||||
}
|
||||
@@ -249,7 +249,13 @@ impl<'a> Basebackup<'a> {
|
||||
let mut pg_control = ControlFileData::decode(&pg_control_bytes)?;
|
||||
let mut checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
|
||||
|
||||
// Generate new pg_control needed for bootstrap
|
||||
// Generate new pg_control and WAL needed for bootstrap
|
||||
let checkpoint_segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
|
||||
let checkpoint_lsn = XLogSegNoOffsetToRecPtr(
|
||||
checkpoint_segno,
|
||||
XLOG_SIZE_OF_XLOG_LONG_PHD as u32,
|
||||
pg_constants::WAL_SEGMENT_SIZE,
|
||||
);
|
||||
checkpoint.redo = normalize_lsn(self.lsn, pg_constants::WAL_SEGMENT_SIZE).0;
|
||||
|
||||
//reset some fields we don't want to preserve
|
||||
@@ -258,24 +264,19 @@ impl<'a> Basebackup<'a> {
|
||||
checkpoint.oldestActiveXid = 0;
|
||||
|
||||
//save new values in pg_control
|
||||
pg_control.checkPoint = 0;
|
||||
pg_control.checkPoint = checkpoint_lsn;
|
||||
pg_control.checkPointCopy = checkpoint;
|
||||
pg_control.state = pg_constants::DB_SHUTDOWNED;
|
||||
|
||||
// add zenith.signal file
|
||||
let mut zenith_signal = String::new();
|
||||
if self.prev_record_lsn == Lsn(0) {
|
||||
if self.lsn == self.timeline.get_ancestor_lsn() {
|
||||
write!(zenith_signal, "PREV LSN: none")?;
|
||||
} else {
|
||||
write!(zenith_signal, "PREV LSN: invalid")?;
|
||||
}
|
||||
let xl_prev = if self.prev_record_lsn == Lsn(0) {
|
||||
0xBAD0 // magic value to indicate that we don't know prev_lsn
|
||||
} else {
|
||||
write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?;
|
||||
}
|
||||
self.prev_record_lsn.0
|
||||
};
|
||||
self.ar.append(
|
||||
&new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
|
||||
zenith_signal.as_bytes(),
|
||||
&new_tar_header("zenith.signal", 8)?,
|
||||
&xl_prev.to_le_bytes()[..],
|
||||
)?;
|
||||
|
||||
//send pg_control
|
||||
@@ -284,15 +285,14 @@ impl<'a> Basebackup<'a> {
|
||||
self.ar.append(&header, &pg_control_bytes[..])?;
|
||||
|
||||
//send wal segment
|
||||
let segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE);
|
||||
let wal_file_name = XLogFileName(
|
||||
1, // FIXME: always use Postgres timeline 1
|
||||
segno,
|
||||
checkpoint_segno,
|
||||
pg_constants::WAL_SEGMENT_SIZE,
|
||||
);
|
||||
let wal_file_path = format!("pg_wal/{}", wal_file_name);
|
||||
let header = new_tar_header(&wal_file_path, pg_constants::WAL_SEGMENT_SIZE as u64)?;
|
||||
let wal_seg = generate_wal_segment(segno, pg_control.system_identifier);
|
||||
let wal_seg = generate_wal_segment(&pg_control);
|
||||
assert!(wal_seg.len() == pg_constants::WAL_SEGMENT_SIZE);
|
||||
self.ar.append(&header, &wal_seg[..])?;
|
||||
Ok(())
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
// Main entry point for the Page Server executable
|
||||
//
|
||||
|
||||
use log::*;
|
||||
use pageserver::defaults::*;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{
|
||||
env,
|
||||
@@ -10,33 +12,27 @@ use std::{
|
||||
str::FromStr,
|
||||
thread,
|
||||
};
|
||||
use tracing::*;
|
||||
use zenith_utils::{auth::JwtAuth, logging, postgres_backend::AuthType};
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use signal_hook::consts::signal::*;
|
||||
use signal_hook::consts::TERM_SIGNALS;
|
||||
use signal_hook::flag;
|
||||
use signal_hook::iterator::exfiltrator::WithOrigin;
|
||||
use signal_hook::iterator::SignalsInfo;
|
||||
use std::process::exit;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::Arc;
|
||||
|
||||
use clap::{App, Arg, ArgMatches};
|
||||
use daemonize::Daemonize;
|
||||
|
||||
use pageserver::{
|
||||
branches, defaults::*, http, page_service, relish_storage, tenant_mgr, PageServerConf,
|
||||
RelishStorageConfig, RelishStorageKind, S3Config, LOG_FILE_NAME,
|
||||
branches,
|
||||
defaults::{
|
||||
DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR,
|
||||
DEFAULT_RELISH_STORAGE_MAX_CONCURRENT_SYNC_LIMITS,
|
||||
},
|
||||
http, page_service, relish_storage, tenant_mgr, PageServerConf, RelishStorageConfig,
|
||||
RelishStorageKind, S3Config, LOG_FILE_NAME,
|
||||
};
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::postgres_backend;
|
||||
|
||||
use const_format::formatcp;
|
||||
|
||||
/// String arguments that can be declared via CLI or config file
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct CfgFileParams {
|
||||
listen_pg_addr: Option<String>,
|
||||
listen_http_addr: Option<String>,
|
||||
@@ -47,21 +43,12 @@ struct CfgFileParams {
|
||||
pg_distrib_dir: Option<String>,
|
||||
auth_validation_public_key_path: Option<String>,
|
||||
auth_type: Option<String>,
|
||||
relish_storage_max_concurrent_sync: Option<String>,
|
||||
/////////////////////////////////
|
||||
//// Don't put `Option<String>` and other "simple" values below.
|
||||
////
|
||||
/// `Option<RelishStorage>` is a <a href='https://toml.io/en/v1.0.0#table'>table</a> in TOML.
|
||||
/// Values in TOML cannot be defined after tables (other tables can),
|
||||
/// and [`toml`] crate serializes all fields in the order of their appearance.
|
||||
////////////////////////////////
|
||||
// see https://github.com/alexcrichton/toml-rs/blob/6c162e6562c3e432bf04c82a3d1d789d80761a86/examples/enum_external.rs for enum deserialisation examples
|
||||
relish_storage: Option<RelishStorage>,
|
||||
relish_storage_max_concurrent_sync: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)]
|
||||
// Without this attribute, enums with values won't be serialized by the `toml` library (but can be deserialized nonetheless!).
|
||||
// See https://github.com/alexcrichton/toml-rs/blob/6c162e6562c3e432bf04c82a3d1d789d80761a86/examples/enum_external.rs for the examples
|
||||
#[serde(untagged)]
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
enum RelishStorage {
|
||||
Local {
|
||||
local_path: String,
|
||||
@@ -460,18 +447,7 @@ fn main() -> Result<()> {
|
||||
|
||||
fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
|
||||
// Initialize logger
|
||||
let log_file = logging::init(LOG_FILE_NAME, conf.daemonize)?;
|
||||
|
||||
let term_now = Arc::new(AtomicBool::new(false));
|
||||
for sig in TERM_SIGNALS {
|
||||
// When terminated by a second term signal, exit with exit code 1.
|
||||
// This will do nothing the first time (because term_now is false).
|
||||
flag::register_conditional_shutdown(*sig, 1, Arc::clone(&term_now))?;
|
||||
// But this will "arm" the above for the second time, by setting it to true.
|
||||
// The order of registering these is important, if you put this one first, it will
|
||||
// first arm and then terminate ‒ all in the first round.
|
||||
flag::register(*sig, Arc::clone(&term_now))?;
|
||||
}
|
||||
let (_scope_guard, log_file) = logging::init(LOG_FILE_NAME, conf.daemonize)?;
|
||||
|
||||
// TODO: Check that it looks like a valid repository before going further
|
||||
|
||||
@@ -504,7 +480,7 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
|
||||
|
||||
match daemonize.start() {
|
||||
Ok(_) => info!("Success, daemonized"),
|
||||
Err(err) => error!(%err, "could not daemonize"),
|
||||
Err(e) => error!("could not daemonize: {:#}", e),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -549,173 +525,13 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> {
|
||||
page_service::thread_main(conf, auth, pageserver_listener, conf.auth_type)
|
||||
})?;
|
||||
|
||||
for info in SignalsInfo::<WithOrigin>::new(TERM_SIGNALS)?.into_iter() {
|
||||
match info.signal {
|
||||
SIGQUIT => {
|
||||
info!("Got SIGQUIT. Terminate pageserver in immediate shutdown mode");
|
||||
exit(111);
|
||||
}
|
||||
SIGINT | SIGTERM => {
|
||||
info!("Got SIGINT/SIGTERM. Terminate gracefully in fast shutdown mode");
|
||||
// Terminate postgres backends
|
||||
postgres_backend::set_pgbackend_shutdown_requested();
|
||||
// Stop all tenants and flush their data
|
||||
tenant_mgr::shutdown_all_tenants()?;
|
||||
// Wait for pageservice thread to complete the job
|
||||
page_service_thread
|
||||
.join()
|
||||
.expect("thread panicked")
|
||||
.expect("thread exited with an error");
|
||||
join_handles.push(page_service_thread);
|
||||
|
||||
// Shut down http router
|
||||
endpoint::shutdown();
|
||||
|
||||
// Wait for all threads
|
||||
for handle in join_handles.into_iter() {
|
||||
handle
|
||||
.join()
|
||||
.expect("thread panicked")
|
||||
.expect("thread exited with an error");
|
||||
}
|
||||
info!("Pageserver shut down successfully completed");
|
||||
exit(0);
|
||||
}
|
||||
unknown_signal => {
|
||||
debug!("Unknown signal {}", unknown_signal);
|
||||
}
|
||||
}
|
||||
for handle in join_handles.into_iter() {
|
||||
handle
|
||||
.join()
|
||||
.expect("thread panicked")
|
||||
.expect("thread exited with an error")
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn page_server_conf_toml_serde() {
|
||||
let params = CfgFileParams {
|
||||
listen_pg_addr: Some("listen_pg_addr_VALUE".to_string()),
|
||||
listen_http_addr: Some("listen_http_addr_VALUE".to_string()),
|
||||
checkpoint_distance: Some("checkpoint_distance_VALUE".to_string()),
|
||||
checkpoint_period: Some("checkpoint_period_VALUE".to_string()),
|
||||
gc_horizon: Some("gc_horizon_VALUE".to_string()),
|
||||
gc_period: Some("gc_period_VALUE".to_string()),
|
||||
pg_distrib_dir: Some("pg_distrib_dir_VALUE".to_string()),
|
||||
auth_validation_public_key_path: Some(
|
||||
"auth_validation_public_key_path_VALUE".to_string(),
|
||||
),
|
||||
auth_type: Some("auth_type_VALUE".to_string()),
|
||||
relish_storage: Some(RelishStorage::Local {
|
||||
local_path: "relish_storage_local_VALUE".to_string(),
|
||||
}),
|
||||
relish_storage_max_concurrent_sync: Some(
|
||||
"relish_storage_max_concurrent_sync_VALUE".to_string(),
|
||||
),
|
||||
};
|
||||
|
||||
let toml_string = toml::to_string(¶ms).expect("Failed to serialize correct config");
|
||||
let toml_pretty_string =
|
||||
toml::to_string_pretty(¶ms).expect("Failed to serialize correct config");
|
||||
assert_eq!(
|
||||
r#"listen_pg_addr = 'listen_pg_addr_VALUE'
|
||||
listen_http_addr = 'listen_http_addr_VALUE'
|
||||
checkpoint_distance = 'checkpoint_distance_VALUE'
|
||||
checkpoint_period = 'checkpoint_period_VALUE'
|
||||
gc_horizon = 'gc_horizon_VALUE'
|
||||
gc_period = 'gc_period_VALUE'
|
||||
pg_distrib_dir = 'pg_distrib_dir_VALUE'
|
||||
auth_validation_public_key_path = 'auth_validation_public_key_path_VALUE'
|
||||
auth_type = 'auth_type_VALUE'
|
||||
relish_storage_max_concurrent_sync = 'relish_storage_max_concurrent_sync_VALUE'
|
||||
|
||||
[relish_storage]
|
||||
local_path = 'relish_storage_local_VALUE'
|
||||
"#,
|
||||
toml_pretty_string
|
||||
);
|
||||
|
||||
let params_from_serialized: CfgFileParams = toml::from_str(&toml_string)
|
||||
.expect("Failed to deserialize the serialization result of the config");
|
||||
let params_from_serialized_pretty: CfgFileParams = toml::from_str(&toml_pretty_string)
|
||||
.expect("Failed to deserialize the prettified serialization result of the config");
|
||||
assert!(
|
||||
params_from_serialized == params,
|
||||
"Expected the same config in the end of config -> serialize -> deserialize chain"
|
||||
);
|
||||
assert!(
|
||||
params_from_serialized_pretty == params,
|
||||
"Expected the same config in the end of config -> serialize pretty -> deserialize chain"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn credentials_omitted_during_serialization() {
|
||||
let params = CfgFileParams {
|
||||
listen_pg_addr: Some("listen_pg_addr_VALUE".to_string()),
|
||||
listen_http_addr: Some("listen_http_addr_VALUE".to_string()),
|
||||
checkpoint_distance: Some("checkpoint_distance_VALUE".to_string()),
|
||||
checkpoint_period: Some("checkpoint_period_VALUE".to_string()),
|
||||
gc_horizon: Some("gc_horizon_VALUE".to_string()),
|
||||
gc_period: Some("gc_period_VALUE".to_string()),
|
||||
pg_distrib_dir: Some("pg_distrib_dir_VALUE".to_string()),
|
||||
auth_validation_public_key_path: Some(
|
||||
"auth_validation_public_key_path_VALUE".to_string(),
|
||||
),
|
||||
auth_type: Some("auth_type_VALUE".to_string()),
|
||||
relish_storage: Some(RelishStorage::AwsS3 {
|
||||
bucket_name: "bucket_name_VALUE".to_string(),
|
||||
bucket_region: "bucket_region_VALUE".to_string(),
|
||||
access_key_id: Some("access_key_id_VALUE".to_string()),
|
||||
secret_access_key: Some("secret_access_key_VALUE".to_string()),
|
||||
}),
|
||||
relish_storage_max_concurrent_sync: Some(
|
||||
"relish_storage_max_concurrent_sync_VALUE".to_string(),
|
||||
),
|
||||
};
|
||||
|
||||
let toml_string = toml::to_string(¶ms).expect("Failed to serialize correct config");
|
||||
let toml_pretty_string =
|
||||
toml::to_string_pretty(¶ms).expect("Failed to serialize correct config");
|
||||
assert_eq!(
|
||||
r#"listen_pg_addr = 'listen_pg_addr_VALUE'
|
||||
listen_http_addr = 'listen_http_addr_VALUE'
|
||||
checkpoint_distance = 'checkpoint_distance_VALUE'
|
||||
checkpoint_period = 'checkpoint_period_VALUE'
|
||||
gc_horizon = 'gc_horizon_VALUE'
|
||||
gc_period = 'gc_period_VALUE'
|
||||
pg_distrib_dir = 'pg_distrib_dir_VALUE'
|
||||
auth_validation_public_key_path = 'auth_validation_public_key_path_VALUE'
|
||||
auth_type = 'auth_type_VALUE'
|
||||
relish_storage_max_concurrent_sync = 'relish_storage_max_concurrent_sync_VALUE'
|
||||
|
||||
[relish_storage]
|
||||
bucket_name = 'bucket_name_VALUE'
|
||||
bucket_region = 'bucket_region_VALUE'
|
||||
"#,
|
||||
toml_pretty_string
|
||||
);
|
||||
|
||||
let params_from_serialized: CfgFileParams = toml::from_str(&toml_string)
|
||||
.expect("Failed to deserialize the serialization result of the config");
|
||||
let params_from_serialized_pretty: CfgFileParams = toml::from_str(&toml_pretty_string)
|
||||
.expect("Failed to deserialize the prettified serialization result of the config");
|
||||
|
||||
let mut expected_params = params;
|
||||
expected_params.relish_storage = Some(RelishStorage::AwsS3 {
|
||||
bucket_name: "bucket_name_VALUE".to_string(),
|
||||
bucket_region: "bucket_region_VALUE".to_string(),
|
||||
access_key_id: None,
|
||||
secret_access_key: None,
|
||||
});
|
||||
assert!(
|
||||
params_from_serialized == expected_params,
|
||||
"Expected the config without credentials in the end of a 'config -> serialize -> deserialize' chain"
|
||||
);
|
||||
assert!(
|
||||
params_from_serialized_pretty == expected_params,
|
||||
"Expected the config without credentials in the end of a 'config -> serialize pretty -> deserialize' chain"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,12 +14,12 @@ use std::{
|
||||
str::FromStr,
|
||||
sync::Arc,
|
||||
};
|
||||
use tracing::*;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use log::*;
|
||||
use zenith_utils::crashsafe_dir;
|
||||
use zenith_utils::logging;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use crate::tenant_mgr;
|
||||
use crate::walredo::WalRedoManager;
|
||||
@@ -100,7 +100,7 @@ pub struct PointInTime {
|
||||
pub fn init_pageserver(conf: &'static PageServerConf, create_tenant: Option<&str>) -> Result<()> {
|
||||
// Initialize logger
|
||||
// use true as daemonize parameter because otherwise we pollute zenith cli output with a few pages long output of info messages
|
||||
let _log_file = logging::init(LOG_FILE_NAME, true)?;
|
||||
let (_scope_guard, _log_file) = logging::init(LOG_FILE_NAME, true)?;
|
||||
|
||||
// We don't use the real WAL redo manager, because we don't want to spawn the WAL redo
|
||||
// process during repository initialization.
|
||||
@@ -176,16 +176,13 @@ fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
|
||||
// to get bootstrap data for timeline initialization.
|
||||
//
|
||||
fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
|
||||
info!("running initdb in {}... ", initdbpath.display());
|
||||
info!("running initdb... ");
|
||||
|
||||
let initdb_path = conf.pg_bin_dir().join("initdb");
|
||||
let initdb_output = Command::new(initdb_path)
|
||||
.args(&["-D", initdbpath.to_str().unwrap()])
|
||||
.args(&["-U", &conf.superuser])
|
||||
.arg("--no-instructions")
|
||||
// This is only used for a temporary installation that is deleted shortly after,
|
||||
// so no need to fsync it
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
|
||||
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir().to_str().unwrap())
|
||||
@@ -198,6 +195,7 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
|
||||
String::from_utf8_lossy(&initdb_output.stderr)
|
||||
);
|
||||
}
|
||||
info!("initdb succeeded");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -212,8 +210,6 @@ fn bootstrap_timeline(
|
||||
tli: ZTimelineId,
|
||||
repo: &dyn Repository,
|
||||
) -> Result<()> {
|
||||
let _enter = info_span!("bootstrapping", timeline = %tli, tenant = %tenantid).entered();
|
||||
|
||||
let initdb_path = conf.tenant_path(&tenantid).join("tmp");
|
||||
|
||||
// Init temporarily repo to get bootstrap data
|
||||
@@ -222,12 +218,14 @@ fn bootstrap_timeline(
|
||||
|
||||
let lsn = get_lsn_from_controlfile(&pgdata_path)?.align();
|
||||
|
||||
info!("bootstrap_timeline {:?} at lsn {}", pgdata_path, lsn);
|
||||
|
||||
// Import the contents of the data directory at the initial checkpoint
|
||||
// LSN, and any WAL after that.
|
||||
let timeline = repo.create_empty_timeline(tli)?;
|
||||
restore_local_repo::import_timeline_from_postgres_datadir(
|
||||
&pgdata_path,
|
||||
timeline.writer().as_ref(),
|
||||
timeline.as_ref(),
|
||||
lsn,
|
||||
)?;
|
||||
timeline.checkpoint()?;
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
@@ -5,7 +6,6 @@ use hyper::header;
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use routerify::{ext::RequestExt, RouterBuilder};
|
||||
use tracing::*;
|
||||
use zenith_utils::auth::JwtAuth;
|
||||
use zenith_utils::http::endpoint::attach_openapi_ui;
|
||||
use zenith_utils::http::endpoint::auth_middleware;
|
||||
@@ -15,8 +15,6 @@ use zenith_utils::http::{
|
||||
endpoint,
|
||||
error::HttpErrorBody,
|
||||
json::{json_request, json_response},
|
||||
request::get_request_param,
|
||||
request::parse_request_param,
|
||||
};
|
||||
|
||||
use super::models::BranchCreateRequest;
|
||||
@@ -58,6 +56,33 @@ fn get_config(request: &Request<Body>) -> &'static PageServerConf {
|
||||
get_state(request).conf
|
||||
}
|
||||
|
||||
fn get_request_param<'a>(
|
||||
request: &'a Request<Body>,
|
||||
param_name: &str,
|
||||
) -> Result<&'a str, ApiError> {
|
||||
match request.param(param_name) {
|
||||
Some(arg) => Ok(arg),
|
||||
None => {
|
||||
return Err(ApiError::BadRequest(format!(
|
||||
"no {} specified in path param",
|
||||
param_name
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_request_param<T: FromStr>(
|
||||
request: &Request<Body>,
|
||||
param_name: &str,
|
||||
) -> Result<T, ApiError> {
|
||||
match get_request_param(request, param_name)?.parse() {
|
||||
Ok(v) => Ok(v),
|
||||
Err(_) => Err(ApiError::BadRequest(
|
||||
"failed to parse tenant id".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
// healthcheck handler
|
||||
async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
Ok(Response::builder()
|
||||
@@ -73,7 +98,6 @@ async fn branch_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
check_permission(&request, Some(request_data.tenant_id))?;
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("/branch_create", name = %request_data.name, tenant = %request_data.tenant_id, startpoint=%request_data.start_point).entered();
|
||||
branches::create_branch(
|
||||
get_config(&request),
|
||||
&request_data.name,
|
||||
@@ -92,7 +116,6 @@ async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
check_permission(&request, Some(tenantid))?;
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("branch_list", tenant = %tenantid).entered();
|
||||
crate::branches::get_branches(get_config(&request), &tenantid)
|
||||
})
|
||||
.await
|
||||
@@ -103,12 +126,11 @@ async fn branch_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
// TODO add to swagger
|
||||
async fn branch_detail_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenantid: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let branch_name: String = get_request_param(&request, "branch_name")?.to_string();
|
||||
let branch_name: &str = get_request_param(&request, "branch_name")?;
|
||||
let conf = get_state(&request).conf;
|
||||
let path = conf.branch_path(&branch_name, &tenantid);
|
||||
let path = conf.branch_path(branch_name, &tenantid);
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("branch_detail", tenant = %tenantid, branch=%branch_name).entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenantid)?;
|
||||
BranchInfo::from_path(path, conf, &tenantid, &repo)
|
||||
})
|
||||
@@ -122,13 +144,10 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
// check for management permission
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_list").entered();
|
||||
crate::branches::get_tenants(get_config(&request))
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
let response_data =
|
||||
tokio::task::spawn_blocking(move || crate::branches::get_tenants(get_config(&request)))
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
Ok(json_response(StatusCode::OK, response_data)?)
|
||||
}
|
||||
|
||||
@@ -139,7 +158,6 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_create", tenant = %request_data.tenant_id).entered();
|
||||
tenant_mgr::create_repository_for_tenant(get_config(&request), request_data.tenant_id)
|
||||
})
|
||||
.await
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -65,6 +65,7 @@ use zenith_utils::bin_ser::BeSer;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
|
||||
use super::blob::{read_blob, BlobRange};
|
||||
use super::page_versions::OrderedBlockIter;
|
||||
|
||||
// Magic constant to identify a Zenith delta file
|
||||
pub const DELTA_FILE_MAGIC: u32 = 0x5A616E01;
|
||||
@@ -169,7 +170,29 @@ impl Layer for DeltaLayer {
|
||||
}
|
||||
|
||||
fn filename(&self) -> PathBuf {
|
||||
PathBuf::from(self.layer_name().to_string())
|
||||
PathBuf::from(
|
||||
DeltaFileName {
|
||||
seg: self.seg,
|
||||
start_lsn: self.start_lsn,
|
||||
end_lsn: self.end_lsn,
|
||||
dropped: self.dropped,
|
||||
}
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
fn path(&self) -> Option<PathBuf> {
|
||||
Some(Self::path_for(
|
||||
&self.path_or_conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&DeltaFileName {
|
||||
seg: self.seg,
|
||||
start_lsn: self.start_lsn,
|
||||
end_lsn: self.end_lsn,
|
||||
dropped: self.dropped,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
/// Look up given page in the cache.
|
||||
@@ -198,7 +221,7 @@ impl Layer for DeltaLayer {
|
||||
.slice_range((Included(&minkey), Included(&maxkey)))
|
||||
.iter()
|
||||
.rev();
|
||||
for ((_blknum, pv_lsn), blob_range) in iter {
|
||||
for ((_blknum, _lsn), blob_range) in iter {
|
||||
let pv = PageVersion::des(&read_blob(&page_version_reader, blob_range)?)?;
|
||||
|
||||
if let Some(img) = pv.page_image {
|
||||
@@ -208,7 +231,7 @@ impl Layer for DeltaLayer {
|
||||
break;
|
||||
} else if let Some(rec) = pv.record {
|
||||
let will_init = rec.will_init;
|
||||
reconstruct_data.records.push((*pv_lsn, rec));
|
||||
reconstruct_data.records.push(rec);
|
||||
if will_init {
|
||||
// This WAL record initializes the page, so no need to go further back
|
||||
need_image = false;
|
||||
@@ -278,7 +301,9 @@ impl Layer for DeltaLayer {
|
||||
|
||||
fn delete(&self) -> Result<()> {
|
||||
// delete underlying file
|
||||
fs::remove_file(self.path())?;
|
||||
if let Some(path) = self.path() {
|
||||
fs::remove_file(path)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -350,7 +375,7 @@ impl DeltaLayer {
|
||||
/// data structure with two btreemaps as we do, so passing the btreemaps is currently
|
||||
/// expedient.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn create<'a>(
|
||||
pub fn create(
|
||||
conf: &'static PageServerConf,
|
||||
timelineid: ZTimelineId,
|
||||
tenantid: ZTenantId,
|
||||
@@ -358,7 +383,7 @@ impl DeltaLayer {
|
||||
start_lsn: Lsn,
|
||||
end_lsn: Lsn,
|
||||
dropped: bool,
|
||||
page_versions: impl Iterator<Item = (u32, Lsn, &'a PageVersion)>,
|
||||
page_versions: OrderedBlockIter,
|
||||
relsizes: VecMap<Lsn, u32>,
|
||||
) -> Result<DeltaLayer> {
|
||||
if seg.rel.is_blocky() {
|
||||
@@ -382,7 +407,9 @@ impl DeltaLayer {
|
||||
let mut inner = delta_layer.inner.lock().unwrap();
|
||||
|
||||
// Write the in-memory btreemaps into a file
|
||||
let path = delta_layer.path();
|
||||
let path = delta_layer
|
||||
.path()
|
||||
.expect("DeltaLayer is supposed to have a layer path on disk");
|
||||
|
||||
// Note: This overwrites any existing file. There shouldn't be any.
|
||||
// FIXME: throw an error instead?
|
||||
@@ -392,14 +419,20 @@ impl DeltaLayer {
|
||||
|
||||
let mut page_version_writer = BlobWriter::new(book, PAGE_VERSIONS_CHAPTER);
|
||||
|
||||
for (blknum, lsn, page_version) in page_versions {
|
||||
let buf = PageVersion::ser(page_version)?;
|
||||
let blob_range = page_version_writer.write_blob(&buf)?;
|
||||
for (blknum, history) in page_versions {
|
||||
for (lsn, page_version) in history.as_slice() {
|
||||
if lsn >= &end_lsn {
|
||||
continue;
|
||||
}
|
||||
|
||||
inner
|
||||
.page_version_metas
|
||||
.append((blknum, lsn), blob_range)
|
||||
.unwrap();
|
||||
let buf = PageVersion::ser(page_version)?;
|
||||
let blob_range = page_version_writer.write_blob(&buf)?;
|
||||
|
||||
inner
|
||||
.page_version_metas
|
||||
.append((blknum, *lsn), blob_range)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
let book = page_version_writer.close()?;
|
||||
@@ -446,7 +479,12 @@ impl DeltaLayer {
|
||||
&self.path_or_conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&self.layer_name(),
|
||||
&DeltaFileName {
|
||||
seg: self.seg,
|
||||
start_lsn: self.start_lsn,
|
||||
end_lsn: self.end_lsn,
|
||||
dropped: self.dropped,
|
||||
},
|
||||
);
|
||||
|
||||
let file = File::open(&path)?;
|
||||
@@ -555,23 +593,4 @@ impl DeltaLayer {
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
fn layer_name(&self) -> DeltaFileName {
|
||||
DeltaFileName {
|
||||
seg: self.seg,
|
||||
start_lsn: self.start_lsn,
|
||||
end_lsn: self.end_lsn,
|
||||
dropped: self.dropped,
|
||||
}
|
||||
}
|
||||
|
||||
/// Path to the layer file in pageserver workdir.
|
||||
pub fn path(&self) -> PathBuf {
|
||||
Self::path_for(
|
||||
&self.path_or_conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&self.layer_name(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,8 +13,6 @@ use anyhow::Result;
|
||||
use log::*;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
|
||||
use super::METADATA_FILE_NAME;
|
||||
|
||||
// Note: LayeredTimeline::load_layer_map() relies on this sort order
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub struct DeltaFileName {
|
||||
@@ -37,7 +35,7 @@ impl DeltaFileName {
|
||||
/// Parse a string as a delta file name. Returns None if the filename does not
|
||||
/// match the expected pattern.
|
||||
///
|
||||
pub fn parse_str(fname: &str) -> Option<Self> {
|
||||
pub fn from_str(fname: &str) -> Option<Self> {
|
||||
let rel;
|
||||
let mut parts;
|
||||
if let Some(rest) = fname.strip_prefix("rel_") {
|
||||
@@ -170,7 +168,7 @@ impl ImageFileName {
|
||||
/// Parse a string as an image file name. Returns None if the filename does not
|
||||
/// match the expected pattern.
|
||||
///
|
||||
pub fn parse_str(fname: &str) -> Option<Self> {
|
||||
pub fn from_str(fname: &str) -> Option<Self> {
|
||||
let rel;
|
||||
let mut parts;
|
||||
if let Some(rest) = fname.strip_prefix("rel_") {
|
||||
@@ -288,11 +286,11 @@ pub fn list_files(
|
||||
let fname = direntry?.file_name();
|
||||
let fname = fname.to_str().unwrap();
|
||||
|
||||
if let Some(deltafilename) = DeltaFileName::parse_str(fname) {
|
||||
if let Some(deltafilename) = DeltaFileName::from_str(fname) {
|
||||
deltafiles.push(deltafilename);
|
||||
} else if let Some(imgfilename) = ImageFileName::parse_str(fname) {
|
||||
} else if let Some(imgfilename) = ImageFileName::from_str(fname) {
|
||||
imgfiles.push(imgfilename);
|
||||
} else if fname == METADATA_FILE_NAME || fname == "ancestor" || fname.ends_with(".old") {
|
||||
} else if fname == "metadata" || fname == "ancestor" || fname.ends_with(".old") {
|
||||
// ignore these
|
||||
} else {
|
||||
warn!("unrecognized filename in timeline dir: {}", fname);
|
||||
|
||||
@@ -114,7 +114,25 @@ pub struct ImageLayerInner {
|
||||
|
||||
impl Layer for ImageLayer {
|
||||
fn filename(&self) -> PathBuf {
|
||||
PathBuf::from(self.layer_name().to_string())
|
||||
PathBuf::from(
|
||||
ImageFileName {
|
||||
seg: self.seg,
|
||||
lsn: self.lsn,
|
||||
}
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
fn path(&self) -> Option<PathBuf> {
|
||||
Some(Self::path_for(
|
||||
&self.path_or_conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&ImageFileName {
|
||||
seg: self.seg,
|
||||
lsn: self.lsn,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
fn get_timeline_id(&self) -> ZTimelineId {
|
||||
@@ -204,7 +222,9 @@ impl Layer for ImageLayer {
|
||||
|
||||
fn delete(&self) -> Result<()> {
|
||||
// delete underlying file
|
||||
fs::remove_file(self.path())?;
|
||||
if let Some(path) = self.path() {
|
||||
fs::remove_file(path)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -280,7 +300,9 @@ impl ImageLayer {
|
||||
let inner = layer.inner.lock().unwrap();
|
||||
|
||||
// Write the images into a file
|
||||
let path = layer.path();
|
||||
let path = layer
|
||||
.path()
|
||||
.expect("ImageLayer is supposed to have a layer path on disk");
|
||||
// Note: This overwrites any existing file. There shouldn't be any.
|
||||
// FIXME: throw an error instead?
|
||||
let file = File::create(&path)?;
|
||||
@@ -318,7 +340,7 @@ impl ImageLayer {
|
||||
let writer = book.close()?;
|
||||
writer.get_ref().sync_all()?;
|
||||
|
||||
trace!("saved {}", path.display());
|
||||
trace!("saved {}", &path.display());
|
||||
|
||||
drop(inner);
|
||||
|
||||
@@ -423,7 +445,15 @@ impl ImageLayer {
|
||||
}
|
||||
|
||||
fn open_book(&self) -> Result<(PathBuf, Book<File>)> {
|
||||
let path = self.path();
|
||||
let path = Self::path_for(
|
||||
&self.path_or_conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&ImageFileName {
|
||||
seg: self.seg,
|
||||
lsn: self.lsn,
|
||||
},
|
||||
);
|
||||
|
||||
let file = File::open(&path)?;
|
||||
let book = Book::new(file)?;
|
||||
@@ -470,21 +500,4 @@ impl ImageLayer {
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
fn layer_name(&self) -> ImageFileName {
|
||||
ImageFileName {
|
||||
seg: self.seg,
|
||||
lsn: self.lsn,
|
||||
}
|
||||
}
|
||||
|
||||
/// Path to the layer file in pageserver workdir.
|
||||
pub fn path(&self) -> PathBuf {
|
||||
Self::path_for(
|
||||
&self.path_or_conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
&self.layer_name(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,10 +15,12 @@ use crate::{ZTenantId, ZTimelineId};
|
||||
use anyhow::{bail, ensure, Result};
|
||||
use bytes::Bytes;
|
||||
use log::*;
|
||||
use std::cmp::min;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use zenith_utils::vec_map::VecMap;
|
||||
|
||||
use zenith_utils::accum::Accum;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
|
||||
use super::page_versions::PageVersions;
|
||||
@@ -35,6 +37,9 @@ pub struct InMemoryLayer {
|
||||
///
|
||||
start_lsn: Lsn,
|
||||
|
||||
/// Frozen in-memory layers have an inclusive end LSN.
|
||||
end_lsn: Option<Lsn>,
|
||||
|
||||
/// LSN of the oldest page version stored in this layer
|
||||
oldest_pending_lsn: Lsn,
|
||||
|
||||
@@ -47,13 +52,8 @@ pub struct InMemoryLayer {
|
||||
}
|
||||
|
||||
pub struct InMemoryLayerInner {
|
||||
/// Frozen in-memory layers have an exclusive end LSN.
|
||||
/// Writes are only allowed when this is None
|
||||
end_lsn: Option<Lsn>,
|
||||
|
||||
/// If this relation was dropped, remember when that happened.
|
||||
/// The drop LSN is recorded in [`end_lsn`].
|
||||
dropped: bool,
|
||||
drop_lsn: Option<Lsn>,
|
||||
|
||||
///
|
||||
/// All versions of all pages in the layer are are kept here.
|
||||
@@ -69,11 +69,19 @@ pub struct InMemoryLayerInner {
|
||||
/// a non-blocky rel, 'segsizes' is not used and is always empty.
|
||||
///
|
||||
segsizes: VecMap<Lsn, u32>,
|
||||
|
||||
/// Writes are only allowed when true.
|
||||
/// Set to false when this layer is in the process of being replaced.
|
||||
writeable: bool,
|
||||
}
|
||||
|
||||
impl InMemoryLayerInner {
|
||||
fn assert_writeable(&self) {
|
||||
assert!(self.end_lsn.is_none());
|
||||
fn check_writeable(&self) -> WriteResult<()> {
|
||||
if self.writeable {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(NonWriteableError)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_seg_size(&self, lsn: Lsn) -> u32 {
|
||||
@@ -96,23 +104,30 @@ impl Layer for InMemoryLayer {
|
||||
let inner = self.inner.read().unwrap();
|
||||
|
||||
let end_lsn;
|
||||
if let Some(drop_lsn) = inner.end_lsn {
|
||||
let dropped;
|
||||
if let Some(drop_lsn) = inner.drop_lsn {
|
||||
end_lsn = drop_lsn;
|
||||
dropped = true;
|
||||
} else {
|
||||
end_lsn = Lsn(u64::MAX);
|
||||
dropped = false;
|
||||
}
|
||||
|
||||
let delta_filename = DeltaFileName {
|
||||
seg: self.seg,
|
||||
start_lsn: self.start_lsn,
|
||||
end_lsn,
|
||||
dropped: inner.dropped,
|
||||
dropped,
|
||||
}
|
||||
.to_string();
|
||||
|
||||
PathBuf::from(format!("inmem-{}", delta_filename))
|
||||
}
|
||||
|
||||
fn path(&self) -> Option<PathBuf> {
|
||||
None
|
||||
}
|
||||
|
||||
fn get_timeline_id(&self) -> ZTimelineId {
|
||||
self.timelineid
|
||||
}
|
||||
@@ -126,10 +141,14 @@ impl Layer for InMemoryLayer {
|
||||
}
|
||||
|
||||
fn get_end_lsn(&self) -> Lsn {
|
||||
if let Some(end_lsn) = self.end_lsn {
|
||||
return Lsn(end_lsn.0 + 1);
|
||||
}
|
||||
|
||||
let inner = self.inner.read().unwrap();
|
||||
|
||||
if let Some(end_lsn) = inner.end_lsn {
|
||||
end_lsn
|
||||
if let Some(drop_lsn) = inner.drop_lsn {
|
||||
drop_lsn
|
||||
} else {
|
||||
Lsn(u64::MAX)
|
||||
}
|
||||
@@ -137,7 +156,7 @@ impl Layer for InMemoryLayer {
|
||||
|
||||
fn is_dropped(&self) -> bool {
|
||||
let inner = self.inner.read().unwrap();
|
||||
inner.dropped
|
||||
inner.drop_lsn.is_some()
|
||||
}
|
||||
|
||||
/// Look up given page in the cache.
|
||||
@@ -160,13 +179,13 @@ impl Layer for InMemoryLayer {
|
||||
.get_block_lsn_range(blknum, ..=lsn)
|
||||
.iter()
|
||||
.rev();
|
||||
for (entry_lsn, entry) in iter {
|
||||
for (_entry_lsn, entry) in iter {
|
||||
if let Some(img) = &entry.page_image {
|
||||
reconstruct_data.page_img = Some(img.clone());
|
||||
need_image = false;
|
||||
break;
|
||||
} else if let Some(rec) = &entry.record {
|
||||
reconstruct_data.records.push((*entry_lsn, rec.clone()));
|
||||
reconstruct_data.records.push(rec.clone());
|
||||
if rec.will_init {
|
||||
// This WAL record initializes the page, so no need to go further back
|
||||
need_image = false;
|
||||
@@ -215,8 +234,8 @@ impl Layer for InMemoryLayer {
|
||||
assert!(lsn >= self.start_lsn);
|
||||
|
||||
// Is the requested LSN after the segment was dropped?
|
||||
if let Some(end_lsn) = inner.end_lsn {
|
||||
if lsn >= end_lsn {
|
||||
if let Some(drop_lsn) = inner.drop_lsn {
|
||||
if lsn >= drop_lsn {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
@@ -247,47 +266,56 @@ impl Layer for InMemoryLayer {
|
||||
let inner = self.inner.read().unwrap();
|
||||
|
||||
let end_str = inner
|
||||
.end_lsn
|
||||
.drop_lsn
|
||||
.as_ref()
|
||||
.map(Lsn::to_string)
|
||||
.map(|drop_lsn| drop_lsn.to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
println!(
|
||||
"----- in-memory layer for tli {} seg {} {}-{} {} ----",
|
||||
self.timelineid, self.seg, self.start_lsn, end_str, inner.dropped,
|
||||
"----- in-memory layer for tli {} seg {} {}-{} ----",
|
||||
self.timelineid, self.seg, self.start_lsn, end_str
|
||||
);
|
||||
|
||||
for (k, v) in inner.segsizes.as_slice() {
|
||||
println!("segsizes {}: {}", k, v);
|
||||
}
|
||||
|
||||
for (blknum, lsn, pv) in inner.page_versions.ordered_page_version_iter(None) {
|
||||
println!(
|
||||
"blk {} at {}: {}/{}\n",
|
||||
blknum,
|
||||
lsn,
|
||||
pv.page_image.is_some(),
|
||||
pv.record.is_some()
|
||||
);
|
||||
for (blknum, history) in inner.page_versions.ordered_block_iter() {
|
||||
for (lsn, pv) in history.as_slice() {
|
||||
println!(
|
||||
"blk {} at {}: {}/{}\n",
|
||||
blknum,
|
||||
lsn,
|
||||
pv.page_image.is_some(),
|
||||
pv.record.is_some()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// A result of an inmemory layer data being written to disk.
|
||||
pub struct LayersOnDisk {
|
||||
pub delta_layers: Vec<DeltaLayer>,
|
||||
pub image_layers: Vec<ImageLayer>,
|
||||
}
|
||||
/// Write failed because the layer is in process of being replaced.
|
||||
/// See [`LayeredTimeline::perform_write_op`] for how to handle this error.
|
||||
#[derive(Debug)]
|
||||
pub struct NonWriteableError;
|
||||
|
||||
impl LayersOnDisk {
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.delta_layers.is_empty() && self.image_layers.is_empty()
|
||||
}
|
||||
pub type WriteResult<T> = std::result::Result<T, NonWriteableError>;
|
||||
|
||||
/// Helper struct to cleanup `InMemoryLayer::freeze` return signature.
|
||||
pub struct FreezeLayers {
|
||||
/// Replacement layer for the layer which freeze was called on.
|
||||
pub frozen: Arc<InMemoryLayer>,
|
||||
/// New open layer containing leftover data.
|
||||
pub open: Option<Arc<InMemoryLayer>>,
|
||||
}
|
||||
|
||||
impl InMemoryLayer {
|
||||
fn assert_not_frozen(&self) {
|
||||
assert!(self.end_lsn.is_none());
|
||||
}
|
||||
|
||||
/// Return the oldest page version that's stored in this layer
|
||||
pub fn get_oldest_pending_lsn(&self) -> Lsn {
|
||||
self.oldest_pending_lsn
|
||||
@@ -323,13 +351,14 @@ impl InMemoryLayer {
|
||||
tenantid,
|
||||
seg,
|
||||
start_lsn,
|
||||
end_lsn: None,
|
||||
oldest_pending_lsn,
|
||||
incremental: false,
|
||||
inner: RwLock::new(InMemoryLayerInner {
|
||||
end_lsn: None,
|
||||
dropped: false,
|
||||
drop_lsn: None,
|
||||
page_versions: PageVersions::default(),
|
||||
segsizes,
|
||||
writeable: true,
|
||||
}),
|
||||
})
|
||||
}
|
||||
@@ -337,10 +366,10 @@ impl InMemoryLayer {
|
||||
// Write operations
|
||||
|
||||
/// Remember new page version, as a WAL record over previous version
|
||||
pub fn put_wal_record(&self, lsn: Lsn, blknum: u32, rec: WALRecord) -> u32 {
|
||||
pub fn put_wal_record(&self, blknum: u32, rec: WALRecord) -> WriteResult<u32> {
|
||||
self.put_page_version(
|
||||
blknum,
|
||||
lsn,
|
||||
rec.lsn,
|
||||
PageVersion {
|
||||
page_image: None,
|
||||
record: Some(rec),
|
||||
@@ -349,7 +378,7 @@ impl InMemoryLayer {
|
||||
}
|
||||
|
||||
/// Remember new page version, as a full page image
|
||||
pub fn put_page_image(&self, blknum: u32, lsn: Lsn, img: Bytes) -> u32 {
|
||||
pub fn put_page_image(&self, blknum: u32, lsn: Lsn, img: Bytes) -> WriteResult<u32> {
|
||||
self.put_page_version(
|
||||
blknum,
|
||||
lsn,
|
||||
@@ -362,7 +391,8 @@ impl InMemoryLayer {
|
||||
|
||||
/// Common subroutine of the public put_wal_record() and put_page_image() functions.
|
||||
/// Adds the page version to the in-memory tree
|
||||
pub fn put_page_version(&self, blknum: u32, lsn: Lsn, pv: PageVersion) -> u32 {
|
||||
pub fn put_page_version(&self, blknum: u32, lsn: Lsn, pv: PageVersion) -> WriteResult<u32> {
|
||||
self.assert_not_frozen();
|
||||
assert!(self.seg.blknum_in_seg(blknum));
|
||||
|
||||
trace!(
|
||||
@@ -374,7 +404,7 @@ impl InMemoryLayer {
|
||||
);
|
||||
let mut inner = self.inner.write().unwrap();
|
||||
|
||||
inner.assert_writeable();
|
||||
inner.check_writeable()?;
|
||||
|
||||
let old = inner.page_versions.append_or_update_last(blknum, lsn, pv);
|
||||
|
||||
@@ -435,22 +465,22 @@ impl InMemoryLayer {
|
||||
}
|
||||
|
||||
inner.segsizes.append_or_update_last(lsn, newsize).unwrap();
|
||||
return newsize - oldsize;
|
||||
return Ok(newsize - oldsize);
|
||||
}
|
||||
}
|
||||
|
||||
0
|
||||
Ok(0)
|
||||
}
|
||||
|
||||
/// Remember that the relation was truncated at given LSN
|
||||
pub fn put_truncation(&self, lsn: Lsn, segsize: u32) {
|
||||
pub fn put_truncation(&self, lsn: Lsn, segsize: u32) -> WriteResult<()> {
|
||||
assert!(
|
||||
self.seg.rel.is_blocky(),
|
||||
"put_truncation() called on a non-blocky rel"
|
||||
);
|
||||
self.assert_not_frozen();
|
||||
|
||||
let mut inner = self.inner.write().unwrap();
|
||||
inner.assert_writeable();
|
||||
inner.check_writeable()?;
|
||||
|
||||
// check that this we truncate to a smaller size than segment was before the truncation
|
||||
let oldsize = inner.get_seg_size(lsn);
|
||||
@@ -462,19 +492,25 @@ impl InMemoryLayer {
|
||||
// We already had an entry for this LSN. That's odd..
|
||||
warn!("Inserting truncation, but had an entry for the LSN already");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Remember that the segment was dropped at given LSN
|
||||
pub fn drop_segment(&self, lsn: Lsn) {
|
||||
pub fn drop_segment(&self, lsn: Lsn) -> WriteResult<()> {
|
||||
self.assert_not_frozen();
|
||||
|
||||
let mut inner = self.inner.write().unwrap();
|
||||
|
||||
assert!(inner.end_lsn.is_none());
|
||||
assert!(!inner.dropped);
|
||||
inner.dropped = true;
|
||||
assert!(self.start_lsn < lsn);
|
||||
inner.end_lsn = Some(lsn);
|
||||
inner.check_writeable()?;
|
||||
|
||||
assert!(inner.drop_lsn.is_none());
|
||||
inner.drop_lsn = Some(lsn);
|
||||
inner.writeable = false;
|
||||
|
||||
trace!("dropped segment {} at {}", self.seg, lsn);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
@@ -514,43 +550,117 @@ impl InMemoryLayer {
|
||||
tenantid,
|
||||
seg,
|
||||
start_lsn,
|
||||
end_lsn: None,
|
||||
oldest_pending_lsn,
|
||||
incremental: true,
|
||||
inner: RwLock::new(InMemoryLayerInner {
|
||||
end_lsn: None,
|
||||
dropped: false,
|
||||
drop_lsn: None,
|
||||
page_versions: PageVersions::default(),
|
||||
segsizes,
|
||||
writeable: true,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_writeable(&self) -> bool {
|
||||
let inner = self.inner.read().unwrap();
|
||||
inner.end_lsn.is_none()
|
||||
inner.writeable
|
||||
}
|
||||
|
||||
/// Make the layer non-writeable. Only call once.
|
||||
/// Records the end_lsn for non-dropped layers.
|
||||
/// `end_lsn` is inclusive
|
||||
pub fn freeze(&self, end_lsn: Lsn) {
|
||||
let mut inner = self.inner.write().unwrap();
|
||||
/// Splits `self` into two InMemoryLayers: `frozen` and `open`.
|
||||
/// All data up to and including `cutoff_lsn`
|
||||
/// is copied to `frozen`, while the remaining data is copied to `open`.
|
||||
/// After completion, self is non-writeable, but not frozen.
|
||||
pub fn freeze(self: Arc<Self>, cutoff_lsn: Lsn) -> Result<FreezeLayers> {
|
||||
info!(
|
||||
"freezing in memory layer {} on timeline {} at {} (oldest {})",
|
||||
self.filename().display(),
|
||||
self.timelineid,
|
||||
cutoff_lsn,
|
||||
self.oldest_pending_lsn
|
||||
);
|
||||
|
||||
if inner.end_lsn.is_some() {
|
||||
assert!(inner.dropped);
|
||||
} else {
|
||||
assert!(!inner.dropped);
|
||||
assert!(self.start_lsn < end_lsn + 1);
|
||||
inner.end_lsn = Some(Lsn(end_lsn.0 + 1));
|
||||
self.assert_not_frozen();
|
||||
|
||||
if let Some((lsn, _)) = inner.segsizes.as_slice().last() {
|
||||
assert!(lsn <= &end_lsn, "{:?} {:?}", lsn, end_lsn);
|
||||
}
|
||||
let self_ref = self.clone();
|
||||
let mut inner = self_ref.inner.write().unwrap();
|
||||
// Dropped layers don't need any special freeze actions,
|
||||
// they are marked as non-writeable at drop and just
|
||||
// written out to disk by checkpointer.
|
||||
if inner.drop_lsn.is_some() {
|
||||
assert!(!inner.writeable);
|
||||
info!(
|
||||
"freezing in memory layer for {} on timeline {} is dropped at {}",
|
||||
self.seg,
|
||||
self.timelineid,
|
||||
inner.drop_lsn.unwrap()
|
||||
);
|
||||
|
||||
for (_blk, lsn, _pv) in inner.page_versions.ordered_page_version_iter(None) {
|
||||
assert!(lsn <= end_lsn);
|
||||
}
|
||||
// There should be no newer layer that refers this non-writeable layer,
|
||||
// because layer that is created after dropped one represents a new rel.
|
||||
return Ok(FreezeLayers {
|
||||
frozen: self,
|
||||
open: None,
|
||||
});
|
||||
}
|
||||
assert!(inner.writeable);
|
||||
inner.writeable = false;
|
||||
|
||||
// Divide all the page versions into old and new
|
||||
// at the 'cutoff_lsn' point.
|
||||
let mut after_oldest_lsn: Accum<Lsn> = Accum(None);
|
||||
|
||||
let cutoff_lsn_exclusive = Lsn(cutoff_lsn.0 + 1);
|
||||
|
||||
let (before_segsizes, mut after_segsizes) = inner.segsizes.split_at(&cutoff_lsn_exclusive);
|
||||
if let Some((lsn, _size)) = after_segsizes.as_slice().first() {
|
||||
after_oldest_lsn.accum(min, *lsn);
|
||||
}
|
||||
|
||||
let (before_page_versions, after_page_versions) = inner
|
||||
.page_versions
|
||||
.split_at(cutoff_lsn_exclusive, &mut after_oldest_lsn);
|
||||
|
||||
let frozen = Arc::new(InMemoryLayer {
|
||||
conf: self.conf,
|
||||
tenantid: self.tenantid,
|
||||
timelineid: self.timelineid,
|
||||
seg: self.seg,
|
||||
start_lsn: self.start_lsn,
|
||||
end_lsn: Some(cutoff_lsn),
|
||||
oldest_pending_lsn: self.start_lsn,
|
||||
incremental: self.incremental,
|
||||
inner: RwLock::new(InMemoryLayerInner {
|
||||
drop_lsn: inner.drop_lsn,
|
||||
page_versions: before_page_versions,
|
||||
segsizes: before_segsizes,
|
||||
writeable: false,
|
||||
}),
|
||||
});
|
||||
|
||||
let open = if !after_segsizes.is_empty() || !after_page_versions.is_empty() {
|
||||
let mut new_open = Self::create_successor_layer(
|
||||
self.conf,
|
||||
frozen.clone(),
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
cutoff_lsn + 1,
|
||||
after_oldest_lsn.0.unwrap(),
|
||||
)?;
|
||||
|
||||
let new_inner = new_open.inner.get_mut().unwrap();
|
||||
// Ensure page_versions doesn't contain anything
|
||||
// so we can just replace it
|
||||
assert!(new_inner.page_versions.is_empty());
|
||||
new_inner.page_versions = after_page_versions;
|
||||
new_inner.segsizes.extend(&mut after_segsizes).unwrap();
|
||||
|
||||
Some(Arc::new(new_open))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(FreezeLayers { frozen, open })
|
||||
}
|
||||
|
||||
/// Write the this frozen in-memory layer to disk.
|
||||
@@ -561,15 +671,16 @@ impl InMemoryLayer {
|
||||
/// WAL records between start and end LSN. (The delta layer is not needed
|
||||
/// when a new relish is created with a single LSN, so that the start and
|
||||
/// end LSN are the same.)
|
||||
pub fn write_to_disk(&self, timeline: &LayeredTimeline) -> Result<LayersOnDisk> {
|
||||
pub fn write_to_disk(&self, timeline: &LayeredTimeline) -> Result<Vec<Arc<dyn Layer>>> {
|
||||
trace!(
|
||||
"write_to_disk {} get_end_lsn is {}",
|
||||
"write_to_disk {} end_lsn is {} get_end_lsn is {}",
|
||||
self.filename().display(),
|
||||
self.end_lsn.unwrap_or(Lsn(0)),
|
||||
self.get_end_lsn()
|
||||
);
|
||||
|
||||
// Grab the lock in read-mode. We hold it over the I/O, but because this
|
||||
// layer is not writeable anymore, no one should be trying to acquire the
|
||||
// layer is not writeable anymore, no one should be trying to aquire the
|
||||
// write lock on it, so we shouldn't block anyone. There's one exception
|
||||
// though: another thread might have grabbed a reference to this layer
|
||||
// in `get_layer_for_write' just before the checkpointer called
|
||||
@@ -578,45 +689,36 @@ impl InMemoryLayer {
|
||||
// would have to wait until we release it. That race condition is very
|
||||
// rare though, so we just accept the potential latency hit for now.
|
||||
let inner = self.inner.read().unwrap();
|
||||
let end_lsn_exclusive = inner.end_lsn.unwrap();
|
||||
assert!(!inner.writeable);
|
||||
|
||||
if inner.dropped {
|
||||
if let Some(drop_lsn) = inner.drop_lsn {
|
||||
let delta_layer = DeltaLayer::create(
|
||||
self.conf,
|
||||
self.timelineid,
|
||||
self.tenantid,
|
||||
self.seg,
|
||||
self.start_lsn,
|
||||
end_lsn_exclusive,
|
||||
drop_lsn,
|
||||
true,
|
||||
inner.page_versions.ordered_page_version_iter(None),
|
||||
inner.page_versions.ordered_block_iter(),
|
||||
inner.segsizes.clone(),
|
||||
)?;
|
||||
trace!(
|
||||
"freeze: created delta layer for dropped segment {} {}-{}",
|
||||
self.seg,
|
||||
self.start_lsn,
|
||||
end_lsn_exclusive
|
||||
drop_lsn
|
||||
);
|
||||
return Ok(LayersOnDisk {
|
||||
delta_layers: vec![delta_layer],
|
||||
image_layers: Vec::new(),
|
||||
});
|
||||
return Ok(vec![Arc::new(delta_layer)]);
|
||||
}
|
||||
|
||||
// Since `end_lsn` is inclusive, subtract 1.
|
||||
// We want to make an ImageLayer for the last included LSN,
|
||||
// so the DeltaLayer should exlcude that LSN.
|
||||
let end_lsn_inclusive = Lsn(end_lsn_exclusive.0 - 1);
|
||||
let end_lsn = self.end_lsn.unwrap();
|
||||
|
||||
let mut page_versions = inner
|
||||
.page_versions
|
||||
.ordered_page_version_iter(Some(end_lsn_inclusive));
|
||||
let mut frozen_layers: Vec<Arc<dyn Layer>> = Vec::new();
|
||||
|
||||
let mut delta_layers = Vec::new();
|
||||
if self.start_lsn != end_lsn {
|
||||
let (before_segsizes, _after_segsizes) = inner.segsizes.split_at(&Lsn(end_lsn.0 + 1));
|
||||
|
||||
if self.start_lsn != end_lsn_inclusive {
|
||||
let (segsizes, _) = inner.segsizes.split_at(&end_lsn_exclusive);
|
||||
// Write the page versions before the cutoff to disk.
|
||||
let delta_layer = DeltaLayer::create(
|
||||
self.conf,
|
||||
@@ -624,36 +726,32 @@ impl InMemoryLayer {
|
||||
self.tenantid,
|
||||
self.seg,
|
||||
self.start_lsn,
|
||||
end_lsn_inclusive,
|
||||
end_lsn,
|
||||
false,
|
||||
page_versions,
|
||||
segsizes,
|
||||
inner.page_versions.ordered_block_iter(),
|
||||
before_segsizes,
|
||||
)?;
|
||||
delta_layers.push(delta_layer);
|
||||
frozen_layers.push(Arc::new(delta_layer));
|
||||
trace!(
|
||||
"freeze: created delta layer {} {}-{}",
|
||||
self.seg,
|
||||
self.start_lsn,
|
||||
end_lsn_inclusive
|
||||
end_lsn
|
||||
);
|
||||
} else {
|
||||
assert!(page_versions.next().is_none());
|
||||
for (_blknum, history) in inner.page_versions.ordered_block_iter() {
|
||||
let (lsn, _pv) = history.as_slice().first().unwrap();
|
||||
assert!(lsn >= &end_lsn);
|
||||
}
|
||||
}
|
||||
|
||||
drop(inner);
|
||||
|
||||
// Write a new base image layer at the cutoff point
|
||||
let image_layer =
|
||||
ImageLayer::create_from_src(self.conf, timeline, self, end_lsn_inclusive)?;
|
||||
trace!(
|
||||
"freeze: created image layer {} at {}",
|
||||
self.seg,
|
||||
end_lsn_inclusive
|
||||
);
|
||||
let image_layer = ImageLayer::create_from_src(self.conf, timeline, self, end_lsn)?;
|
||||
frozen_layers.push(Arc::new(image_layer));
|
||||
trace!("freeze: created image layer {} at {}", self.seg, end_lsn);
|
||||
|
||||
Ok(LayersOnDisk {
|
||||
delta_layers,
|
||||
image_layers: vec![image_layer],
|
||||
})
|
||||
Ok(frozen_layers)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::{collections::HashMap, ops::RangeBounds, slice};
|
||||
use std::{collections::HashMap, ops::RangeBounds};
|
||||
|
||||
use zenith_utils::{lsn::Lsn, vec_map::VecMap};
|
||||
use zenith_utils::{accum::Accum, lsn::Lsn, vec_map::VecMap};
|
||||
|
||||
use super::storage_layer::PageVersion;
|
||||
|
||||
@@ -10,6 +10,10 @@ const EMPTY_SLICE: &[(Lsn, PageVersion)] = &[];
|
||||
pub struct PageVersions(HashMap<u32, VecMap<Lsn, PageVersion>>);
|
||||
|
||||
impl PageVersions {
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
pub fn append_or_update_last(
|
||||
&mut self,
|
||||
blknum: u32,
|
||||
@@ -20,14 +24,6 @@ impl PageVersions {
|
||||
map.append_or_update_last(lsn, page_version).unwrap()
|
||||
}
|
||||
|
||||
/// Get all [`PageVersion`]s in a block
|
||||
pub fn get_block_slice(&self, blknum: u32) -> &[(Lsn, PageVersion)] {
|
||||
self.0
|
||||
.get(&blknum)
|
||||
.map(VecMap::as_slice)
|
||||
.unwrap_or(EMPTY_SLICE)
|
||||
}
|
||||
|
||||
/// Get a range of [`PageVersions`] in a block
|
||||
pub fn get_block_lsn_range<R: RangeBounds<Lsn>>(
|
||||
&self,
|
||||
@@ -40,65 +36,61 @@ impl PageVersions {
|
||||
.unwrap_or(EMPTY_SLICE)
|
||||
}
|
||||
|
||||
/// Iterate through [`PageVersion`]s in (block, lsn) order.
|
||||
/// If a [`cutoff_lsn`] is set, only show versions with `lsn < cutoff_lsn`
|
||||
pub fn ordered_page_version_iter(&self, cutoff_lsn: Option<Lsn>) -> OrderedPageVersionIter<'_> {
|
||||
/// Split the page version map into two.
|
||||
///
|
||||
/// Left contains everything up to and not including [`cutoff_lsn`].
|
||||
/// Right contains [`cutoff_lsn`] and everything after.
|
||||
pub fn split_at(&self, cutoff_lsn: Lsn, after_oldest_lsn: &mut Accum<Lsn>) -> (Self, Self) {
|
||||
let mut before_blocks = HashMap::new();
|
||||
let mut after_blocks = HashMap::new();
|
||||
|
||||
for (blknum, vec_map) in self.0.iter() {
|
||||
let (before_versions, after_versions) = vec_map.split_at(&cutoff_lsn);
|
||||
|
||||
if !before_versions.is_empty() {
|
||||
let old = before_blocks.insert(*blknum, before_versions);
|
||||
assert!(old.is_none());
|
||||
}
|
||||
|
||||
if !after_versions.is_empty() {
|
||||
let (first_lsn, _first_pv) = &after_versions.as_slice()[0];
|
||||
after_oldest_lsn.accum(std::cmp::min, *first_lsn);
|
||||
|
||||
let old = after_blocks.insert(*blknum, after_versions);
|
||||
assert!(old.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
(Self(before_blocks), Self(after_blocks))
|
||||
}
|
||||
|
||||
/// Iterate through block-history pairs in block order.
|
||||
pub fn ordered_block_iter(&self) -> OrderedBlockIter<'_> {
|
||||
let mut ordered_blocks: Vec<u32> = self.0.keys().cloned().collect();
|
||||
ordered_blocks.sort_unstable();
|
||||
|
||||
let slice = ordered_blocks
|
||||
.first()
|
||||
.map(|&blknum| self.get_block_slice(blknum))
|
||||
.unwrap_or(EMPTY_SLICE);
|
||||
|
||||
OrderedPageVersionIter {
|
||||
OrderedBlockIter {
|
||||
page_versions: self,
|
||||
ordered_blocks,
|
||||
cur_block_idx: 0,
|
||||
cutoff_lsn,
|
||||
cur_slice_iter: slice.iter(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OrderedPageVersionIter<'a> {
|
||||
pub struct OrderedBlockIter<'a> {
|
||||
page_versions: &'a PageVersions,
|
||||
|
||||
ordered_blocks: Vec<u32>,
|
||||
cur_block_idx: usize,
|
||||
|
||||
cutoff_lsn: Option<Lsn>,
|
||||
|
||||
cur_slice_iter: slice::Iter<'a, (Lsn, PageVersion)>,
|
||||
}
|
||||
|
||||
impl OrderedPageVersionIter<'_> {
|
||||
fn is_lsn_before_cutoff(&self, lsn: &Lsn) -> bool {
|
||||
if let Some(cutoff_lsn) = self.cutoff_lsn.as_ref() {
|
||||
lsn < cutoff_lsn
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for OrderedPageVersionIter<'a> {
|
||||
type Item = (u32, Lsn, &'a PageVersion);
|
||||
impl<'a> Iterator for OrderedBlockIter<'a> {
|
||||
type Item = (u32, &'a VecMap<Lsn, PageVersion>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
loop {
|
||||
if let Some((lsn, page_version)) = self.cur_slice_iter.next() {
|
||||
if self.is_lsn_before_cutoff(lsn) {
|
||||
let blknum = self.ordered_blocks[self.cur_block_idx];
|
||||
return Some((blknum, *lsn, page_version));
|
||||
}
|
||||
}
|
||||
|
||||
let next_block_idx = self.cur_block_idx + 1;
|
||||
let blknum: u32 = *self.ordered_blocks.get(next_block_idx)?;
|
||||
self.cur_block_idx = next_block_idx;
|
||||
self.cur_slice_iter = self.page_versions.get_block_slice(blknum).iter();
|
||||
}
|
||||
let blknum: u32 = *self.ordered_blocks.get(self.cur_block_idx)?;
|
||||
self.cur_block_idx += 1;
|
||||
Some((blknum, self.page_versions.0.get(&blknum).unwrap()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,24 +116,14 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
let mut iter = page_versions.ordered_page_version_iter(None);
|
||||
let mut iter = page_versions.ordered_block_iter();
|
||||
for blknum in 0..BLOCKS {
|
||||
let (actual_blknum, vec_map) = iter.next().unwrap();
|
||||
let slice = vec_map.as_slice();
|
||||
assert_eq!(actual_blknum, blknum);
|
||||
assert_eq!(slice.len(), LSNS as usize);
|
||||
for lsn in 0..LSNS {
|
||||
let (actual_blknum, actual_lsn, _pv) = iter.next().unwrap();
|
||||
assert_eq!(actual_blknum, blknum);
|
||||
assert_eq!(Lsn(lsn), actual_lsn);
|
||||
}
|
||||
}
|
||||
assert!(iter.next().is_none());
|
||||
assert!(iter.next().is_none()); // should be robust against excessive next() calls
|
||||
|
||||
const CUTOFF_LSN: Lsn = Lsn(30);
|
||||
let mut iter = page_versions.ordered_page_version_iter(Some(CUTOFF_LSN));
|
||||
for blknum in 0..BLOCKS {
|
||||
for lsn in 0..CUTOFF_LSN.0 {
|
||||
let (actual_blknum, actual_lsn, _pv) = iter.next().unwrap();
|
||||
assert_eq!(actual_blknum, blknum);
|
||||
assert_eq!(Lsn(lsn), actual_lsn);
|
||||
assert_eq!(Lsn(lsn), slice[lsn as usize].0);
|
||||
}
|
||||
}
|
||||
assert!(iter.next().is_none());
|
||||
|
||||
@@ -78,7 +78,7 @@ pub struct PageVersion {
|
||||
/// 'records' contains the records to apply over the base image.
|
||||
///
|
||||
pub struct PageReconstructData {
|
||||
pub records: Vec<(Lsn, WALRecord)>,
|
||||
pub records: Vec<WALRecord>,
|
||||
pub page_img: Option<Bytes>,
|
||||
}
|
||||
|
||||
@@ -123,6 +123,10 @@ pub trait Layer: Send + Sync {
|
||||
/// Is the segment represented by this layer dropped by PostgreSQL?
|
||||
fn is_dropped(&self) -> bool;
|
||||
|
||||
/// Gets the physical location of the layer on disk.
|
||||
/// Some layers, such as in-memory, might not have the location.
|
||||
fn path(&self) -> Option<PathBuf>;
|
||||
|
||||
/// Filename used to store this layer on disk. (Even in-memory layers
|
||||
/// implement this, to print a handy unique identifier for the layer for
|
||||
/// log messages, even though they're never not on disk.)
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
use anyhow::{anyhow, bail, ensure, Result};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use lazy_static::lazy_static;
|
||||
use log::*;
|
||||
use regex::Regex;
|
||||
use std::net::TcpListener;
|
||||
use std::str;
|
||||
@@ -20,12 +21,10 @@ use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
use std::{io, net::TcpStream};
|
||||
use tracing::*;
|
||||
use zenith_metrics::{register_histogram_vec, HistogramVec};
|
||||
use zenith_utils::auth::{self, JwtAuth};
|
||||
use zenith_utils::auth::{Claims, Scope};
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::postgres_backend::is_socket_read_timed_out;
|
||||
use zenith_utils::postgres_backend::PostgresBackend;
|
||||
use zenith_utils::postgres_backend::{self, AuthType};
|
||||
use zenith_utils::pq_proto::{
|
||||
@@ -188,32 +187,17 @@ pub fn thread_main(
|
||||
listener: TcpListener,
|
||||
auth_type: AuthType,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut join_handles = Vec::new();
|
||||
|
||||
while !tenant_mgr::shutdown_requested() {
|
||||
loop {
|
||||
let (socket, peer_addr) = listener.accept()?;
|
||||
debug!("accepted connection from {}", peer_addr);
|
||||
socket.set_nodelay(true).unwrap();
|
||||
let local_auth = auth.clone();
|
||||
|
||||
let handle = thread::Builder::new()
|
||||
.name("serving Page Service thread".into())
|
||||
.spawn(move || {
|
||||
if let Err(err) = page_service_conn_main(conf, local_auth, socket, auth_type) {
|
||||
error!(%err, "page server thread exited with error");
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
join_handles.push(handle);
|
||||
thread::spawn(move || {
|
||||
if let Err(err) = page_service_conn_main(conf, local_auth, socket, auth_type) {
|
||||
error!("page server thread exiting with error: {:#}", err);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
debug!("page_service loop terminated. wait for connections to cancel");
|
||||
for handle in join_handles.into_iter() {
|
||||
handle.join().unwrap();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn page_service_conn_main(
|
||||
@@ -232,7 +216,7 @@ fn page_service_conn_main(
|
||||
}
|
||||
|
||||
let mut conn_handler = PageServerHandler::new(conf, auth);
|
||||
let pgbackend = PostgresBackend::new(socket, auth_type, None, true)?;
|
||||
let pgbackend = PostgresBackend::new(socket, auth_type, None)?;
|
||||
pgbackend.run(&mut conn_handler)
|
||||
}
|
||||
|
||||
@@ -276,66 +260,50 @@ impl PageServerHandler {
|
||||
timelineid: ZTimelineId,
|
||||
tenantid: ZTenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
let _enter = info_span!("pagestream", timeline = %timelineid, tenant = %tenantid).entered();
|
||||
|
||||
// Check that the timeline exists
|
||||
let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
|
||||
|
||||
/* switch client to COPYBOTH */
|
||||
pgb.write_message(&BeMessage::CopyBothResponse)?;
|
||||
|
||||
while !tenant_mgr::shutdown_requested() {
|
||||
match pgb.read_message() {
|
||||
Ok(message) => {
|
||||
if let Some(message) = message {
|
||||
trace!("query: {:?}", message);
|
||||
while let Some(message) = pgb.read_message()? {
|
||||
trace!("query({:?}): {:?}", timelineid, message);
|
||||
|
||||
let copy_data_bytes = match message {
|
||||
FeMessage::CopyData(bytes) => bytes,
|
||||
_ => continue,
|
||||
};
|
||||
let copy_data_bytes = match message {
|
||||
FeMessage::CopyData(bytes) => bytes,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let zenith_fe_msg = PagestreamFeMessage::parse(copy_data_bytes)?;
|
||||
let zenith_fe_msg = PagestreamFeMessage::parse(copy_data_bytes)?;
|
||||
|
||||
let response = match zenith_fe_msg {
|
||||
PagestreamFeMessage::Exists(req) => SMGR_QUERY_TIME
|
||||
.with_label_values(&["get_rel_exists"])
|
||||
.observe_closure_duration(|| {
|
||||
self.handle_get_rel_exists_request(&*timeline, &req)
|
||||
}),
|
||||
PagestreamFeMessage::Nblocks(req) => SMGR_QUERY_TIME
|
||||
.with_label_values(&["get_rel_size"])
|
||||
.observe_closure_duration(|| {
|
||||
self.handle_get_nblocks_request(&*timeline, &req)
|
||||
}),
|
||||
PagestreamFeMessage::GetPage(req) => SMGR_QUERY_TIME
|
||||
.with_label_values(&["get_page_at_lsn"])
|
||||
.observe_closure_duration(|| {
|
||||
self.handle_get_page_at_lsn_request(&*timeline, &req)
|
||||
}),
|
||||
};
|
||||
let response = match zenith_fe_msg {
|
||||
PagestreamFeMessage::Exists(req) => SMGR_QUERY_TIME
|
||||
.with_label_values(&["get_rel_exists"])
|
||||
.observe_closure_duration(|| {
|
||||
self.handle_get_rel_exists_request(&*timeline, &req)
|
||||
}),
|
||||
PagestreamFeMessage::Nblocks(req) => SMGR_QUERY_TIME
|
||||
.with_label_values(&["get_rel_size"])
|
||||
.observe_closure_duration(|| self.handle_get_nblocks_request(&*timeline, &req)),
|
||||
PagestreamFeMessage::GetPage(req) => SMGR_QUERY_TIME
|
||||
.with_label_values(&["get_page_at_lsn"])
|
||||
.observe_closure_duration(|| {
|
||||
self.handle_get_page_at_lsn_request(&*timeline, &req)
|
||||
}),
|
||||
};
|
||||
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
// print the all details to the log with {:#}, but for the client the
|
||||
// error message is enough
|
||||
error!("error reading relation or page version: {:#}", e);
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
});
|
||||
let response = response.unwrap_or_else(|e| {
|
||||
// print the all details to the log with {:#}, but for the client the
|
||||
// error message is enough
|
||||
error!("error reading relation or page version: {:#}", e);
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
});
|
||||
|
||||
pgb.write_message(&BeMessage::CopyData(&response.serialize()))?;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if !is_socket_read_timed_out(&e) {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
pgb.write_message(&BeMessage::CopyData(&response.serialize()))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -395,8 +363,6 @@ impl PageServerHandler {
|
||||
timeline: &dyn Timeline,
|
||||
req: &PagestreamExistsRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let _enter = info_span!("get_rel_exists", rel = %req.rel, req_lsn = %req.lsn).entered();
|
||||
|
||||
let tag = RelishTag::Relation(req.rel);
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
|
||||
|
||||
@@ -412,7 +378,6 @@ impl PageServerHandler {
|
||||
timeline: &dyn Timeline,
|
||||
req: &PagestreamNblocksRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let _enter = info_span!("get_nblocks", rel = %req.rel, req_lsn = %req.lsn).entered();
|
||||
let tag = RelishTag::Relation(req.rel);
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
|
||||
|
||||
@@ -432,8 +397,6 @@ impl PageServerHandler {
|
||||
timeline: &dyn Timeline,
|
||||
req: &PagestreamGetPageRequest,
|
||||
) -> Result<PagestreamBeMessage> {
|
||||
let _enter = info_span!("get_page", rel = %req.rel, blkno = &req.blkno, req_lsn = %req.lsn)
|
||||
.entered();
|
||||
let tag = RelishTag::Relation(req.rel);
|
||||
let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest)?;
|
||||
|
||||
@@ -451,20 +414,17 @@ impl PageServerHandler {
|
||||
lsn: Option<Lsn>,
|
||||
tenantid: ZTenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
let span = info_span!("basebackup", timeline = %timelineid, tenant = %tenantid, lsn = field::Empty);
|
||||
let _enter = span.enter();
|
||||
|
||||
// check that the timeline exists
|
||||
let timeline = tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
|
||||
|
||||
// switch client to COPYOUT
|
||||
/* switch client to COPYOUT */
|
||||
pgb.write_message(&BeMessage::CopyOutResponse)?;
|
||||
info!("sent CopyOut");
|
||||
|
||||
/* Send a tarball of the latest layer on the timeline */
|
||||
{
|
||||
let mut writer = CopyDataSink { pgb };
|
||||
let mut basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn)?;
|
||||
span.record("lsn", &basebackup.lsn.to_string().as_str());
|
||||
basebackup.send_tarball()?;
|
||||
}
|
||||
pgb.write_message(&BeMessage::CopyDone)?;
|
||||
@@ -569,6 +529,11 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
None
|
||||
};
|
||||
|
||||
info!(
|
||||
"got basebackup command. tenantid=\"{}\" timelineid=\"{}\" lsn=\"{:#?}\"",
|
||||
tenantid, timelineid, lsn
|
||||
);
|
||||
|
||||
// Check that the timeline exists
|
||||
self.handle_basebackup_request(pgb, timelineid, lsn, tenantid)?;
|
||||
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||
@@ -586,9 +551,6 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
|
||||
self.check_permission(Some(tenantid))?;
|
||||
|
||||
let _enter =
|
||||
info_span!("callmemaybe", timeline = %timelineid, tenant = %tenantid).entered();
|
||||
|
||||
// Check that the timeline exists
|
||||
tenant_mgr::get_timeline_for_tenant(tenantid, timelineid)?;
|
||||
|
||||
@@ -611,9 +573,6 @@ impl postgres_backend::Handler for PageServerHandler {
|
||||
|
||||
self.check_permission(Some(tenantid))?;
|
||||
|
||||
let _enter =
|
||||
info_span!("branch_create", name = %branchname, tenant = %tenantid).entered();
|
||||
|
||||
let branch =
|
||||
branches::create_branch(self.conf, &branchname, &startpoint_str, &tenantid)?;
|
||||
let branch = serde_json::to_vec(&branch)?;
|
||||
|
||||
@@ -12,12 +12,14 @@ mod rust_s3;
|
||||
/// local page server layer files with external storage.
|
||||
mod synced_storage;
|
||||
|
||||
use std::{path::Path, thread};
|
||||
use std::path::Path;
|
||||
use std::thread;
|
||||
|
||||
use anyhow::Context;
|
||||
|
||||
use self::local_fs::LocalFs;
|
||||
pub use self::synced_storage::schedule_timeline_upload;
|
||||
use self::{local_fs::LocalFs, rust_s3::RustS3};
|
||||
use crate::relish_storage::rust_s3::RustS3;
|
||||
use crate::{PageServerConf, RelishStorageKind};
|
||||
|
||||
pub fn run_storage_sync_thread(
|
||||
@@ -55,21 +57,15 @@ pub trait RelishStorage: Send + Sync {
|
||||
|
||||
async fn list_relishes(&self) -> anyhow::Result<Vec<Self::RelishStoragePath>>;
|
||||
|
||||
async fn download_relish<W: 'static + std::io::Write + Send>(
|
||||
async fn download_relish(
|
||||
&self,
|
||||
from: &Self::RelishStoragePath,
|
||||
// rust_s3 `get_object_stream` method requires `std::io::BufWriter` for some reason, not the async counterpart
|
||||
// that forces us to consume and return the writer to satisfy the blocking operation async wrapper requirements
|
||||
to: std::io::BufWriter<W>,
|
||||
) -> anyhow::Result<std::io::BufWriter<W>>;
|
||||
to: &Path,
|
||||
) -> anyhow::Result<()>;
|
||||
|
||||
async fn delete_relish(&self, path: &Self::RelishStoragePath) -> anyhow::Result<()>;
|
||||
|
||||
async fn upload_relish<R: tokio::io::AsyncRead + std::marker::Unpin + Send>(
|
||||
&self,
|
||||
from: &mut tokio::io::BufReader<R>,
|
||||
to: &Self::RelishStoragePath,
|
||||
) -> anyhow::Result<()>;
|
||||
async fn upload_relish(&self, from: &Path, to: &Self::RelishStoragePath) -> anyhow::Result<()>;
|
||||
}
|
||||
|
||||
fn strip_workspace_prefix<'a>(
|
||||
|
||||
@@ -9,13 +9,11 @@
|
||||
|
||||
use std::{
|
||||
future::Future,
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
pin::Pin,
|
||||
};
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use tokio::{fs, io};
|
||||
|
||||
use super::{strip_workspace_prefix, RelishStorage};
|
||||
|
||||
@@ -66,33 +64,16 @@ impl RelishStorage for LocalFs {
|
||||
Ok(get_all_files(&self.root).await?.into_iter().collect())
|
||||
}
|
||||
|
||||
async fn download_relish<W: 'static + std::io::Write + Send>(
|
||||
async fn download_relish(
|
||||
&self,
|
||||
from: &Self::RelishStoragePath,
|
||||
mut to: std::io::BufWriter<W>,
|
||||
) -> anyhow::Result<std::io::BufWriter<W>> {
|
||||
to: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
let file_path = self.resolve_in_storage(from)?;
|
||||
if file_path.exists() && file_path.is_file() {
|
||||
let updated_buffer = tokio::task::spawn_blocking(move || {
|
||||
let mut source = std::io::BufReader::new(
|
||||
std::fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&file_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to open source file '{}' to use in the download",
|
||||
file_path.display()
|
||||
)
|
||||
})?,
|
||||
);
|
||||
std::io::copy(&mut source, &mut to)
|
||||
.context("Failed to download the relish file")?;
|
||||
to.flush().context("Failed to flush the download buffer")?;
|
||||
Ok::<_, anyhow::Error>(to)
|
||||
})
|
||||
.await
|
||||
.context("Failed to spawn a blocking task")??;
|
||||
Ok(updated_buffer)
|
||||
create_target_directory(to).await?;
|
||||
tokio::fs::copy(file_path, to).await?;
|
||||
Ok(())
|
||||
} else {
|
||||
bail!(
|
||||
"File '{}' either does not exist or is not a file",
|
||||
@@ -113,30 +94,18 @@ impl RelishStorage for LocalFs {
|
||||
}
|
||||
}
|
||||
|
||||
async fn upload_relish<R: io::AsyncRead + std::marker::Unpin + Send>(
|
||||
&self,
|
||||
from: &mut io::BufReader<R>,
|
||||
to: &Self::RelishStoragePath,
|
||||
) -> anyhow::Result<()> {
|
||||
async fn upload_relish(&self, from: &Path, to: &Self::RelishStoragePath) -> anyhow::Result<()> {
|
||||
let target_file_path = self.resolve_in_storage(to)?;
|
||||
create_target_directory(&target_file_path).await?;
|
||||
let mut destination = io::BufWriter::new(
|
||||
fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create(true)
|
||||
.open(&target_file_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to open target fs destination at '{}'",
|
||||
target_file_path.display()
|
||||
)
|
||||
})?,
|
||||
);
|
||||
|
||||
io::copy_buf(from, &mut destination)
|
||||
tokio::fs::copy(&from, &target_file_path)
|
||||
.await
|
||||
.context("Failed to upload relish to local storage")?;
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to upload relish '{}' to local storage",
|
||||
from.display(),
|
||||
)
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,13 @@
|
||||
//! A wrapper around AWS S3 client library `rust_s3` to be used a relish storage.
|
||||
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::Context;
|
||||
use s3::{bucket::Bucket, creds::Credentials, region::Region};
|
||||
|
||||
use crate::{
|
||||
relish_storage::{strip_workspace_prefix, RelishStorage},
|
||||
S3Config,
|
||||
};
|
||||
use crate::{relish_storage::strip_workspace_prefix, S3Config};
|
||||
|
||||
use super::RelishStorage;
|
||||
|
||||
const S3_FILE_SEPARATOR: char = '/';
|
||||
|
||||
@@ -84,14 +82,18 @@ impl RelishStorage for RustS3 {
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn download_relish<W: 'static + std::io::Write + Send>(
|
||||
async fn download_relish(
|
||||
&self,
|
||||
from: &Self::RelishStoragePath,
|
||||
mut to: std::io::BufWriter<W>,
|
||||
) -> anyhow::Result<std::io::BufWriter<W>> {
|
||||
to: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut target_file = std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.open(to)
|
||||
.with_context(|| format!("Failed to open target s3 destination at {}", to.display()))?;
|
||||
let code = self
|
||||
.bucket
|
||||
.get_object_stream(from.key(), &mut to)
|
||||
.get_object_stream(from.key(), &mut target_file)
|
||||
.await
|
||||
.with_context(|| format!("Failed to download s3 object with key {}", from.key()))?;
|
||||
if code != 200 {
|
||||
@@ -100,12 +102,7 @@ impl RelishStorage for RustS3 {
|
||||
code
|
||||
))
|
||||
} else {
|
||||
tokio::task::spawn_blocking(move || {
|
||||
to.flush().context("Failed to fluch the downoad buffer")?;
|
||||
Ok::<_, anyhow::Error>(to)
|
||||
})
|
||||
.await
|
||||
.context("Failed to joim the download buffer flush task")?
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -115,9 +112,9 @@ impl RelishStorage for RustS3 {
|
||||
.delete_object(path.key())
|
||||
.await
|
||||
.with_context(|| format!("Failed to delete s3 object with key {}", path.key()))?;
|
||||
if code != 204 {
|
||||
if code != 200 {
|
||||
Err(anyhow::format_err!(
|
||||
"Received non-204 exit code during deleting object with key '{}', code: {}",
|
||||
"Received non-200 exit code during deleting object with key '{}', code: {}",
|
||||
path.key(),
|
||||
code
|
||||
))
|
||||
@@ -126,14 +123,12 @@ impl RelishStorage for RustS3 {
|
||||
}
|
||||
}
|
||||
|
||||
async fn upload_relish<R: tokio::io::AsyncRead + std::marker::Unpin + Send>(
|
||||
&self,
|
||||
from: &mut tokio::io::BufReader<R>,
|
||||
to: &Self::RelishStoragePath,
|
||||
) -> anyhow::Result<()> {
|
||||
async fn upload_relish(&self, from: &Path, to: &Self::RelishStoragePath) -> anyhow::Result<()> {
|
||||
let mut local_file = tokio::fs::OpenOptions::new().read(true).open(from).await?;
|
||||
|
||||
let code = self
|
||||
.bucket
|
||||
.put_object_stream(from, to.key())
|
||||
.put_object_stream(&mut local_file, to.key())
|
||||
.await
|
||||
.with_context(|| format!("Failed to create s3 object with key {}", to.key()))?;
|
||||
if code != 200 {
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::time::Duration;
|
||||
use std::{collections::BinaryHeap, sync::Mutex, thread};
|
||||
|
||||
use crate::tenant_mgr;
|
||||
use crate::{relish_storage::RelishStorage, PageServerConf};
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
@@ -32,26 +31,22 @@ pub fn run_storage_sync_thread<
|
||||
|
||||
let handle = thread::Builder::new()
|
||||
.name("Queue based relish storage sync".to_string())
|
||||
.spawn(move || {
|
||||
while !tenant_mgr::shutdown_requested() {
|
||||
let mut queue_accessor = UPLOAD_QUEUE.lock().unwrap();
|
||||
log::debug!("Upload queue length: {}", queue_accessor.len());
|
||||
let next_task = queue_accessor.pop();
|
||||
drop(queue_accessor);
|
||||
match next_task {
|
||||
Some(task) => runtime.block_on(async {
|
||||
// suppress warnings
|
||||
let _ = (config, task, &relish_storage, max_concurrent_sync);
|
||||
todo!("omitted for brevity")
|
||||
}),
|
||||
None => {
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
continue;
|
||||
}
|
||||
.spawn(move || loop {
|
||||
let mut queue_accessor = UPLOAD_QUEUE.lock().unwrap();
|
||||
log::debug!("Upload queue length: {}", queue_accessor.len());
|
||||
let next_task = queue_accessor.pop();
|
||||
drop(queue_accessor);
|
||||
match next_task {
|
||||
Some(task) => runtime.block_on(async {
|
||||
// suppress warnings
|
||||
let _ = (config, task, &relish_storage, max_concurrent_sync);
|
||||
todo!("omitted for brevity")
|
||||
}),
|
||||
None => {
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
log::debug!("Queue based relish storage sync thread shut down");
|
||||
Ok(())
|
||||
})?;
|
||||
Ok(Some(handle))
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use anyhow::Result;
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashSet;
|
||||
use std::ops::{AddAssign, Deref};
|
||||
use std::ops::AddAssign;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use zenith_utils::lsn::{Lsn, RecordLsn};
|
||||
@@ -13,8 +13,6 @@ use zenith_utils::zid::ZTimelineId;
|
||||
/// A repository corresponds to one .zenith directory. One repository holds multiple
|
||||
/// timelines, forked off from the same initial call to 'initdb'.
|
||||
pub trait Repository: Send + Sync {
|
||||
fn shutdown(&self) -> Result<()>;
|
||||
|
||||
/// Get Timeline handle for given zenith timeline ID.
|
||||
fn get_timeline(&self, timelineid: ZTimelineId) -> Result<Arc<dyn Timeline>>;
|
||||
|
||||
@@ -119,15 +117,32 @@ pub trait Timeline: Send + Sync {
|
||||
/// Get a list of all existing non-relational objects
|
||||
fn list_nonrels(&self, lsn: Lsn) -> Result<HashSet<RelishTag>>;
|
||||
|
||||
/// Get the LSN where this branch was created
|
||||
fn get_ancestor_lsn(&self) -> Lsn;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Public PUT functions, to update the repository with new page versions.
|
||||
//
|
||||
// These are called by the WAL receiver to digest WAL records.
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/// Put a new page version that can be constructed from a WAL record
|
||||
///
|
||||
/// This will implicitly extend the relation, if the page is beyond the
|
||||
/// current end-of-file.
|
||||
fn put_wal_record(&self, tag: RelishTag, blknum: u32, rec: WALRecord) -> Result<()>;
|
||||
|
||||
/// Like put_wal_record, but with ready-made image of the page.
|
||||
fn put_page_image(&self, tag: RelishTag, blknum: u32, lsn: Lsn, img: Bytes) -> Result<()>;
|
||||
|
||||
/// Truncate relation
|
||||
fn put_truncation(&self, rel: RelishTag, lsn: Lsn, nblocks: u32) -> Result<()>;
|
||||
|
||||
/// This method is used for marking dropped relations and truncated SLRU files and aborted two phase records
|
||||
fn drop_relish(&self, tag: RelishTag, lsn: Lsn) -> Result<()>;
|
||||
|
||||
/// Track end of the latest digested WAL record.
|
||||
///
|
||||
/// Advance requires aligned LSN as an argument and would wake wait_lsn() callers.
|
||||
/// Previous last record LSN is stored alongside the latest and can be read.
|
||||
fn advance_last_record_lsn(&self, lsn: Lsn);
|
||||
/// Atomically get both last and prev.
|
||||
fn get_last_record_rlsn(&self) -> RecordLsn;
|
||||
/// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.
|
||||
@@ -135,9 +150,6 @@ pub trait Timeline: Send + Sync {
|
||||
fn get_prev_record_lsn(&self) -> Lsn;
|
||||
fn get_start_lsn(&self) -> Lsn;
|
||||
|
||||
/// Mutate the timeline with a [`TimelineWriter`].
|
||||
fn writer<'a>(&'a self) -> Box<dyn TimelineWriter + 'a>;
|
||||
|
||||
///
|
||||
/// Flush to disk all data that was written with the put_* functions
|
||||
///
|
||||
@@ -156,35 +168,9 @@ pub trait Timeline: Send + Sync {
|
||||
fn get_current_logical_size_non_incremental(&self, lsn: Lsn) -> Result<usize>;
|
||||
}
|
||||
|
||||
/// Various functions to mutate the timeline.
|
||||
// TODO Currently, Deref is used to allow easy access to read methods from this trait.
|
||||
// This is probably considered a bad practice in Rust and should be fixed eventually,
|
||||
// but will cause large code changes.
|
||||
pub trait TimelineWriter: Deref<Target = dyn Timeline> {
|
||||
/// Put a new page version that can be constructed from a WAL record
|
||||
///
|
||||
/// This will implicitly extend the relation, if the page is beyond the
|
||||
/// current end-of-file.
|
||||
fn put_wal_record(&self, lsn: Lsn, tag: RelishTag, blknum: u32, rec: WALRecord) -> Result<()>;
|
||||
|
||||
/// Like put_wal_record, but with ready-made image of the page.
|
||||
fn put_page_image(&self, tag: RelishTag, blknum: u32, lsn: Lsn, img: Bytes) -> Result<()>;
|
||||
|
||||
/// Truncate relation
|
||||
fn put_truncation(&self, rel: RelishTag, lsn: Lsn, nblocks: u32) -> Result<()>;
|
||||
|
||||
/// This method is used for marking dropped relations and truncated SLRU files and aborted two phase records
|
||||
fn drop_relish(&self, tag: RelishTag, lsn: Lsn) -> Result<()>;
|
||||
|
||||
/// Track end of the latest digested WAL record.
|
||||
///
|
||||
/// Advance requires aligned LSN as an argument and would wake wait_lsn() callers.
|
||||
/// Previous last record LSN is stored alongside the latest and can be read.
|
||||
fn advance_last_record_lsn(&self, lsn: Lsn);
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct WALRecord {
|
||||
pub lsn: Lsn, // LSN at the *end* of the record
|
||||
pub will_init: bool,
|
||||
pub rec: Bytes,
|
||||
// Remember the offset of main_data in rec,
|
||||
@@ -195,19 +181,22 @@ pub struct WALRecord {
|
||||
|
||||
impl WALRecord {
|
||||
pub fn pack(&self, buf: &mut BytesMut) {
|
||||
buf.put_u64(self.lsn.0);
|
||||
buf.put_u8(self.will_init as u8);
|
||||
buf.put_u32(self.main_data_offset);
|
||||
buf.put_u32(self.rec.len() as u32);
|
||||
buf.put_slice(&self.rec[..]);
|
||||
}
|
||||
pub fn unpack(buf: &mut Bytes) -> WALRecord {
|
||||
let lsn = Lsn::from(buf.get_u64());
|
||||
let will_init = buf.get_u8() != 0;
|
||||
let main_data_offset = buf.get_u32();
|
||||
let rec_len = buf.get_u32() as usize;
|
||||
let rec = buf.split_to(rec_len);
|
||||
let mut dst = vec![0u8; buf.get_u32() as usize];
|
||||
buf.copy_to_slice(&mut dst);
|
||||
WALRecord {
|
||||
lsn,
|
||||
will_init,
|
||||
rec,
|
||||
rec: Bytes::from(dst),
|
||||
main_data_offset,
|
||||
}
|
||||
}
|
||||
@@ -220,7 +209,7 @@ impl WALRecord {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::layered_repository::{LayeredRepository, METADATA_FILE_NAME};
|
||||
use crate::layered_repository::LayeredRepository;
|
||||
use crate::walredo::{WalRedoError, WalRedoManager};
|
||||
use crate::PageServerConf;
|
||||
use hex_literal::hex;
|
||||
@@ -317,15 +306,14 @@ mod tests {
|
||||
|
||||
// Create timeline to work on
|
||||
let tline = repo.create_empty_timeline(TIMELINE_ID)?;
|
||||
let writer = tline.writer();
|
||||
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x30), TEST_IMG("foo blk 0 at 3"))?;
|
||||
writer.put_page_image(TESTREL_A, 1, Lsn(0x40), TEST_IMG("foo blk 1 at 4"))?;
|
||||
writer.put_page_image(TESTREL_A, 2, Lsn(0x50), TEST_IMG("foo blk 2 at 5"))?;
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x30), TEST_IMG("foo blk 0 at 3"))?;
|
||||
tline.put_page_image(TESTREL_A, 1, Lsn(0x40), TEST_IMG("foo blk 1 at 4"))?;
|
||||
tline.put_page_image(TESTREL_A, 2, Lsn(0x50), TEST_IMG("foo blk 2 at 5"))?;
|
||||
|
||||
writer.advance_last_record_lsn(Lsn(0x50));
|
||||
tline.advance_last_record_lsn(Lsn(0x50));
|
||||
|
||||
assert_current_logical_size(&tline, Lsn(0x50));
|
||||
|
||||
@@ -371,8 +359,8 @@ mod tests {
|
||||
);
|
||||
|
||||
// Truncate last block
|
||||
writer.put_truncation(TESTREL_A, Lsn(0x60), 2)?;
|
||||
writer.advance_last_record_lsn(Lsn(0x60));
|
||||
tline.put_truncation(TESTREL_A, Lsn(0x60), 2)?;
|
||||
tline.advance_last_record_lsn(Lsn(0x60));
|
||||
assert_current_logical_size(&tline, Lsn(0x60));
|
||||
|
||||
// Check reported size and contents after truncation
|
||||
@@ -394,13 +382,13 @@ mod tests {
|
||||
);
|
||||
|
||||
// Truncate to zero length
|
||||
writer.put_truncation(TESTREL_A, Lsn(0x68), 0)?;
|
||||
writer.advance_last_record_lsn(Lsn(0x68));
|
||||
tline.put_truncation(TESTREL_A, Lsn(0x68), 0)?;
|
||||
tline.advance_last_record_lsn(Lsn(0x68));
|
||||
assert_eq!(tline.get_relish_size(TESTREL_A, Lsn(0x68))?.unwrap(), 0);
|
||||
|
||||
// Extend from 0 to 2 blocks, leaving a gap
|
||||
writer.put_page_image(TESTREL_A, 1, Lsn(0x70), TEST_IMG("foo blk 1"))?;
|
||||
writer.advance_last_record_lsn(Lsn(0x70));
|
||||
tline.put_page_image(TESTREL_A, 1, Lsn(0x70), TEST_IMG("foo blk 1"))?;
|
||||
tline.advance_last_record_lsn(Lsn(0x70));
|
||||
assert_eq!(tline.get_relish_size(TESTREL_A, Lsn(0x70))?.unwrap(), 2);
|
||||
assert_eq!(tline.get_page_at_lsn(TESTREL_A, 0, Lsn(0x70))?, ZERO_PAGE);
|
||||
assert_eq!(
|
||||
@@ -435,26 +423,25 @@ mod tests {
|
||||
|
||||
// Create timeline to work on
|
||||
let tline = repo.create_empty_timeline(TIMELINE_ID)?;
|
||||
let writer = tline.writer();
|
||||
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
writer.advance_last_record_lsn(Lsn(0x20));
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
tline.advance_last_record_lsn(Lsn(0x20));
|
||||
|
||||
// Check that rel exists and size is correct
|
||||
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x20))?, true);
|
||||
assert_eq!(tline.get_relish_size(TESTREL_A, Lsn(0x20))?.unwrap(), 1);
|
||||
|
||||
// Drop relish
|
||||
writer.drop_relish(TESTREL_A, Lsn(0x30))?;
|
||||
writer.advance_last_record_lsn(Lsn(0x30));
|
||||
tline.drop_relish(TESTREL_A, Lsn(0x30))?;
|
||||
tline.advance_last_record_lsn(Lsn(0x30));
|
||||
|
||||
// Check that rel is not visible anymore
|
||||
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x30))?, false);
|
||||
assert!(tline.get_relish_size(TESTREL_A, Lsn(0x30))?.is_none());
|
||||
|
||||
// Extend it again
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("foo blk 0 at 4"))?;
|
||||
writer.advance_last_record_lsn(Lsn(0x40));
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("foo blk 0 at 4"))?;
|
||||
tline.advance_last_record_lsn(Lsn(0x40));
|
||||
|
||||
// Check that rel exists and size is correct
|
||||
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x40))?, true);
|
||||
@@ -472,7 +459,6 @@ mod tests {
|
||||
|
||||
// Create timeline to work on
|
||||
let tline = repo.create_empty_timeline(TIMELINE_ID)?;
|
||||
let writer = tline.writer();
|
||||
|
||||
//from storage_layer.rs
|
||||
const RELISH_SEG_SIZE: u32 = 10 * 1024 * 1024 / 8192;
|
||||
@@ -482,10 +468,10 @@ mod tests {
|
||||
for blkno in 0..relsize {
|
||||
let lsn = Lsn(0x20);
|
||||
let data = format!("foo blk {} at {}", blkno, lsn);
|
||||
writer.put_page_image(TESTREL_A, blkno, lsn, TEST_IMG(&data))?;
|
||||
tline.put_page_image(TESTREL_A, blkno, lsn, TEST_IMG(&data))?;
|
||||
}
|
||||
|
||||
writer.advance_last_record_lsn(Lsn(0x20));
|
||||
tline.advance_last_record_lsn(Lsn(0x20));
|
||||
|
||||
// The relation was created at LSN 2, not visible at LSN 1 yet.
|
||||
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x10))?, false);
|
||||
@@ -509,8 +495,8 @@ mod tests {
|
||||
|
||||
// Truncate relation so that second segment was dropped
|
||||
// - only leave one page
|
||||
writer.put_truncation(TESTREL_A, Lsn(0x60), 1)?;
|
||||
writer.advance_last_record_lsn(Lsn(0x60));
|
||||
tline.put_truncation(TESTREL_A, Lsn(0x60), 1)?;
|
||||
tline.advance_last_record_lsn(Lsn(0x60));
|
||||
|
||||
// Check reported size and contents after truncation
|
||||
assert_eq!(tline.get_relish_size(TESTREL_A, Lsn(0x60))?.unwrap(), 1);
|
||||
@@ -543,9 +529,9 @@ mod tests {
|
||||
for blkno in 0..relsize {
|
||||
let lsn = Lsn(0x80);
|
||||
let data = format!("foo blk {} at {}", blkno, lsn);
|
||||
writer.put_page_image(TESTREL_A, blkno, lsn, TEST_IMG(&data))?;
|
||||
tline.put_page_image(TESTREL_A, blkno, lsn, TEST_IMG(&data))?;
|
||||
}
|
||||
writer.advance_last_record_lsn(Lsn(0x80));
|
||||
tline.advance_last_record_lsn(Lsn(0x80));
|
||||
|
||||
assert_eq!(tline.get_rel_exists(TESTREL_A, Lsn(0x80))?, true);
|
||||
assert_eq!(
|
||||
@@ -571,15 +557,14 @@ mod tests {
|
||||
fn test_large_rel() -> Result<()> {
|
||||
let repo = RepoHarness::create("test_large_rel")?.load();
|
||||
let tline = repo.create_empty_timeline(TIMELINE_ID)?;
|
||||
let writer = tline.writer();
|
||||
|
||||
let mut lsn = 0x10;
|
||||
for blknum in 0..pg_constants::RELSEG_SIZE + 1 {
|
||||
let img = TEST_IMG(&format!("foo blk {} at {}", blknum, Lsn(lsn)));
|
||||
lsn += 0x10;
|
||||
writer.put_page_image(TESTREL_A, blknum as u32, Lsn(lsn), img)?;
|
||||
tline.put_page_image(TESTREL_A, blknum as u32, Lsn(lsn), img)?;
|
||||
}
|
||||
writer.advance_last_record_lsn(Lsn(lsn));
|
||||
tline.advance_last_record_lsn(Lsn(lsn));
|
||||
|
||||
assert_current_logical_size(&tline, Lsn(lsn));
|
||||
|
||||
@@ -590,8 +575,8 @@ mod tests {
|
||||
|
||||
// Truncate one block
|
||||
lsn += 0x10;
|
||||
writer.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE)?;
|
||||
writer.advance_last_record_lsn(Lsn(lsn));
|
||||
tline.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE)?;
|
||||
tline.advance_last_record_lsn(Lsn(lsn));
|
||||
assert_eq!(
|
||||
tline.get_relish_size(TESTREL_A, Lsn(lsn))?.unwrap(),
|
||||
pg_constants::RELSEG_SIZE
|
||||
@@ -600,8 +585,8 @@ mod tests {
|
||||
|
||||
// Truncate another block
|
||||
lsn += 0x10;
|
||||
writer.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE - 1)?;
|
||||
writer.advance_last_record_lsn(Lsn(lsn));
|
||||
tline.put_truncation(TESTREL_A, Lsn(lsn), pg_constants::RELSEG_SIZE - 1)?;
|
||||
tline.advance_last_record_lsn(Lsn(lsn));
|
||||
assert_eq!(
|
||||
tline.get_relish_size(TESTREL_A, Lsn(lsn))?.unwrap(),
|
||||
pg_constants::RELSEG_SIZE - 1
|
||||
@@ -613,8 +598,8 @@ mod tests {
|
||||
let mut size: i32 = 3000;
|
||||
while size >= 0 {
|
||||
lsn += 0x10;
|
||||
writer.put_truncation(TESTREL_A, Lsn(lsn), size as u32)?;
|
||||
writer.advance_last_record_lsn(Lsn(lsn));
|
||||
tline.put_truncation(TESTREL_A, Lsn(lsn), size as u32)?;
|
||||
tline.advance_last_record_lsn(Lsn(lsn));
|
||||
assert_eq!(
|
||||
tline.get_relish_size(TESTREL_A, Lsn(lsn))?.unwrap(),
|
||||
size as u32
|
||||
@@ -634,17 +619,16 @@ mod tests {
|
||||
fn test_list_rels_drop() -> Result<()> {
|
||||
let repo = RepoHarness::create("test_list_rels_drop")?.load();
|
||||
let tline = repo.create_empty_timeline(TIMELINE_ID)?;
|
||||
let writer = tline.writer();
|
||||
const TESTDB: u32 = 111;
|
||||
|
||||
// Import initial dummy checkpoint record, otherwise the get_timeline() call
|
||||
// after branching fails below
|
||||
writer.put_page_image(RelishTag::Checkpoint, 0, Lsn(0x10), ZERO_CHECKPOINT.clone())?;
|
||||
tline.put_page_image(RelishTag::Checkpoint, 0, Lsn(0x10), ZERO_CHECKPOINT.clone())?;
|
||||
|
||||
// Create a relation on the timeline
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
|
||||
writer.advance_last_record_lsn(Lsn(0x30));
|
||||
tline.advance_last_record_lsn(Lsn(0x30));
|
||||
|
||||
// Check that list_rels() lists it after LSN 2, but no before it
|
||||
assert!(!tline.list_rels(0, TESTDB, Lsn(0x10))?.contains(&TESTREL_A));
|
||||
@@ -654,17 +638,14 @@ mod tests {
|
||||
// Create a branch, check that the relation is visible there
|
||||
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?;
|
||||
let newtline = repo.get_timeline(NEW_TIMELINE_ID)?;
|
||||
let new_writer = newtline.writer();
|
||||
|
||||
assert!(newtline
|
||||
.list_rels(0, TESTDB, Lsn(0x30))?
|
||||
.contains(&TESTREL_A));
|
||||
|
||||
// Drop it on the branch
|
||||
new_writer.drop_relish(TESTREL_A, Lsn(0x40))?;
|
||||
new_writer.advance_last_record_lsn(Lsn(0x40));
|
||||
|
||||
drop(new_writer);
|
||||
newtline.drop_relish(TESTREL_A, Lsn(0x40))?;
|
||||
newtline.advance_last_record_lsn(Lsn(0x40));
|
||||
|
||||
// Check that it's no longer listed on the branch after the point where it was dropped
|
||||
assert!(newtline
|
||||
@@ -692,30 +673,28 @@ mod tests {
|
||||
fn test_branch() -> Result<()> {
|
||||
let repo = RepoHarness::create("test_branch")?.load();
|
||||
let tline = repo.create_empty_timeline(TIMELINE_ID)?;
|
||||
let writer = tline.writer();
|
||||
|
||||
// Import initial dummy checkpoint record, otherwise the get_timeline() call
|
||||
// after branching fails below
|
||||
writer.put_page_image(RelishTag::Checkpoint, 0, Lsn(0x10), ZERO_CHECKPOINT.clone())?;
|
||||
tline.put_page_image(RelishTag::Checkpoint, 0, Lsn(0x10), ZERO_CHECKPOINT.clone())?;
|
||||
|
||||
// Create a relation on the timeline
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x30), TEST_IMG("foo blk 0 at 3"))?;
|
||||
writer.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("foo blk 0 at 4"))?;
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x20), TEST_IMG("foo blk 0 at 2"))?;
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x30), TEST_IMG("foo blk 0 at 3"))?;
|
||||
tline.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("foo blk 0 at 4"))?;
|
||||
|
||||
// Create another relation
|
||||
writer.put_page_image(TESTREL_B, 0, Lsn(0x20), TEST_IMG("foobar blk 0 at 2"))?;
|
||||
tline.put_page_image(TESTREL_B, 0, Lsn(0x20), TEST_IMG("foobar blk 0 at 2"))?;
|
||||
|
||||
writer.advance_last_record_lsn(Lsn(0x40));
|
||||
tline.advance_last_record_lsn(Lsn(0x40));
|
||||
assert_current_logical_size(&tline, Lsn(0x40));
|
||||
|
||||
// Branch the history, modify relation differently on the new timeline
|
||||
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?;
|
||||
let newtline = repo.get_timeline(NEW_TIMELINE_ID)?;
|
||||
let new_writer = newtline.writer();
|
||||
|
||||
new_writer.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("bar blk 0 at 4"))?;
|
||||
new_writer.advance_last_record_lsn(Lsn(0x40));
|
||||
newtline.put_page_image(TESTREL_A, 0, Lsn(0x40), TEST_IMG("bar blk 0 at 4"))?;
|
||||
newtline.advance_last_record_lsn(Lsn(0x40));
|
||||
|
||||
// Check page contents on both branches
|
||||
assert_eq!(
|
||||
@@ -749,7 +728,7 @@ mod tests {
|
||||
repo.create_empty_timeline(TIMELINE_ID)?;
|
||||
drop(repo);
|
||||
|
||||
let metadata_path = harness.timeline_path(&TIMELINE_ID).join(METADATA_FILE_NAME);
|
||||
let metadata_path = harness.timeline_path(&TIMELINE_ID).join("metadata");
|
||||
|
||||
assert!(metadata_path.is_file());
|
||||
|
||||
@@ -831,7 +810,7 @@ mod tests {
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, WALRecord)>,
|
||||
records: Vec<WALRecord>,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
let s = format!(
|
||||
"redo for {} blk {} to get to {}, with {} and {} records",
|
||||
|
||||
@@ -2,17 +2,17 @@
|
||||
//! Import data and WAL from a PostgreSQL data directory and WAL segments into
|
||||
//! zenith Timeline.
|
||||
//!
|
||||
use log::*;
|
||||
use postgres_ffi::nonrelfile_utils::clogpage_precedes;
|
||||
use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment;
|
||||
use std::cmp::min;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use anyhow::{bail, Result};
|
||||
use bytes::{Buf, Bytes};
|
||||
use tracing::*;
|
||||
|
||||
use crate::relish::*;
|
||||
use crate::repository::*;
|
||||
@@ -34,11 +34,9 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; 8192]);
|
||||
///
|
||||
pub fn import_timeline_from_postgres_datadir(
|
||||
path: &Path,
|
||||
writer: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
) -> Result<()> {
|
||||
let mut pg_control: Option<ControlFileData> = None;
|
||||
|
||||
// Scan 'global'
|
||||
for direntry in fs::read_dir(path.join("global"))? {
|
||||
let direntry = direntry?;
|
||||
@@ -46,10 +44,10 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
None => continue,
|
||||
|
||||
Some("pg_control") => {
|
||||
pg_control = Some(import_control_file(writer, lsn, &direntry.path())?);
|
||||
import_control_file(timeline, lsn, &direntry.path())?;
|
||||
}
|
||||
Some("pg_filenode.map") => import_nonrel_file(
|
||||
writer,
|
||||
timeline,
|
||||
lsn,
|
||||
RelishTag::FileNodeMap {
|
||||
spcnode: pg_constants::GLOBALTABLESPACE_OID,
|
||||
@@ -61,7 +59,7 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
// Load any relation files into the page server
|
||||
_ => import_relfile(
|
||||
&direntry.path(),
|
||||
writer,
|
||||
timeline,
|
||||
lsn,
|
||||
pg_constants::GLOBALTABLESPACE_OID,
|
||||
0,
|
||||
@@ -88,7 +86,7 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
|
||||
Some("PG_VERSION") => continue,
|
||||
Some("pg_filenode.map") => import_nonrel_file(
|
||||
writer,
|
||||
timeline,
|
||||
lsn,
|
||||
RelishTag::FileNodeMap {
|
||||
spcnode: pg_constants::DEFAULTTABLESPACE_OID,
|
||||
@@ -100,7 +98,7 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
// Load any relation files into the page server
|
||||
_ => import_relfile(
|
||||
&direntry.path(),
|
||||
writer,
|
||||
timeline,
|
||||
lsn,
|
||||
pg_constants::DEFAULTTABLESPACE_OID,
|
||||
dboid,
|
||||
@@ -110,36 +108,24 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
}
|
||||
for entry in fs::read_dir(path.join("pg_xact"))? {
|
||||
let entry = entry?;
|
||||
import_slru_file(writer, lsn, SlruKind::Clog, &entry.path())?;
|
||||
import_slru_file(timeline, lsn, SlruKind::Clog, &entry.path())?;
|
||||
}
|
||||
for entry in fs::read_dir(path.join("pg_multixact").join("members"))? {
|
||||
let entry = entry?;
|
||||
import_slru_file(writer, lsn, SlruKind::MultiXactMembers, &entry.path())?;
|
||||
import_slru_file(timeline, lsn, SlruKind::MultiXactMembers, &entry.path())?;
|
||||
}
|
||||
for entry in fs::read_dir(path.join("pg_multixact").join("offsets"))? {
|
||||
let entry = entry?;
|
||||
import_slru_file(writer, lsn, SlruKind::MultiXactOffsets, &entry.path())?;
|
||||
import_slru_file(timeline, lsn, SlruKind::MultiXactOffsets, &entry.path())?;
|
||||
}
|
||||
for entry in fs::read_dir(path.join("pg_twophase"))? {
|
||||
let entry = entry?;
|
||||
let xid = u32::from_str_radix(entry.path().to_str().unwrap(), 16)?;
|
||||
import_nonrel_file(writer, lsn, RelishTag::TwoPhase { xid }, &entry.path())?;
|
||||
import_nonrel_file(timeline, lsn, RelishTag::TwoPhase { xid }, &entry.path())?;
|
||||
}
|
||||
// TODO: Scan pg_tblspc
|
||||
|
||||
writer.advance_last_record_lsn(lsn);
|
||||
|
||||
// Import WAL. This is needed even when starting from a shutdown checkpoint, because
|
||||
// this reads the checkpoint record itself, advancing the tip of the timeline to
|
||||
// *after* the checkpoint record. And crucially, it initializes the 'prev_lsn'
|
||||
let pg_control = pg_control.ok_or_else(|| anyhow!("pg_control file not found"))?;
|
||||
import_wal(
|
||||
&path.join("pg_wal"),
|
||||
writer,
|
||||
Lsn(pg_control.checkPointCopy.redo),
|
||||
lsn,
|
||||
&mut pg_control.checkPointCopy.clone(),
|
||||
)?;
|
||||
timeline.advance_last_record_lsn(lsn);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -147,13 +133,12 @@ pub fn import_timeline_from_postgres_datadir(
|
||||
// subroutine of import_timeline_from_postgres_datadir(), to load one relation file.
|
||||
fn import_relfile(
|
||||
path: &Path,
|
||||
timeline: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
spcoid: Oid,
|
||||
dboid: Oid,
|
||||
) -> Result<()> {
|
||||
// Does it look like a relation file?
|
||||
trace!("importing rel file {}", path.display());
|
||||
|
||||
let p = parse_relfilename(path.file_name().unwrap().to_str().unwrap());
|
||||
if let Err(e) = p {
|
||||
@@ -181,14 +166,14 @@ fn import_relfile(
|
||||
}
|
||||
|
||||
// TODO: UnexpectedEof is expected
|
||||
Err(err) => match err.kind() {
|
||||
Err(e) => match e.kind() {
|
||||
std::io::ErrorKind::UnexpectedEof => {
|
||||
// reached EOF. That's expected.
|
||||
// FIXME: maybe check that we read the full length of the file?
|
||||
break;
|
||||
}
|
||||
_ => {
|
||||
bail!("error reading file {}: {:#}", path.display(), err);
|
||||
bail!("error reading file {}: {:#}", path.display(), e);
|
||||
}
|
||||
},
|
||||
};
|
||||
@@ -205,7 +190,7 @@ fn import_relfile(
|
||||
/// are just slurped into the repository as one blob.
|
||||
///
|
||||
fn import_nonrel_file(
|
||||
timeline: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
tag: RelishTag,
|
||||
path: &Path,
|
||||
@@ -215,7 +200,7 @@ fn import_nonrel_file(
|
||||
// read the whole file
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
trace!("importing non-rel file {}", path.display());
|
||||
info!("importing non-rel file {}", path.display());
|
||||
|
||||
timeline.put_page_image(tag, 0, lsn, Bytes::copy_from_slice(&buffer[..]))?;
|
||||
Ok(())
|
||||
@@ -226,17 +211,13 @@ fn import_nonrel_file(
|
||||
///
|
||||
/// The control file is imported as is, but we also extract the checkpoint record
|
||||
/// from it and store it separated.
|
||||
fn import_control_file(
|
||||
timeline: &dyn TimelineWriter,
|
||||
lsn: Lsn,
|
||||
path: &Path,
|
||||
) -> Result<ControlFileData> {
|
||||
fn import_control_file(timeline: &dyn Timeline, lsn: Lsn, path: &Path) -> Result<()> {
|
||||
let mut file = File::open(path)?;
|
||||
let mut buffer = Vec::new();
|
||||
// read the whole file
|
||||
file.read_to_end(&mut buffer)?;
|
||||
|
||||
trace!("importing control file {}", path.display());
|
||||
info!("importing control file {}", path.display());
|
||||
|
||||
// Import it as ControlFile
|
||||
timeline.put_page_image(
|
||||
@@ -251,24 +232,19 @@ fn import_control_file(
|
||||
let checkpoint_bytes = pg_control.checkPointCopy.encode();
|
||||
timeline.put_page_image(RelishTag::Checkpoint, 0, lsn, checkpoint_bytes)?;
|
||||
|
||||
Ok(pg_control)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Import an SLRU segment file
|
||||
///
|
||||
fn import_slru_file(
|
||||
timeline: &dyn TimelineWriter,
|
||||
lsn: Lsn,
|
||||
slru: SlruKind,
|
||||
path: &Path,
|
||||
) -> Result<()> {
|
||||
fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Path) -> Result<()> {
|
||||
// Does it look like an SLRU file?
|
||||
let mut file = File::open(path)?;
|
||||
let mut buf: [u8; 8192] = [0u8; 8192];
|
||||
let segno = u32::from_str_radix(path.file_name().unwrap().to_str().unwrap(), 16)?;
|
||||
|
||||
trace!("importing slru file {}", path.display());
|
||||
info!("importing slru file {}", path.display());
|
||||
|
||||
let mut rpageno = 0;
|
||||
loop {
|
||||
@@ -284,14 +260,14 @@ fn import_slru_file(
|
||||
}
|
||||
|
||||
// TODO: UnexpectedEof is expected
|
||||
Err(err) => match err.kind() {
|
||||
Err(e) => match e.kind() {
|
||||
std::io::ErrorKind::UnexpectedEof => {
|
||||
// reached EOF. That's expected.
|
||||
// FIXME: maybe check that we read the full length of the file?
|
||||
break;
|
||||
}
|
||||
_ => {
|
||||
bail!("error reading file {}: {:#}", path.display(), err);
|
||||
bail!("error reading file {}: {:#}", path.display(), e);
|
||||
}
|
||||
},
|
||||
};
|
||||
@@ -303,119 +279,18 @@ fn import_slru_file(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Scan PostgreSQL WAL files in given directory and load all records between
|
||||
/// 'startpoint' and 'endpoint' into the repository.
|
||||
fn import_wal(
|
||||
walpath: &Path,
|
||||
timeline: &dyn TimelineWriter,
|
||||
startpoint: Lsn,
|
||||
endpoint: Lsn,
|
||||
checkpoint: &mut CheckPoint,
|
||||
) -> Result<()> {
|
||||
let mut waldecoder = WalStreamDecoder::new(startpoint);
|
||||
|
||||
let mut segno = startpoint.segment_number(pg_constants::WAL_SEGMENT_SIZE);
|
||||
let mut offset = startpoint.segment_offset(pg_constants::WAL_SEGMENT_SIZE);
|
||||
let mut last_lsn = startpoint;
|
||||
|
||||
while last_lsn <= endpoint {
|
||||
// FIXME: assume postgresql tli 1 for now
|
||||
let filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE);
|
||||
let mut buf = Vec::new();
|
||||
|
||||
// Read local file
|
||||
let mut path = walpath.join(&filename);
|
||||
|
||||
// It could be as .partial
|
||||
if !PathBuf::from(&path).exists() {
|
||||
path = walpath.join(filename + ".partial");
|
||||
}
|
||||
|
||||
// Slurp the WAL file
|
||||
let mut file = File::open(&path)?;
|
||||
|
||||
if offset > 0 {
|
||||
file.seek(SeekFrom::Start(offset as u64))?;
|
||||
}
|
||||
|
||||
let nread = file.read_to_end(&mut buf)?;
|
||||
if nread != pg_constants::WAL_SEGMENT_SIZE - offset as usize {
|
||||
// Maybe allow this for .partial files?
|
||||
error!("read only {} bytes from WAL file", nread);
|
||||
}
|
||||
|
||||
waldecoder.feed_bytes(&buf);
|
||||
|
||||
let mut nrecords = 0;
|
||||
while last_lsn <= endpoint {
|
||||
if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
|
||||
let mut checkpoint_modified = false;
|
||||
|
||||
let decoded = decode_wal_record(recdata.clone());
|
||||
save_decoded_record(
|
||||
checkpoint,
|
||||
&mut checkpoint_modified,
|
||||
timeline,
|
||||
&decoded,
|
||||
recdata,
|
||||
lsn,
|
||||
)?;
|
||||
last_lsn = lsn;
|
||||
|
||||
if checkpoint_modified {
|
||||
let checkpoint_bytes = checkpoint.encode();
|
||||
timeline.put_page_image(
|
||||
RelishTag::Checkpoint,
|
||||
0,
|
||||
last_lsn,
|
||||
checkpoint_bytes,
|
||||
)?;
|
||||
}
|
||||
|
||||
// Now that this record has been fully handled, including updating the
|
||||
// checkpoint data, let the repository know that it is up-to-date to this LSN
|
||||
timeline.advance_last_record_lsn(last_lsn);
|
||||
nrecords += 1;
|
||||
|
||||
trace!("imported record at {} (end {})", lsn, endpoint);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("imported {} records up to {}", nrecords, last_lsn);
|
||||
|
||||
segno += 1;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
if last_lsn != startpoint {
|
||||
debug!(
|
||||
"reached end of WAL at {}, updating checkpoint info",
|
||||
last_lsn
|
||||
);
|
||||
|
||||
timeline.advance_last_record_lsn(last_lsn);
|
||||
} else {
|
||||
info!("no WAL to import at {}", last_lsn);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Helper function to parse a WAL record and call the Timeline's PUT functions for all the
|
||||
/// relations/pages that the record affects.
|
||||
///
|
||||
pub fn save_decoded_record(
|
||||
checkpoint: &mut CheckPoint,
|
||||
checkpoint_modified: &mut bool,
|
||||
timeline: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
decoded: &DecodedWALRecord,
|
||||
recdata: Bytes,
|
||||
lsn: Lsn,
|
||||
) -> Result<()> {
|
||||
if checkpoint.update_next_xid(decoded.xl_xid) {
|
||||
*checkpoint_modified = true;
|
||||
}
|
||||
checkpoint.update_next_xid(decoded.xl_xid);
|
||||
|
||||
// Iterate through all the blocks that the record modifies, and
|
||||
// "put" a separate copy of the record for each block.
|
||||
@@ -428,12 +303,13 @@ pub fn save_decoded_record(
|
||||
});
|
||||
|
||||
let rec = WALRecord {
|
||||
lsn,
|
||||
will_init: blk.will_init || blk.apply_image,
|
||||
rec: recdata.clone(),
|
||||
main_data_offset: decoded.main_data_offset as u32,
|
||||
};
|
||||
|
||||
timeline.put_wal_record(lsn, tag, blk.blkno, rec)?;
|
||||
timeline.put_wal_record(tag, blk.blkno, rec)?;
|
||||
}
|
||||
|
||||
let mut buf = decoded.record.clone();
|
||||
@@ -498,7 +374,7 @@ pub fn save_decoded_record(
|
||||
} else {
|
||||
assert!(info == pg_constants::CLOG_TRUNCATE);
|
||||
let xlrec = XlClogTruncate::decode(&mut buf);
|
||||
save_clog_truncate_record(checkpoint, checkpoint_modified, timeline, lsn, &xlrec)?;
|
||||
save_clog_truncate_record(checkpoint, timeline, lsn, &xlrec)?;
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_XACT_ID {
|
||||
let info = decoded.xl_info & pg_constants::XLOG_XACT_OPMASK;
|
||||
@@ -567,17 +443,10 @@ pub fn save_decoded_record(
|
||||
)?;
|
||||
} else if info == pg_constants::XLOG_MULTIXACT_CREATE_ID {
|
||||
let xlrec = XlMultiXactCreate::decode(&mut buf);
|
||||
save_multixact_create_record(
|
||||
checkpoint,
|
||||
checkpoint_modified,
|
||||
timeline,
|
||||
lsn,
|
||||
&xlrec,
|
||||
decoded,
|
||||
)?;
|
||||
save_multixact_create_record(checkpoint, timeline, lsn, &xlrec, decoded)?;
|
||||
} else if info == pg_constants::XLOG_MULTIXACT_TRUNCATE_ID {
|
||||
let xlrec = XlMultiXactTruncate::decode(&mut buf);
|
||||
save_multixact_truncate_record(checkpoint, checkpoint_modified, timeline, lsn, &xlrec)?;
|
||||
save_multixact_truncate_record(checkpoint, timeline, lsn, &xlrec)?;
|
||||
}
|
||||
} else if decoded.xl_rmid == pg_constants::RM_RELMAP_ID {
|
||||
let xlrec = XlRelmapUpdate::decode(&mut buf);
|
||||
@@ -586,10 +455,7 @@ pub fn save_decoded_record(
|
||||
let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;
|
||||
if info == pg_constants::XLOG_NEXTOID {
|
||||
let next_oid = buf.get_u32_le();
|
||||
if checkpoint.nextOid != next_oid {
|
||||
checkpoint.nextOid = next_oid;
|
||||
*checkpoint_modified = true;
|
||||
}
|
||||
checkpoint.nextOid = next_oid;
|
||||
} else if info == pg_constants::XLOG_CHECKPOINT_ONLINE
|
||||
|| info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
|
||||
{
|
||||
@@ -605,7 +471,6 @@ pub fn save_decoded_record(
|
||||
);
|
||||
if (checkpoint.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 {
|
||||
checkpoint.oldestXid = xlog_checkpoint.oldestXid;
|
||||
*checkpoint_modified = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -613,11 +478,7 @@ pub fn save_decoded_record(
|
||||
}
|
||||
|
||||
/// Subroutine of save_decoded_record(), to handle an XLOG_DBASE_CREATE record.
|
||||
fn save_xlog_dbase_create(
|
||||
timeline: &dyn TimelineWriter,
|
||||
lsn: Lsn,
|
||||
rec: &XlCreateDatabase,
|
||||
) -> Result<()> {
|
||||
fn save_xlog_dbase_create(timeline: &dyn Timeline, lsn: Lsn, rec: &XlCreateDatabase) -> Result<()> {
|
||||
let db_id = rec.db_id;
|
||||
let tablespace_id = rec.tablespace_id;
|
||||
let src_db_id = rec.src_db_id;
|
||||
@@ -694,11 +555,7 @@ fn save_xlog_dbase_create(
|
||||
/// Subroutine of save_decoded_record(), to handle an XLOG_SMGR_TRUNCATE record.
|
||||
///
|
||||
/// This is the same logic as in PostgreSQL's smgr_redo() function.
|
||||
fn save_xlog_smgr_truncate(
|
||||
timeline: &dyn TimelineWriter,
|
||||
lsn: Lsn,
|
||||
rec: &XlSmgrTruncate,
|
||||
) -> Result<()> {
|
||||
fn save_xlog_smgr_truncate(timeline: &dyn Timeline, lsn: Lsn, rec: &XlSmgrTruncate) -> Result<()> {
|
||||
let spcnode = rec.rnode.spcnode;
|
||||
let dbnode = rec.rnode.dbnode;
|
||||
let relnode = rec.rnode.relnode;
|
||||
@@ -760,7 +617,7 @@ fn save_xlog_smgr_truncate(
|
||||
/// Subroutine of save_decoded_record(), to handle an XLOG_XACT_* records.
|
||||
///
|
||||
fn save_xact_record(
|
||||
timeline: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
parsed: &XlXactParsedRecord,
|
||||
decoded: &DecodedWALRecord,
|
||||
@@ -771,12 +628,12 @@ fn save_xact_record(
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rec = WALRecord {
|
||||
lsn,
|
||||
will_init: false,
|
||||
rec: decoded.record.clone(),
|
||||
main_data_offset: decoded.main_data_offset as u32,
|
||||
};
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Slru {
|
||||
slru: SlruKind::Clog,
|
||||
segno,
|
||||
@@ -792,7 +649,6 @@ fn save_xact_record(
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Slru {
|
||||
slru: SlruKind::Clog,
|
||||
segno,
|
||||
@@ -818,8 +674,7 @@ fn save_xact_record(
|
||||
|
||||
fn save_clog_truncate_record(
|
||||
checkpoint: &mut CheckPoint,
|
||||
checkpoint_modified: &mut bool,
|
||||
timeline: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
xlrec: &XlClogTruncate,
|
||||
) -> Result<()> {
|
||||
@@ -837,7 +692,6 @@ fn save_clog_truncate_record(
|
||||
// TODO Figure out if there will be any issues with replica.
|
||||
checkpoint.oldestXid = xlrec.oldest_xid;
|
||||
checkpoint.oldestXidDB = xlrec.oldest_xid_db;
|
||||
*checkpoint_modified = true;
|
||||
|
||||
// TODO Treat AdvanceOldestClogXid() or write a comment why we don't need it
|
||||
|
||||
@@ -880,13 +734,13 @@ fn save_clog_truncate_record(
|
||||
|
||||
fn save_multixact_create_record(
|
||||
checkpoint: &mut CheckPoint,
|
||||
checkpoint_modified: &mut bool,
|
||||
timeline: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
xlrec: &XlMultiXactCreate,
|
||||
decoded: &DecodedWALRecord,
|
||||
) -> Result<()> {
|
||||
let rec = WALRecord {
|
||||
lsn,
|
||||
will_init: false,
|
||||
rec: decoded.record.clone(),
|
||||
main_data_offset: decoded.main_data_offset as u32,
|
||||
@@ -895,7 +749,6 @@ fn save_multixact_create_record(
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Slru {
|
||||
slru: SlruKind::MultiXactOffsets,
|
||||
segno,
|
||||
@@ -915,7 +768,6 @@ fn save_multixact_create_record(
|
||||
let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
|
||||
timeline.put_wal_record(
|
||||
lsn,
|
||||
RelishTag::Slru {
|
||||
slru: SlruKind::MultiXactMembers,
|
||||
segno,
|
||||
@@ -938,11 +790,9 @@ fn save_multixact_create_record(
|
||||
}
|
||||
if xlrec.mid >= checkpoint.nextMulti {
|
||||
checkpoint.nextMulti = xlrec.mid + 1;
|
||||
*checkpoint_modified = true;
|
||||
}
|
||||
if xlrec.moff + xlrec.nmembers > checkpoint.nextMultiOffset {
|
||||
checkpoint.nextMultiOffset = xlrec.moff + xlrec.nmembers;
|
||||
*checkpoint_modified = true;
|
||||
}
|
||||
let max_mbr_xid = xlrec.members.iter().fold(0u32, |acc, mbr| {
|
||||
if mbr.xid.wrapping_sub(acc) as i32 > 0 {
|
||||
@@ -952,22 +802,18 @@ fn save_multixact_create_record(
|
||||
}
|
||||
});
|
||||
|
||||
if checkpoint.update_next_xid(max_mbr_xid) {
|
||||
*checkpoint_modified = true;
|
||||
}
|
||||
checkpoint.update_next_xid(max_mbr_xid);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn save_multixact_truncate_record(
|
||||
checkpoint: &mut CheckPoint,
|
||||
checkpoint_modified: &mut bool,
|
||||
timeline: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
xlrec: &XlMultiXactTruncate,
|
||||
) -> Result<()> {
|
||||
checkpoint.oldestMulti = xlrec.end_trunc_off;
|
||||
checkpoint.oldestMultiDB = xlrec.oldest_multi_db;
|
||||
*checkpoint_modified = true;
|
||||
|
||||
// PerformMembersTruncation
|
||||
let maxsegment: i32 = mx_offset_to_member_segment(pg_constants::MAX_MULTIXACT_OFFSET);
|
||||
@@ -1001,7 +847,7 @@ fn save_multixact_truncate_record(
|
||||
}
|
||||
|
||||
fn save_relmap_page(
|
||||
timeline: &dyn TimelineWriter,
|
||||
timeline: &dyn Timeline,
|
||||
lsn: Lsn,
|
||||
xlrec: &XlRelmapUpdate,
|
||||
decoded: &DecodedWALRecord,
|
||||
|
||||
@@ -8,92 +8,35 @@ use crate::walredo::PostgresRedoManager;
|
||||
use crate::PageServerConf;
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use log::{debug, info};
|
||||
use log::info;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
use std::fs;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex, MutexGuard};
|
||||
use std::thread::JoinHandle;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
lazy_static! {
|
||||
static ref TENANTS: Mutex<HashMap<ZTenantId, Tenant>> = Mutex::new(HashMap::new());
|
||||
}
|
||||
|
||||
struct Tenant {
|
||||
state: TenantState,
|
||||
repo: Option<Arc<dyn Repository>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum TenantState {
|
||||
// This tenant only exists in cloud storage. It cannot be accessed.
|
||||
CloudOnly,
|
||||
// This tenant exists in cloud storage, and we are currently downloading it to local disk.
|
||||
// It cannot be accessed yet, not until it's been fully downloaded to local disk.
|
||||
Downloading,
|
||||
// All data for this tenant is complete on local disk, but we haven't loaded the Repository,
|
||||
// Timeline and Layer structs into memory yet, so it cannot be accessed yet.
|
||||
//Ready,
|
||||
// This tenant exists on local disk, and the layer map has been loaded into memory.
|
||||
// The local disk might have some newer files that don't exist in cloud storage yet.
|
||||
Active,
|
||||
// This tenant exists on local disk, and the layer map has been loaded into memory.
|
||||
// The local disk might have some newer files that don't exist in cloud storage yet.
|
||||
// The tenant cannot be accessed anymore for any reason, but graceful shutdown.
|
||||
//Stopping,
|
||||
}
|
||||
|
||||
impl fmt::Display for TenantState {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
TenantState::CloudOnly => f.write_str("CloudOnly"),
|
||||
TenantState::Downloading => f.write_str("Downloading"),
|
||||
TenantState::Active => f.write_str("Active"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn access_tenants() -> MutexGuard<'static, HashMap<ZTenantId, Tenant>> {
|
||||
TENANTS.lock().unwrap()
|
||||
}
|
||||
|
||||
struct TenantHandleEntry {
|
||||
checkpointer_handle: Option<JoinHandle<()>>,
|
||||
gc_handle: Option<JoinHandle<()>>,
|
||||
}
|
||||
|
||||
// Logically these handles belong to Repository,
|
||||
// but it's just simpler to store them separately
|
||||
lazy_static! {
|
||||
static ref TENANT_HANDLES: Mutex<HashMap<ZTenantId, TenantHandleEntry>> =
|
||||
static ref REPOSITORY: Mutex<HashMap<ZTenantId, Arc<dyn Repository>>> =
|
||||
Mutex::new(HashMap::new());
|
||||
}
|
||||
|
||||
static SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
|
||||
fn access_repository() -> MutexGuard<'static, HashMap<ZTenantId, Arc<dyn Repository>>> {
|
||||
REPOSITORY.lock().unwrap()
|
||||
}
|
||||
|
||||
pub fn init(conf: &'static PageServerConf) {
|
||||
let mut m = access_repository();
|
||||
for dir_entry in fs::read_dir(conf.tenants_path()).unwrap() {
|
||||
let tenantid =
|
||||
ZTenantId::from_str(dir_entry.unwrap().file_name().to_str().unwrap()).unwrap();
|
||||
|
||||
{
|
||||
let mut m = access_tenants();
|
||||
let tenant = Tenant {
|
||||
state: TenantState::CloudOnly,
|
||||
repo: None,
|
||||
};
|
||||
m.insert(tenantid, tenant);
|
||||
}
|
||||
|
||||
init_repo(conf, tenantid);
|
||||
let repo = init_repo(conf, tenantid);
|
||||
info!("initialized storage for tenant: {}", &tenantid);
|
||||
m.insert(tenantid, repo);
|
||||
}
|
||||
}
|
||||
|
||||
fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) {
|
||||
fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) -> Arc<LayeredRepository> {
|
||||
// Set up a WAL redo manager, for applying WAL records.
|
||||
let walredo_mgr = PostgresRedoManager::new(conf, tenant_id);
|
||||
|
||||
@@ -104,22 +47,9 @@ fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) {
|
||||
tenant_id,
|
||||
true,
|
||||
));
|
||||
|
||||
let checkpointer_handle = LayeredRepository::launch_checkpointer_thread(conf, repo.clone());
|
||||
let gc_handle = LayeredRepository::launch_gc_thread(conf, repo.clone());
|
||||
|
||||
let mut handles = TENANT_HANDLES.lock().unwrap();
|
||||
let h = TenantHandleEntry {
|
||||
checkpointer_handle: Some(checkpointer_handle),
|
||||
gc_handle: Some(gc_handle),
|
||||
};
|
||||
|
||||
handles.insert(tenant_id, h);
|
||||
|
||||
let mut m = access_tenants();
|
||||
let tenant = m.get_mut(&tenant_id).unwrap();
|
||||
tenant.repo = Some(repo);
|
||||
tenant.state = TenantState::Active;
|
||||
LayeredRepository::launch_checkpointer_thread(conf, repo.clone());
|
||||
LayeredRepository::launch_gc_thread(conf, repo.clone());
|
||||
repo
|
||||
}
|
||||
|
||||
// TODO kb Currently unused function, will later be used when the relish storage downloads a new layer.
|
||||
@@ -134,23 +64,15 @@ pub fn register_relish_download(
|
||||
tenant_id,
|
||||
timeline_id
|
||||
);
|
||||
|
||||
{
|
||||
let mut m = access_tenants();
|
||||
let mut tenant = m.get_mut(&tenant_id).unwrap();
|
||||
tenant.state = TenantState::Downloading;
|
||||
match &tenant.repo {
|
||||
Some(repo) => init_timeline(repo.as_ref(), timeline_id),
|
||||
None => {
|
||||
log::info!("Initialize new repo");
|
||||
}
|
||||
match access_repository().entry(tenant_id) {
|
||||
Entry::Occupied(o) => init_timeline(o.get().as_ref(), timeline_id),
|
||||
Entry::Vacant(v) => {
|
||||
log::info!("New repo initialized");
|
||||
let new_repo = init_repo(conf, tenant_id);
|
||||
init_timeline(new_repo.as_ref(), timeline_id);
|
||||
v.insert(new_repo);
|
||||
}
|
||||
}
|
||||
|
||||
// init repo updates Tenant state
|
||||
init_repo(conf, tenant_id);
|
||||
let new_repo = get_repository_for_tenant(tenant_id).unwrap();
|
||||
init_timeline(new_repo.as_ref(), timeline_id);
|
||||
}
|
||||
|
||||
fn init_timeline(repo: &dyn Repository, timeline_id: ZTimelineId) {
|
||||
@@ -160,73 +82,29 @@ fn init_timeline(repo: &dyn Repository, timeline_id: ZTimelineId) {
|
||||
}
|
||||
}
|
||||
|
||||
// Check this flag in the thread loops to know when to exit
|
||||
pub fn shutdown_requested() -> bool {
|
||||
SHUTDOWN_REQUESTED.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
pub fn stop_tenant_threads(tenantid: ZTenantId) {
|
||||
let mut handles = TENANT_HANDLES.lock().unwrap();
|
||||
if let Some(h) = handles.get_mut(&tenantid) {
|
||||
h.checkpointer_handle.take().map(JoinHandle::join);
|
||||
debug!("checkpointer for tenant {} has stopped", tenantid);
|
||||
h.gc_handle.take().map(JoinHandle::join);
|
||||
debug!("gc for tenant {} has stopped", tenantid);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn shutdown_all_tenants() -> Result<()> {
|
||||
SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
|
||||
|
||||
let tenantids = list_tenantids()?;
|
||||
for tenantid in tenantids {
|
||||
stop_tenant_threads(tenantid);
|
||||
let repo = get_repository_for_tenant(tenantid)?;
|
||||
debug!("shutdown tenant {}", tenantid);
|
||||
repo.shutdown()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn create_repository_for_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenantid: ZTenantId,
|
||||
) -> Result<()> {
|
||||
{
|
||||
let mut m = access_tenants();
|
||||
// First check that the tenant doesn't exist already
|
||||
if m.get(&tenantid).is_some() {
|
||||
bail!("tenant {} already exists", tenantid);
|
||||
}
|
||||
let tenant = Tenant {
|
||||
state: TenantState::CloudOnly,
|
||||
repo: None,
|
||||
};
|
||||
m.insert(tenantid, tenant);
|
||||
}
|
||||
let mut m = access_repository();
|
||||
|
||||
// First check that the tenant doesn't exist already
|
||||
if m.get(&tenantid).is_some() {
|
||||
bail!("tenant {} already exists", tenantid);
|
||||
}
|
||||
let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenantid));
|
||||
let repo = branches::create_repo(conf, tenantid, wal_redo_manager)?;
|
||||
|
||||
let mut m = access_tenants();
|
||||
let tenant = m.get_mut(&tenantid).unwrap();
|
||||
tenant.repo = Some(repo);
|
||||
tenant.state = TenantState::Active;
|
||||
m.insert(tenantid, repo);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_repository_for_tenant(tenantid: ZTenantId) -> Result<Arc<dyn Repository>> {
|
||||
let m = access_tenants();
|
||||
let tenant = m
|
||||
access_repository()
|
||||
.get(&tenantid)
|
||||
.ok_or_else(|| anyhow!("Tenant not found for tenant {}", tenantid));
|
||||
|
||||
match &tenant.unwrap().repo {
|
||||
Some(repo) => Ok(Arc::clone(repo)),
|
||||
None => anyhow::bail!("Repository for tenant {} is not yet valid", tenantid),
|
||||
}
|
||||
.map(Arc::clone)
|
||||
.ok_or_else(|| anyhow!("repository not found for tenant name {}", tenantid))
|
||||
}
|
||||
|
||||
pub fn get_timeline_for_tenant(
|
||||
@@ -237,13 +115,3 @@ pub fn get_timeline_for_tenant(
|
||||
.get_timeline(timelineid)
|
||||
.with_context(|| format!("cannot fetch timeline {}", timelineid))
|
||||
}
|
||||
|
||||
fn list_tenantids() -> Result<Vec<ZTenantId>> {
|
||||
let m = access_tenants();
|
||||
m.iter()
|
||||
.map(|v| {
|
||||
let (tenantid, _) = v;
|
||||
Ok(*tenantid)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -72,10 +72,6 @@ impl WalStreamDecoder {
|
||||
/// Err(WalDecodeError): an error occured while decoding, meaning the input was invalid.
|
||||
///
|
||||
pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
|
||||
let recordbuf;
|
||||
|
||||
// Run state machine that validates page headers, and reassembles records
|
||||
// that cross page boundaries.
|
||||
loop {
|
||||
// parse and verify page boundaries as we go
|
||||
if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
|
||||
@@ -124,41 +120,29 @@ impl WalStreamDecoder {
|
||||
self.lsn += self.padlen as u64;
|
||||
self.padlen = 0;
|
||||
} else if self.contlen == 0 {
|
||||
assert!(self.recordbuf.is_empty());
|
||||
|
||||
// need to have at least the xl_tot_len field
|
||||
|
||||
if self.inputbuf.remaining() < 4 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// peek xl_tot_len at the beginning of the record.
|
||||
// FIXME: assumes little-endian
|
||||
// read xl_tot_len FIXME: assumes little-endian
|
||||
self.startlsn = self.lsn;
|
||||
let xl_tot_len = (&self.inputbuf[0..4]).get_u32_le();
|
||||
let xl_tot_len = self.inputbuf.get_u32_le();
|
||||
if (xl_tot_len as usize) < XLOG_SIZE_OF_XLOG_RECORD {
|
||||
return Err(WalDecodeError {
|
||||
msg: format!("invalid xl_tot_len {}", xl_tot_len),
|
||||
lsn: self.lsn,
|
||||
});
|
||||
}
|
||||
self.lsn += 4;
|
||||
|
||||
// Fast path for the common case that the whole record fits on the page.
|
||||
let pageleft = self.lsn.remaining_in_block() as u32;
|
||||
if self.inputbuf.remaining() >= xl_tot_len as usize && xl_tot_len <= pageleft {
|
||||
// Take the record from the 'inputbuf', and validate it.
|
||||
recordbuf = self.inputbuf.copy_to_bytes(xl_tot_len as usize);
|
||||
self.lsn += xl_tot_len as u64;
|
||||
break;
|
||||
} else {
|
||||
// Need to assemble the record from pieces. Remember the size of the
|
||||
// record, and loop back. On next iteration, we will reach the 'else'
|
||||
// branch below, and copy the part of the record that was on this page
|
||||
// to 'recordbuf'. Subsequent iterations will skip page headers, and
|
||||
// append the continuations from the next pages to 'recordbuf'.
|
||||
self.recordbuf.reserve(xl_tot_len as usize);
|
||||
self.contlen = xl_tot_len;
|
||||
continue;
|
||||
}
|
||||
self.recordbuf.clear();
|
||||
self.recordbuf.reserve(xl_tot_len as usize);
|
||||
self.recordbuf.put_u32_le(xl_tot_len);
|
||||
|
||||
self.contlen = xl_tot_len - 4;
|
||||
continue;
|
||||
} else {
|
||||
// we're continuing a record, possibly from previous page.
|
||||
let pageleft = self.lsn.remaining_in_block() as u32;
|
||||
@@ -175,42 +159,47 @@ impl WalStreamDecoder {
|
||||
self.contlen -= n as u32;
|
||||
|
||||
if self.contlen == 0 {
|
||||
// The record is now complete.
|
||||
recordbuf = std::mem::replace(&mut self.recordbuf, BytesMut::new()).freeze();
|
||||
break;
|
||||
let recordbuf = std::mem::replace(&mut self.recordbuf, BytesMut::new());
|
||||
|
||||
let recordbuf = recordbuf.freeze();
|
||||
let mut buf = recordbuf.clone();
|
||||
|
||||
let xlogrec = XLogRecord::from_bytes(&mut buf);
|
||||
|
||||
// XLOG_SWITCH records are special. If we see one, we need to skip
|
||||
// to the next WAL segment.
|
||||
if xlogrec.is_xlog_switch_record() {
|
||||
trace!("saw xlog switch record at {}", self.lsn);
|
||||
self.padlen =
|
||||
self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64) as u32;
|
||||
} else {
|
||||
// Pad to an 8-byte boundary
|
||||
self.padlen = self.lsn.calc_padding(8u32) as u32;
|
||||
}
|
||||
|
||||
let mut crc = crc32c_append(0, &recordbuf[XLOG_RECORD_CRC_OFFS + 4..]);
|
||||
crc = crc32c_append(crc, &recordbuf[0..XLOG_RECORD_CRC_OFFS]);
|
||||
if crc != xlogrec.xl_crc {
|
||||
return Err(WalDecodeError {
|
||||
msg: "WAL record crc mismatch".into(),
|
||||
lsn: self.lsn,
|
||||
});
|
||||
}
|
||||
|
||||
// Always align resulting LSN on 0x8 boundary -- that is important for getPage()
|
||||
// and WalReceiver integration. Since this code is used both for WalReceiver and
|
||||
// initial WAL import let's force alignment right here.
|
||||
let result = (self.lsn.align(), recordbuf);
|
||||
return Ok(Some(result));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// check record boundaries
|
||||
|
||||
// We now have a record in the 'recordbuf' local variable.
|
||||
let xlogrec = XLogRecord::from_slice(&recordbuf[0..XLOG_SIZE_OF_XLOG_RECORD]);
|
||||
// deal with continuation records
|
||||
|
||||
let mut crc = 0;
|
||||
crc = crc32c_append(crc, &recordbuf[XLOG_RECORD_CRC_OFFS + 4..]);
|
||||
crc = crc32c_append(crc, &recordbuf[0..XLOG_RECORD_CRC_OFFS]);
|
||||
if crc != xlogrec.xl_crc {
|
||||
return Err(WalDecodeError {
|
||||
msg: "WAL record crc mismatch".into(),
|
||||
lsn: self.lsn,
|
||||
});
|
||||
}
|
||||
|
||||
// XLOG_SWITCH records are special. If we see one, we need to skip
|
||||
// to the next WAL segment.
|
||||
if xlogrec.is_xlog_switch_record() {
|
||||
trace!("saw xlog switch record at {}", self.lsn);
|
||||
self.padlen = self.lsn.calc_padding(pg_constants::WAL_SEGMENT_SIZE as u64) as u32;
|
||||
} else {
|
||||
// Pad to an 8-byte boundary
|
||||
self.padlen = self.lsn.calc_padding(8u32) as u32;
|
||||
}
|
||||
|
||||
// Always align resulting LSN on 0x8 boundary -- that is important for getPage()
|
||||
// and WalReceiver integration. Since this code is used both for WalReceiver and
|
||||
// initial WAL import let's force alignment right here.
|
||||
let result = (self.lsn.align(), recordbuf);
|
||||
Ok(Some(result))
|
||||
// deal with xlog_switch records
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ use crate::waldecoder::*;
|
||||
use crate::PageServerConf;
|
||||
use anyhow::{bail, Error, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use log::*;
|
||||
use postgres::fallible_iterator::FallibleIterator;
|
||||
use postgres::replication::ReplicationIter;
|
||||
use postgres::{Client, NoTls, SimpleQueryMessage, SimpleQueryRow};
|
||||
@@ -24,10 +25,8 @@ use std::str::FromStr;
|
||||
use std::sync::Mutex;
|
||||
use std::thread;
|
||||
use std::thread::sleep;
|
||||
use std::thread::JoinHandle;
|
||||
use std::thread_local;
|
||||
use std::time::{Duration, SystemTime};
|
||||
use tracing::*;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
use zenith_utils::zid::ZTimelineId;
|
||||
@@ -37,7 +36,6 @@ use zenith_utils::zid::ZTimelineId;
|
||||
//
|
||||
struct WalReceiverEntry {
|
||||
wal_producer_connstr: String,
|
||||
wal_receiver_handle: Option<JoinHandle<()>>,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
@@ -52,19 +50,6 @@ thread_local! {
|
||||
pub(crate) static IS_WAL_RECEIVER: Cell<bool> = Cell::new(false);
|
||||
}
|
||||
|
||||
// Wait for walreceiver to stop
|
||||
// Now it stops when pageserver shutdown is requested.
|
||||
// In future we can make this more granular and send shutdown signals
|
||||
// per tenant/timeline to cancel inactive walreceivers.
|
||||
// TODO deal with blocking pg connections
|
||||
pub fn stop_wal_receiver(timelineid: ZTimelineId) {
|
||||
let mut receivers = WAL_RECEIVERS.lock().unwrap();
|
||||
if let Some(r) = receivers.get_mut(&timelineid) {
|
||||
r.wal_receiver_handle.take();
|
||||
// r.wal_receiver_handle.take().map(JoinHandle::join);
|
||||
}
|
||||
}
|
||||
|
||||
// Launch a new WAL receiver, or tell one that's running about change in connection string
|
||||
pub fn launch_wal_receiver(
|
||||
conf: &'static PageServerConf,
|
||||
@@ -79,19 +64,19 @@ pub fn launch_wal_receiver(
|
||||
receiver.wal_producer_connstr = wal_producer_connstr.into();
|
||||
}
|
||||
None => {
|
||||
let wal_receiver_handle = thread::Builder::new()
|
||||
let receiver = WalReceiverEntry {
|
||||
wal_producer_connstr: wal_producer_connstr.into(),
|
||||
};
|
||||
receivers.insert(timelineid, receiver);
|
||||
|
||||
// Also launch a new thread to handle this connection
|
||||
let _walreceiver_thread = thread::Builder::new()
|
||||
.name("WAL receiver thread".into())
|
||||
.spawn(move || {
|
||||
IS_WAL_RECEIVER.with(|c| c.set(true));
|
||||
thread_main(conf, timelineid, tenantid);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let receiver = WalReceiverEntry {
|
||||
wal_producer_connstr: wal_producer_connstr.into(),
|
||||
wal_receiver_handle: Some(wal_receiver_handle),
|
||||
};
|
||||
receivers.insert(timelineid, receiver);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -111,14 +96,16 @@ fn get_wal_producer_connstr(timelineid: ZTimelineId) -> String {
|
||||
// This is the entry point for the WAL receiver thread.
|
||||
//
|
||||
fn thread_main(conf: &'static PageServerConf, timelineid: ZTimelineId, tenantid: ZTenantId) {
|
||||
let _enter = info_span!("WAL receiver", timeline = %timelineid, tenant = %tenantid).entered();
|
||||
info!("WAL receiver thread started");
|
||||
info!(
|
||||
"WAL receiver thread started for timeline : '{}'",
|
||||
timelineid
|
||||
);
|
||||
|
||||
//
|
||||
// Make a connection to the WAL safekeeper, or directly to the primary PostgreSQL server,
|
||||
// and start streaming WAL from it. If the connection is lost, keep retrying.
|
||||
//
|
||||
while !tenant_mgr::shutdown_requested() {
|
||||
loop {
|
||||
// Look up the current WAL producer address
|
||||
let wal_producer_connstr = get_wal_producer_connstr(timelineid);
|
||||
|
||||
@@ -132,7 +119,6 @@ fn thread_main(conf: &'static PageServerConf, timelineid: ZTimelineId, tenantid:
|
||||
sleep(Duration::from_secs(1));
|
||||
}
|
||||
}
|
||||
debug!("WAL streaming shut down");
|
||||
}
|
||||
|
||||
fn walreceiver_main(
|
||||
@@ -183,8 +169,8 @@ fn walreceiver_main(
|
||||
startpoint += startpoint.calc_padding(8u32);
|
||||
|
||||
info!(
|
||||
"last_record_lsn {} starting replication from {}, server is at {}...",
|
||||
last_rec_lsn, startpoint, end_of_wal
|
||||
"last_record_lsn {} starting replication from {} for timeline {}, server is at {}...",
|
||||
last_rec_lsn, startpoint, timelineid, end_of_wal
|
||||
);
|
||||
|
||||
let query = format!("START_REPLICATION PHYSICAL {}", startpoint);
|
||||
@@ -212,32 +198,27 @@ fn walreceiver_main(
|
||||
waldecoder.feed_bytes(data);
|
||||
|
||||
while let Some((lsn, recdata)) = waldecoder.poll_decode()? {
|
||||
let _enter = info_span!("processing record", lsn = %lsn).entered();
|
||||
// Save old checkpoint value to compare with it after decoding WAL record
|
||||
let old_checkpoint_bytes = checkpoint.encode();
|
||||
let decoded = decode_wal_record(recdata.clone());
|
||||
|
||||
// It is important to deal with the aligned records as lsn in getPage@LSN is
|
||||
// aligned and can be several bytes bigger. Without this alignment we are
|
||||
// at risk of hittind a deadlock.
|
||||
assert!(lsn.is_aligned());
|
||||
|
||||
let writer = timeline.writer();
|
||||
|
||||
let mut checkpoint_modified = false;
|
||||
|
||||
let decoded = decode_wal_record(recdata.clone());
|
||||
restore_local_repo::save_decoded_record(
|
||||
&mut checkpoint,
|
||||
&mut checkpoint_modified,
|
||||
writer.as_ref(),
|
||||
&*timeline,
|
||||
&decoded,
|
||||
recdata,
|
||||
lsn,
|
||||
)?;
|
||||
|
||||
let new_checkpoint_bytes = checkpoint.encode();
|
||||
// Check if checkpoint data was updated by save_decoded_record
|
||||
if checkpoint_modified {
|
||||
let new_checkpoint_bytes = checkpoint.encode();
|
||||
|
||||
writer.put_page_image(
|
||||
if new_checkpoint_bytes != old_checkpoint_bytes {
|
||||
timeline.put_page_image(
|
||||
RelishTag::Checkpoint,
|
||||
0,
|
||||
lsn,
|
||||
@@ -247,7 +228,7 @@ fn walreceiver_main(
|
||||
|
||||
// Now that this record has been fully handled, including updating the
|
||||
// checkpoint data, let the repository know that it is up-to-date to this LSN
|
||||
writer.advance_last_record_lsn(lsn);
|
||||
timeline.advance_last_record_lsn(lsn);
|
||||
last_rec_lsn = lsn;
|
||||
}
|
||||
|
||||
@@ -292,11 +273,6 @@ fn walreceiver_main(
|
||||
|
||||
physical_stream.standby_status_update(write_lsn, flush_lsn, apply_lsn, ts, NO_REPLY)?;
|
||||
}
|
||||
|
||||
if tenant_mgr::shutdown_requested() {
|
||||
debug!("stop walreceiver because pageserver shutdown is requested");
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -82,7 +82,7 @@ pub trait WalRedoManager: Send + Sync {
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, WALRecord)>,
|
||||
records: Vec<WALRecord>,
|
||||
) -> Result<Bytes, WalRedoError>;
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ impl crate::walredo::WalRedoManager for DummyRedoManager {
|
||||
_blknum: u32,
|
||||
_lsn: Lsn,
|
||||
_base_img: Option<Bytes>,
|
||||
_records: Vec<(Lsn, WALRecord)>,
|
||||
_records: Vec<WALRecord>,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
Err(WalRedoError::InvalidState)
|
||||
}
|
||||
@@ -150,7 +150,7 @@ struct WalRedoRequest {
|
||||
lsn: Lsn,
|
||||
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, WALRecord)>,
|
||||
records: Vec<WALRecord>,
|
||||
}
|
||||
|
||||
/// An error happened in WAL redo
|
||||
@@ -179,7 +179,7 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
blknum: u32,
|
||||
lsn: Lsn,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, WALRecord)>,
|
||||
records: Vec<WALRecord>,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
let start_time;
|
||||
let lock_time;
|
||||
@@ -277,7 +277,7 @@ impl PostgresRedoManager {
|
||||
page.extend_from_slice(&ZERO_PAGE);
|
||||
}
|
||||
// Apply all collected WAL records
|
||||
for (_lsn, record) in records {
|
||||
for record in records {
|
||||
let mut buf = record.rec.clone();
|
||||
|
||||
WAL_REDO_RECORD_COUNTER.inc();
|
||||
@@ -544,7 +544,7 @@ impl PostgresRedoProcess {
|
||||
&mut self,
|
||||
tag: BufferTag,
|
||||
base_img: Option<Bytes>,
|
||||
records: &[(Lsn, WALRecord)],
|
||||
records: &[WALRecord],
|
||||
) -> Result<Bytes, std::io::Error> {
|
||||
let stdout = &mut self.stdout;
|
||||
// Buffer the writes to avoid a lot of small syscalls.
|
||||
@@ -565,16 +565,22 @@ impl PostgresRedoProcess {
|
||||
stdin.write_all(&build_begin_redo_for_block_msg(tag)),
|
||||
)
|
||||
.await??;
|
||||
if let Some(img) = base_img {
|
||||
timeout(TIMEOUT, stdin.write_all(&build_push_page_msg(tag, &img))).await??;
|
||||
if base_img.is_some() {
|
||||
timeout(
|
||||
TIMEOUT,
|
||||
stdin.write_all(&build_push_page_msg(tag, base_img.unwrap())),
|
||||
)
|
||||
.await??;
|
||||
}
|
||||
|
||||
// Send WAL records.
|
||||
for (lsn, rec) in records.iter() {
|
||||
for rec in records.iter() {
|
||||
let r = rec.clone();
|
||||
|
||||
WAL_REDO_RECORD_COUNTER.inc();
|
||||
|
||||
stdin
|
||||
.write_all(&build_apply_record_msg(*lsn, &rec.rec))
|
||||
.write_all(&build_apply_record_msg(r.lsn, r.rec))
|
||||
.await?;
|
||||
|
||||
//debug!("sent WAL record to wal redo postgres process ({:X}/{:X}",
|
||||
@@ -611,41 +617,58 @@ impl PostgresRedoProcess {
|
||||
// process. See vendor/postgres/src/backend/tcop/zenith_wal_redo.c for
|
||||
// explanation of the protocol.
|
||||
|
||||
fn build_begin_redo_for_block_msg(tag: BufferTag) -> Vec<u8> {
|
||||
fn build_begin_redo_for_block_msg(tag: BufferTag) -> Bytes {
|
||||
let len = 4 + 1 + 4 * 4;
|
||||
let mut buf = Vec::with_capacity(1 + len);
|
||||
let mut buf = BytesMut::with_capacity(1 + len);
|
||||
|
||||
buf.put_u8(b'B');
|
||||
buf.put_u32(len as u32);
|
||||
|
||||
tag.ser_into(&mut buf)
|
||||
// FIXME: this is a temporary hack that should go away when we refactor
|
||||
// the postgres protocol serialization + handlers.
|
||||
//
|
||||
// BytesMut is a dynamic growable buffer, used a lot in tokio code but
|
||||
// not in the std library. To write to a BytesMut from a serde serializer,
|
||||
// we need to either:
|
||||
// - pre-allocate the required buffer space. This is annoying because we
|
||||
// shouldn't care what the exact serialized size is-- that's the
|
||||
// serializer's job.
|
||||
// - Or, we need to create a temporary "writer" (which implements the
|
||||
// `Write` trait). It's a bit awkward, because the writer consumes the
|
||||
// underlying BytesMut, and we need to extract it later with
|
||||
// `into_inner`.
|
||||
let mut writer = buf.writer();
|
||||
tag.ser_into(&mut writer)
|
||||
.expect("serialize BufferTag should always succeed");
|
||||
let buf = writer.into_inner();
|
||||
|
||||
debug_assert!(buf.len() == 1 + len);
|
||||
|
||||
buf
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
fn build_push_page_msg(tag: BufferTag, base_img: &[u8]) -> Vec<u8> {
|
||||
fn build_push_page_msg(tag: BufferTag, base_img: Bytes) -> Bytes {
|
||||
assert!(base_img.len() == 8192);
|
||||
|
||||
let len = 4 + 1 + 4 * 4 + base_img.len();
|
||||
let mut buf = Vec::with_capacity(1 + len);
|
||||
let mut buf = BytesMut::with_capacity(1 + len);
|
||||
|
||||
buf.put_u8(b'P');
|
||||
buf.put_u32(len as u32);
|
||||
tag.ser_into(&mut buf)
|
||||
let mut writer = buf.writer();
|
||||
tag.ser_into(&mut writer)
|
||||
.expect("serialize BufferTag should always succeed");
|
||||
let mut buf = writer.into_inner();
|
||||
buf.put(base_img);
|
||||
|
||||
debug_assert!(buf.len() == 1 + len);
|
||||
|
||||
buf
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
fn build_apply_record_msg(endlsn: Lsn, rec: &[u8]) -> Vec<u8> {
|
||||
fn build_apply_record_msg(endlsn: Lsn, rec: Bytes) -> Bytes {
|
||||
let len = 4 + 8 + rec.len();
|
||||
let mut buf: Vec<u8> = Vec::with_capacity(1 + len);
|
||||
let mut buf = BytesMut::with_capacity(1 + len);
|
||||
|
||||
buf.put_u8(b'A');
|
||||
buf.put_u32(len as u32);
|
||||
@@ -654,19 +677,21 @@ fn build_apply_record_msg(endlsn: Lsn, rec: &[u8]) -> Vec<u8> {
|
||||
|
||||
debug_assert!(buf.len() == 1 + len);
|
||||
|
||||
buf
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
fn build_get_page_msg(tag: BufferTag) -> Vec<u8> {
|
||||
fn build_get_page_msg(tag: BufferTag) -> Bytes {
|
||||
let len = 4 + 1 + 4 * 4;
|
||||
let mut buf = Vec::with_capacity(1 + len);
|
||||
let mut buf = BytesMut::with_capacity(1 + len);
|
||||
|
||||
buf.put_u8(b'G');
|
||||
buf.put_u32(len as u32);
|
||||
tag.ser_into(&mut buf)
|
||||
let mut writer = buf.writer();
|
||||
tag.ser_into(&mut writer)
|
||||
.expect("serialize BufferTag should always succeed");
|
||||
let buf = writer.into_inner();
|
||||
|
||||
debug_assert!(buf.len() == 1 + len);
|
||||
|
||||
buf
|
||||
buf.freeze()
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
use crate::pg_constants;
|
||||
use crate::CheckPoint;
|
||||
use crate::ControlFileData;
|
||||
use crate::FullTransactionId;
|
||||
use crate::XLogLongPageHeaderData;
|
||||
use crate::XLogPageHeaderData;
|
||||
@@ -17,8 +18,8 @@ use crate::XLOG_PAGE_MAGIC;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use bytes::BytesMut;
|
||||
use bytes::{Buf, Bytes};
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use crc32c::*;
|
||||
use log::*;
|
||||
use std::cmp::max;
|
||||
@@ -328,12 +329,7 @@ pub fn main() {
|
||||
}
|
||||
|
||||
impl XLogRecord {
|
||||
pub fn from_slice(buf: &[u8]) -> XLogRecord {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
XLogRecord::des(buf).unwrap()
|
||||
}
|
||||
|
||||
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogRecord {
|
||||
pub fn from_bytes(buf: &mut Bytes) -> XLogRecord {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
XLogRecord::des_from(&mut buf.reader()).unwrap()
|
||||
}
|
||||
@@ -381,12 +377,10 @@ impl CheckPoint {
|
||||
Ok(CheckPoint::des(buf)?)
|
||||
}
|
||||
|
||||
/// Update next XID based on provided new_xid and stored epoch.
|
||||
/// Next XID should be greater than new_xid. This handles 32-bit
|
||||
/// XID wraparound correctly.
|
||||
///
|
||||
/// Returns 'true' if the XID was updated.
|
||||
pub fn update_next_xid(&mut self, xid: u32) -> bool {
|
||||
// Update next XID based on provided new_xid and stored epoch.
|
||||
// Next XID should be greater than new_xid.
|
||||
// Also take in account 32-bit wrap-around.
|
||||
pub fn update_next_xid(&mut self, xid: u32) {
|
||||
let xid = xid.wrapping_add(XID_CHECKPOINT_INTERVAL - 1) & !(XID_CHECKPOINT_INTERVAL - 1);
|
||||
let full_xid = self.nextXid.value;
|
||||
let new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
|
||||
@@ -397,37 +391,35 @@ impl CheckPoint {
|
||||
// wrap-around
|
||||
epoch += 1;
|
||||
}
|
||||
let nextXid = (epoch << 32) | new_xid as u64;
|
||||
|
||||
if nextXid != self.nextXid.value {
|
||||
self.nextXid = FullTransactionId { value: nextXid };
|
||||
return true;
|
||||
}
|
||||
self.nextXid = FullTransactionId {
|
||||
value: (epoch << 32) | new_xid as u64,
|
||||
};
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Generate new, empty WAL segment.
|
||||
// Generate new WAL segment with single XLOG_CHECKPOINT_SHUTDOWN record.
|
||||
// We need this segment to start compute node.
|
||||
// In order to minimize changes in Postgres core, we prefer to
|
||||
// provide WAL segment from which is can extract checkpoint record in standard way,
|
||||
// rather then implement some alternative mechanism.
|
||||
//
|
||||
pub fn generate_wal_segment(segno: u64, system_id: u64) -> Bytes {
|
||||
pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes {
|
||||
let mut seg_buf = BytesMut::with_capacity(pg_constants::WAL_SEGMENT_SIZE as usize);
|
||||
|
||||
let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, pg_constants::WAL_SEGMENT_SIZE);
|
||||
let hdr = XLogLongPageHeaderData {
|
||||
std: {
|
||||
XLogPageHeaderData {
|
||||
xlp_magic: XLOG_PAGE_MAGIC as u16,
|
||||
xlp_info: pg_constants::XLP_LONG_HEADER,
|
||||
xlp_tli: 1, // FIXME: always use Postgres timeline 1
|
||||
xlp_pageaddr: pageaddr,
|
||||
xlp_pageaddr: pg_control.checkPoint - XLOG_SIZE_OF_XLOG_LONG_PHD as u64,
|
||||
xlp_rem_len: 0,
|
||||
..Default::default() // Put 0 in padding fields.
|
||||
}
|
||||
},
|
||||
xlp_sysid: system_id,
|
||||
xlp_sysid: pg_control.system_identifier,
|
||||
xlp_seg_size: pg_constants::WAL_SEGMENT_SIZE as u32,
|
||||
xlp_xlog_blcksz: XLOG_BLCKSZ as u32,
|
||||
};
|
||||
@@ -435,6 +427,36 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Bytes {
|
||||
let hdr_bytes = hdr.encode();
|
||||
seg_buf.extend_from_slice(&hdr_bytes);
|
||||
|
||||
let rec_hdr = XLogRecord {
|
||||
xl_tot_len: (XLOG_SIZE_OF_XLOG_RECORD
|
||||
+ SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT
|
||||
+ SIZEOF_CHECKPOINT) as u32,
|
||||
xl_xid: 0, //0 is for InvalidTransactionId
|
||||
xl_prev: 0,
|
||||
xl_info: pg_constants::XLOG_CHECKPOINT_SHUTDOWN,
|
||||
xl_rmid: pg_constants::RM_XLOG_ID,
|
||||
xl_crc: 0,
|
||||
..Default::default() // Put 0 in padding fields.
|
||||
};
|
||||
|
||||
let mut rec_shord_hdr_bytes = BytesMut::new();
|
||||
rec_shord_hdr_bytes.put_u8(pg_constants::XLR_BLOCK_ID_DATA_SHORT);
|
||||
rec_shord_hdr_bytes.put_u8(SIZEOF_CHECKPOINT as u8);
|
||||
|
||||
let rec_bytes = rec_hdr.encode();
|
||||
let checkpoint_bytes = pg_control.checkPointCopy.encode();
|
||||
|
||||
//calculate record checksum
|
||||
let mut crc = 0;
|
||||
crc = crc32c_append(crc, &rec_shord_hdr_bytes[..]);
|
||||
crc = crc32c_append(crc, &checkpoint_bytes[..]);
|
||||
crc = crc32c_append(crc, &rec_bytes[0..XLOG_RECORD_CRC_OFFS]);
|
||||
|
||||
seg_buf.extend_from_slice(&rec_bytes[0..XLOG_RECORD_CRC_OFFS]);
|
||||
seg_buf.put_u32_le(crc);
|
||||
seg_buf.extend_from_slice(&rec_shord_hdr_bytes);
|
||||
seg_buf.extend_from_slice(&checkpoint_bytes);
|
||||
|
||||
//zero out the rest of the file
|
||||
seg_buf.resize(pg_constants::WAL_SEGMENT_SIZE, 0);
|
||||
seg_buf.freeze()
|
||||
|
||||
@@ -12,7 +12,7 @@ pub struct DatabaseInfo {
|
||||
pub port: u16,
|
||||
pub dbname: String,
|
||||
pub user: String,
|
||||
pub password: Option<String>,
|
||||
pub password: String,
|
||||
}
|
||||
|
||||
impl DatabaseInfo {
|
||||
@@ -24,23 +24,12 @@ impl DatabaseInfo {
|
||||
.next()
|
||||
.ok_or_else(|| anyhow::Error::msg("cannot resolve at least one SocketAddr"))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DatabaseInfo> for tokio_postgres::Config {
|
||||
fn from(db_info: DatabaseInfo) -> Self {
|
||||
let mut config = tokio_postgres::Config::new();
|
||||
|
||||
config
|
||||
.host(&db_info.host)
|
||||
.port(db_info.port)
|
||||
.dbname(&db_info.dbname)
|
||||
.user(&db_info.user);
|
||||
|
||||
if let Some(password) = db_info.password {
|
||||
config.password(password);
|
||||
}
|
||||
|
||||
config
|
||||
pub fn conn_string(&self) -> String {
|
||||
format!(
|
||||
"dbname={} user={} password={}",
|
||||
self.dbname, self.user, self.password
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -145,18 +145,18 @@ fn main() -> anyhow::Result<()> {
|
||||
println!("Starting mgmt on {}", state.conf.mgmt_address);
|
||||
let mgmt_listener = TcpListener::bind(state.conf.mgmt_address)?;
|
||||
|
||||
let threads = [
|
||||
let threads = vec![
|
||||
// Spawn a thread to listen for connections. It will spawn further threads
|
||||
// for each connection.
|
||||
thread::Builder::new()
|
||||
.name("Listener thread".into())
|
||||
.name("Proxy thread".into())
|
||||
.spawn(move || proxy::thread_main(state, pageserver_listener))?,
|
||||
thread::Builder::new()
|
||||
.name("Mgmt thread".into())
|
||||
.spawn(move || mgmt::thread_main(state, mgmt_listener))?,
|
||||
];
|
||||
|
||||
for t in threads {
|
||||
for t in threads.into_iter() {
|
||||
t.join().unwrap()?;
|
||||
}
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ pub fn thread_main(state: &'static ProxyState, listener: TcpListener) -> anyhow:
|
||||
|
||||
pub fn mgmt_conn_main(state: &'static ProxyState, socket: TcpStream) -> anyhow::Result<()> {
|
||||
let mut conn_handler = MgmtHandler { state };
|
||||
let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None, true)?;
|
||||
let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None)?;
|
||||
pgbackend.run(&mut conn_handler)
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ use anyhow::bail;
|
||||
use tokio_postgres::NoTls;
|
||||
|
||||
use rand::Rng;
|
||||
use std::io::Write;
|
||||
use std::{io, sync::mpsc::channel, thread};
|
||||
use zenith_utils::postgres_backend::Stream;
|
||||
use zenith_utils::postgres_backend::{PostgresBackend, ProtoState};
|
||||
@@ -27,13 +28,11 @@ pub fn thread_main(
|
||||
println!("accepted connection from {}", peer_addr);
|
||||
socket.set_nodelay(true).unwrap();
|
||||
|
||||
thread::Builder::new()
|
||||
.name("Proxy thread".into())
|
||||
.spawn(move || {
|
||||
if let Err(err) = proxy_conn_main(state, socket) {
|
||||
println!("error: {}", err);
|
||||
}
|
||||
})?;
|
||||
thread::spawn(move || {
|
||||
if let Err(err) = proxy_conn_main(state, socket) {
|
||||
println!("error: {}", err);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,7 +64,6 @@ pub fn proxy_conn_main(
|
||||
socket,
|
||||
postgres_backend::AuthType::MD5,
|
||||
state.conf.ssl_config.clone(),
|
||||
false,
|
||||
)?,
|
||||
md5_salt: [0u8; 4],
|
||||
psql_session_id: "".into(),
|
||||
@@ -159,7 +157,6 @@ impl ProxyConnection {
|
||||
fn handle_existing_user(&mut self) -> anyhow::Result<DatabaseInfo> {
|
||||
// ask password
|
||||
rand::thread_rng().fill(&mut self.md5_salt);
|
||||
|
||||
self.pgb
|
||||
.write_message(&BeMessage::AuthenticationMD5Password(&self.md5_salt))?;
|
||||
self.pgb.state = ProtoState::Authentication; // XXX
|
||||
@@ -252,68 +249,51 @@ databases without opening the browser.
|
||||
/// Create a TCP connection to a postgres database, authenticate with it, and receive the ReadyForQuery message
|
||||
async fn connect_to_db(db_info: DatabaseInfo) -> anyhow::Result<tokio::net::TcpStream> {
|
||||
let mut socket = tokio::net::TcpStream::connect(db_info.socket_addr()?).await?;
|
||||
let config = tokio_postgres::Config::from(db_info);
|
||||
let config = db_info.conn_string().parse::<tokio_postgres::Config>()?;
|
||||
let _ = config.connect_raw(&mut socket, NoTls).await?;
|
||||
Ok(socket)
|
||||
}
|
||||
|
||||
/// Concurrently proxy both directions of the client and server connections
|
||||
fn proxy(
|
||||
(client_read, client_write): (ReadStream, WriteStream),
|
||||
(server_read, server_write): (ReadStream, WriteStream),
|
||||
client_read: ReadStream,
|
||||
client_write: WriteStream,
|
||||
server_read: ReadStream,
|
||||
server_write: WriteStream,
|
||||
) -> anyhow::Result<()> {
|
||||
fn do_proxy(mut reader: impl io::Read, mut writer: WriteStream) -> io::Result<u64> {
|
||||
/// FlushWriter will make sure that every message is sent as soon as possible
|
||||
struct FlushWriter<W>(W);
|
||||
|
||||
impl<W: io::Write> io::Write for FlushWriter<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
// `std::io::copy` is guaranteed to exit if we return an error,
|
||||
// so we can afford to lose `res` in case `flush` fails
|
||||
let res = self.0.write(buf);
|
||||
if res.is_ok() {
|
||||
self.0.flush()?;
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
self.0.flush()
|
||||
}
|
||||
}
|
||||
|
||||
let res = std::io::copy(&mut reader, &mut FlushWriter(&mut writer));
|
||||
writer.shutdown(std::net::Shutdown::Both)?;
|
||||
res
|
||||
fn do_proxy(mut reader: ReadStream, mut writer: WriteStream) -> io::Result<()> {
|
||||
std::io::copy(&mut reader, &mut writer)?;
|
||||
writer.flush()?;
|
||||
writer.shutdown(std::net::Shutdown::Both)
|
||||
}
|
||||
|
||||
let client_to_server_jh = thread::spawn(move || do_proxy(client_read, server_write));
|
||||
|
||||
do_proxy(server_read, client_write)?;
|
||||
client_to_server_jh.join().unwrap()?;
|
||||
let res1 = do_proxy(server_read, client_write);
|
||||
let res2 = client_to_server_jh.join().unwrap();
|
||||
res1?;
|
||||
res2?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Proxy a client connection to a postgres database
|
||||
fn proxy_pass(pgb: PostgresBackend, db_info: DatabaseInfo) -> anyhow::Result<()> {
|
||||
let db_stream = {
|
||||
// We'll get rid of this once migration to async is complete
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()?;
|
||||
let db_stream = runtime.block_on(connect_to_db(db_info))?;
|
||||
let db_stream = db_stream.into_std()?;
|
||||
db_stream.set_nonblocking(false)?;
|
||||
|
||||
let stream = runtime.block_on(connect_to_db(db_info))?.into_std()?;
|
||||
stream.set_nonblocking(false)?;
|
||||
stream
|
||||
};
|
||||
let db_stream = zenith_utils::sock_split::BidiStream::from_tcp(db_stream);
|
||||
let (db_read, db_write) = db_stream.split();
|
||||
|
||||
let db = zenith_utils::sock_split::BidiStream::from_tcp(db_stream);
|
||||
|
||||
let client = match pgb.into_stream() {
|
||||
let stream = match pgb.into_stream() {
|
||||
Stream::Bidirectional(bidi_stream) => bidi_stream,
|
||||
_ => bail!("invalid stream"),
|
||||
};
|
||||
|
||||
proxy(client.split(), db.split())
|
||||
let (client_read, client_write) = stream.split();
|
||||
proxy(client_read, client_write, db_read, db_write)
|
||||
}
|
||||
|
||||
@@ -11,14 +11,11 @@ pyjwt = {extras = ["crypto"], version = "*"}
|
||||
requests = "*"
|
||||
pytest-xdist = "*"
|
||||
asyncpg = "*"
|
||||
cached-property = "*"
|
||||
|
||||
[dev-packages]
|
||||
yapf = "*"
|
||||
flake8 = "*"
|
||||
mypy = "*"
|
||||
# Behavior may change slightly between versions. These are run continuously,
|
||||
# so we pin exact versions to avoid suprising breaks. Update if comfortable.
|
||||
yapf = "==0.31.0"
|
||||
|
||||
[requires]
|
||||
# we need at least 3.6, but pipenv doesn't allow to say this directly
|
||||
|
||||
198
test_runner/Pipfile.lock
generated
198
test_runner/Pipfile.lock
generated
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "3645ae8d2dcf55bd2a54963c44cfeedf577f3b289d1077365214a80a7f36e643"
|
||||
"sha256": "3cdc048691824d0b93912b6b78a0aa01dc98f278212c1badb0cc2edbd2103c3a"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
@@ -43,108 +43,94 @@
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==21.2.0"
|
||||
},
|
||||
"cached-property": {
|
||||
"hashes": [
|
||||
"sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130",
|
||||
"sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.5.2"
|
||||
},
|
||||
"certifi": {
|
||||
"hashes": [
|
||||
"sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872",
|
||||
"sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"
|
||||
"sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee",
|
||||
"sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"
|
||||
],
|
||||
"version": "==2021.10.8"
|
||||
"version": "==2021.5.30"
|
||||
},
|
||||
"cffi": {
|
||||
"hashes": [
|
||||
"sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3",
|
||||
"sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2",
|
||||
"sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636",
|
||||
"sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20",
|
||||
"sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728",
|
||||
"sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27",
|
||||
"sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66",
|
||||
"sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443",
|
||||
"sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0",
|
||||
"sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7",
|
||||
"sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39",
|
||||
"sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605",
|
||||
"sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a",
|
||||
"sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37",
|
||||
"sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029",
|
||||
"sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139",
|
||||
"sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc",
|
||||
"sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df",
|
||||
"sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14",
|
||||
"sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880",
|
||||
"sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2",
|
||||
"sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a",
|
||||
"sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e",
|
||||
"sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474",
|
||||
"sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024",
|
||||
"sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8",
|
||||
"sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0",
|
||||
"sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e",
|
||||
"sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a",
|
||||
"sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e",
|
||||
"sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032",
|
||||
"sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6",
|
||||
"sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e",
|
||||
"sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b",
|
||||
"sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e",
|
||||
"sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954",
|
||||
"sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962",
|
||||
"sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c",
|
||||
"sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4",
|
||||
"sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55",
|
||||
"sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962",
|
||||
"sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023",
|
||||
"sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c",
|
||||
"sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6",
|
||||
"sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8",
|
||||
"sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382",
|
||||
"sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7",
|
||||
"sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc",
|
||||
"sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997",
|
||||
"sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796"
|
||||
"sha256:06c54a68935738d206570b20da5ef2b6b6d92b38ef3ec45c5422c0ebaf338d4d",
|
||||
"sha256:0c0591bee64e438883b0c92a7bed78f6290d40bf02e54c5bf0978eaf36061771",
|
||||
"sha256:19ca0dbdeda3b2615421d54bef8985f72af6e0c47082a8d26122adac81a95872",
|
||||
"sha256:22b9c3c320171c108e903d61a3723b51e37aaa8c81255b5e7ce102775bd01e2c",
|
||||
"sha256:26bb2549b72708c833f5abe62b756176022a7b9a7f689b571e74c8478ead51dc",
|
||||
"sha256:33791e8a2dc2953f28b8d8d300dde42dd929ac28f974c4b4c6272cb2955cb762",
|
||||
"sha256:3c8d896becff2fa653dc4438b54a5a25a971d1f4110b32bd3068db3722c80202",
|
||||
"sha256:4373612d59c404baeb7cbd788a18b2b2a8331abcc84c3ba40051fcd18b17a4d5",
|
||||
"sha256:487d63e1454627c8e47dd230025780e91869cfba4c753a74fda196a1f6ad6548",
|
||||
"sha256:48916e459c54c4a70e52745639f1db524542140433599e13911b2f329834276a",
|
||||
"sha256:4922cd707b25e623b902c86188aca466d3620892db76c0bdd7b99a3d5e61d35f",
|
||||
"sha256:55af55e32ae468e9946f741a5d51f9896da6b9bf0bbdd326843fec05c730eb20",
|
||||
"sha256:57e555a9feb4a8460415f1aac331a2dc833b1115284f7ded7278b54afc5bd218",
|
||||
"sha256:5d4b68e216fc65e9fe4f524c177b54964af043dde734807586cf5435af84045c",
|
||||
"sha256:64fda793737bc4037521d4899be780534b9aea552eb673b9833b01f945904c2e",
|
||||
"sha256:6d6169cb3c6c2ad50db5b868db6491a790300ade1ed5d1da29289d73bbe40b56",
|
||||
"sha256:7bcac9a2b4fdbed2c16fa5681356d7121ecabf041f18d97ed5b8e0dd38a80224",
|
||||
"sha256:80b06212075346b5546b0417b9f2bf467fea3bfe7352f781ffc05a8ab24ba14a",
|
||||
"sha256:818014c754cd3dba7229c0f5884396264d51ffb87ec86e927ef0be140bfdb0d2",
|
||||
"sha256:8eb687582ed7cd8c4bdbff3df6c0da443eb89c3c72e6e5dcdd9c81729712791a",
|
||||
"sha256:99f27fefe34c37ba9875f224a8f36e31d744d8083e00f520f133cab79ad5e819",
|
||||
"sha256:9f3e33c28cd39d1b655ed1ba7247133b6f7fc16fa16887b120c0c670e35ce346",
|
||||
"sha256:a8661b2ce9694ca01c529bfa204dbb144b275a31685a075ce123f12331be790b",
|
||||
"sha256:a9da7010cec5a12193d1af9872a00888f396aba3dc79186604a09ea3ee7c029e",
|
||||
"sha256:aedb15f0a5a5949ecb129a82b72b19df97bbbca024081ed2ef88bd5c0a610534",
|
||||
"sha256:b315d709717a99f4b27b59b021e6207c64620790ca3e0bde636a6c7f14618abb",
|
||||
"sha256:ba6f2b3f452e150945d58f4badd92310449876c4c954836cfb1803bdd7b422f0",
|
||||
"sha256:c33d18eb6e6bc36f09d793c0dc58b0211fccc6ae5149b808da4a62660678b156",
|
||||
"sha256:c9a875ce9d7fe32887784274dd533c57909b7b1dcadcc128a2ac21331a9765dd",
|
||||
"sha256:c9e005e9bd57bc987764c32a1bee4364c44fdc11a3cc20a40b93b444984f2b87",
|
||||
"sha256:d2ad4d668a5c0645d281dcd17aff2be3212bc109b33814bbb15c4939f44181cc",
|
||||
"sha256:d950695ae4381ecd856bcaf2b1e866720e4ab9a1498cba61c602e56630ca7195",
|
||||
"sha256:e22dcb48709fc51a7b58a927391b23ab37eb3737a98ac4338e2448bef8559b33",
|
||||
"sha256:e8c6a99be100371dbb046880e7a282152aa5d6127ae01783e37662ef73850d8f",
|
||||
"sha256:e9dc245e3ac69c92ee4c167fbdd7428ec1956d4e754223124991ef29eb57a09d",
|
||||
"sha256:eb687a11f0a7a1839719edd80f41e459cc5366857ecbed383ff376c4e3cc6afd",
|
||||
"sha256:eb9e2a346c5238a30a746893f23a9535e700f8192a68c07c0258e7ece6ff3728",
|
||||
"sha256:ed38b924ce794e505647f7c331b22a693bee1538fdf46b0222c4717b42f744e7",
|
||||
"sha256:f0010c6f9d1a4011e429109fda55a225921e3206e7f62a0c22a35344bfd13cca",
|
||||
"sha256:f0c5d1acbfca6ebdd6b1e3eded8d261affb6ddcf2186205518f1428b8569bb99",
|
||||
"sha256:f10afb1004f102c7868ebfe91c28f4a712227fe4cb24974350ace1f90e1febbf",
|
||||
"sha256:f174135f5609428cc6e1b9090f9268f5c8935fddb1b25ccb8255a2d50de6789e",
|
||||
"sha256:f3ebe6e73c319340830a9b2825d32eb6d8475c1dac020b4f0aa774ee3b898d1c",
|
||||
"sha256:f627688813d0a4140153ff532537fbe4afea5a3dffce1f9deb7f91f848a832b5",
|
||||
"sha256:fd4305f86f53dfd8cd3522269ed7fc34856a8ee3709a5e28b2836b2db9d4cd69"
|
||||
],
|
||||
"version": "==1.15.0"
|
||||
"version": "==1.14.6"
|
||||
},
|
||||
"charset-normalizer": {
|
||||
"hashes": [
|
||||
"sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0",
|
||||
"sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b"
|
||||
"sha256:5d209c0a931f215cee683b6445e2d77677e7e75e159f78def0db09d68fafcaa6",
|
||||
"sha256:5ec46d183433dcbd0ab716f2d7f29d8dee50505b3fdb40c6b985c7c4f5a3591f"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==2.0.7"
|
||||
"version": "==2.0.6"
|
||||
},
|
||||
"cryptography": {
|
||||
"hashes": [
|
||||
"sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6",
|
||||
"sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6",
|
||||
"sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c",
|
||||
"sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999",
|
||||
"sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e",
|
||||
"sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992",
|
||||
"sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d",
|
||||
"sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588",
|
||||
"sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa",
|
||||
"sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d",
|
||||
"sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd",
|
||||
"sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d",
|
||||
"sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953",
|
||||
"sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2",
|
||||
"sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8",
|
||||
"sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6",
|
||||
"sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9",
|
||||
"sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6",
|
||||
"sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad",
|
||||
"sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76"
|
||||
"sha256:0a7dcbcd3f1913f664aca35d47c1331fce738d44ec34b7be8b9d332151b0b01e",
|
||||
"sha256:1eb7bb0df6f6f583dd8e054689def236255161ebbcf62b226454ab9ec663746b",
|
||||
"sha256:21ca464b3a4b8d8e86ba0ee5045e103a1fcfac3b39319727bc0fc58c09c6aff7",
|
||||
"sha256:34dae04a0dce5730d8eb7894eab617d8a70d0c97da76b905de9efb7128ad7085",
|
||||
"sha256:3520667fda779eb788ea00080124875be18f2d8f0848ec00733c0ec3bb8219fc",
|
||||
"sha256:3c4129fc3fdc0fa8e40861b5ac0c673315b3c902bbdc05fc176764815b43dd1d",
|
||||
"sha256:3fa3a7ccf96e826affdf1a0a9432be74dc73423125c8f96a909e3835a5ef194a",
|
||||
"sha256:5b0fbfae7ff7febdb74b574055c7466da334a5371f253732d7e2e7525d570498",
|
||||
"sha256:695104a9223a7239d155d7627ad912953b540929ef97ae0c34c7b8bf30857e89",
|
||||
"sha256:8695456444f277af73a4877db9fc979849cd3ee74c198d04fc0776ebc3db52b9",
|
||||
"sha256:94cc5ed4ceaefcbe5bf38c8fba6a21fc1d365bb8fb826ea1688e3370b2e24a1c",
|
||||
"sha256:94fff993ee9bc1b2440d3b7243d488c6a3d9724cc2b09cdb297f6a886d040ef7",
|
||||
"sha256:9965c46c674ba8cc572bc09a03f4c649292ee73e1b683adb1ce81e82e9a6a0fb",
|
||||
"sha256:a00cf305f07b26c351d8d4e1af84ad7501eca8a342dedf24a7acb0e7b7406e14",
|
||||
"sha256:a305600e7a6b7b855cd798e00278161b681ad6e9b7eca94c721d5f588ab212af",
|
||||
"sha256:cd65b60cfe004790c795cc35f272e41a3df4631e2fb6b35aa7ac6ef2859d554e",
|
||||
"sha256:d2a6e5ef66503da51d2110edf6c403dc6b494cc0082f85db12f54e9c5d4c3ec5",
|
||||
"sha256:d9ec0e67a14f9d1d48dd87a2531009a9b251c02ea42851c060b25c782516ff06",
|
||||
"sha256:f44d141b8c4ea5eb4dbc9b3ad992d45580c1d22bf5e24363f2fbf50c2d7ae8a7"
|
||||
],
|
||||
"version": "==35.0.0"
|
||||
"version": "==3.4.8"
|
||||
},
|
||||
"execnet": {
|
||||
"hashes": [
|
||||
@@ -156,11 +142,11 @@
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff",
|
||||
"sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
|
||||
"sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
|
||||
"sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
|
||||
],
|
||||
"markers": "python_version >= '3'",
|
||||
"version": "==3.3"
|
||||
"version": "==3.2"
|
||||
},
|
||||
"iniconfig": {
|
||||
"hashes": [
|
||||
@@ -221,11 +207,11 @@
|
||||
"crypto"
|
||||
],
|
||||
"hashes": [
|
||||
"sha256:b888b4d56f06f6dcd777210c334e69c737be74755d3e5e9ee3fe67dc18a0ee41",
|
||||
"sha256:e0c4bb8d9f0af0c7f5b1ec4c5036309617d03d56932877f2f7a0beeb5318322f"
|
||||
"sha256:934d73fbba91b0483d3857d1aff50e96b2a892384ee2c17417ed3203f173fca1",
|
||||
"sha256:fba44e7898bbca160a2b2b501f492824fc8382485d3a6f11ba5d0c1937ce6130"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==2.3.0"
|
||||
"version": "==2.1.0"
|
||||
},
|
||||
"pyparsing": {
|
||||
"hashes": [
|
||||
@@ -286,21 +272,21 @@
|
||||
},
|
||||
"urllib3": {
|
||||
"hashes": [
|
||||
"sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece",
|
||||
"sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844"
|
||||
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
|
||||
"sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
|
||||
"version": "==1.26.7"
|
||||
"version": "==1.26.6"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
"flake8": {
|
||||
"hashes": [
|
||||
"sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d",
|
||||
"sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"
|
||||
"sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b",
|
||||
"sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==4.0.1"
|
||||
"version": "==3.9.2"
|
||||
},
|
||||
"mccabe": {
|
||||
"hashes": [
|
||||
@@ -347,19 +333,19 @@
|
||||
},
|
||||
"pycodestyle": {
|
||||
"hashes": [
|
||||
"sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20",
|
||||
"sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"
|
||||
"sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068",
|
||||
"sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
|
||||
"version": "==2.8.0"
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.7.0"
|
||||
},
|
||||
"pyflakes": {
|
||||
"hashes": [
|
||||
"sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c",
|
||||
"sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"
|
||||
"sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3",
|
||||
"sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
|
||||
"version": "==2.4.0"
|
||||
"version": "==2.3.1"
|
||||
},
|
||||
"toml": {
|
||||
"hashes": [
|
||||
|
||||
@@ -53,8 +53,8 @@ Useful environment variables:
|
||||
should go.
|
||||
`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
|
||||
|
||||
Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them:
|
||||
`pytest -s --log-cli-level=INFO ...`
|
||||
Let stdout and stderr go to the terminal instead of capturing them:
|
||||
`pytest -s ...`
|
||||
(Note many tests capture subprocess outputs separately, so this may not
|
||||
show much.)
|
||||
|
||||
@@ -95,13 +95,11 @@ Python destructors, e.g. `__del__()` aren't recommended for cleanup.
|
||||
|
||||
### Code quality
|
||||
|
||||
We force code formatting via yapf:
|
||||
|
||||
1. Install `yapf` and other tools (`flake8`, `mypy`) with `pipenv install --dev`.
|
||||
1. Reformat all your code by running `pipenv run yapf -ri .` in the `test_runner/` directory.
|
||||
|
||||
Before submitting a patch, please consider:
|
||||
|
||||
* Writing a couple of docstrings to clarify the reasoning behind a new test.
|
||||
* Running `flake8` (or a linter of your choice, e.g. `pycodestyle`) and fixing possible defects, if any.
|
||||
* Formatting the code with `yapf -r -i .` (TODO: implement an opt-in pre-commit hook for that).
|
||||
* (Optional) Typechecking the code with `mypy .`. Currently this mostly affects `fixtures/zenith_fixtures.py`.
|
||||
|
||||
The tools can be installed with `pipenv install --dev`.
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
|
||||
from contextlib import closing
|
||||
from typing import Iterator
|
||||
from uuid import uuid4
|
||||
@@ -5,6 +6,7 @@ import psycopg2
|
||||
from fixtures.zenith_fixtures import PortDistributor, Postgres, ZenithCli, ZenithPageserver, PgBin
|
||||
import pytest
|
||||
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
@@ -33,9 +35,7 @@ def test_pageserver_auth(pageserver_auth_enabled: ZenithPageserver):
|
||||
ps.safe_psql(f"tenant_create {uuid4().hex}", password=management_token)
|
||||
|
||||
# fail to create tenant using tenant token
|
||||
with pytest.raises(
|
||||
psycopg2.DatabaseError,
|
||||
match='Attempt to access management api with tenant scope. Permission denied'):
|
||||
with pytest.raises(psycopg2.DatabaseError, match='Attempt to access management api with tenant scope. Permission denied'):
|
||||
ps.safe_psql(f"tenant_create {uuid4().hex}", password=tenant_token)
|
||||
|
||||
|
||||
@@ -60,14 +60,14 @@ def test_compute_auth_to_pageserver(
|
||||
wa_factory.start_n_new(3, management_token)
|
||||
|
||||
with Postgres(
|
||||
zenith_cli=zenith_cli,
|
||||
repo_dir=repo_dir,
|
||||
pg_bin=pg_bin,
|
||||
tenant_id=ps.initial_tenant,
|
||||
port=port_distributor.get_port(),
|
||||
zenith_cli=zenith_cli,
|
||||
repo_dir=repo_dir,
|
||||
pg_bin=pg_bin,
|
||||
tenant_id=ps.initial_tenant,
|
||||
port=port_distributor.get_port(),
|
||||
).create_start(
|
||||
branch,
|
||||
wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
|
||||
branch,
|
||||
wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
|
||||
) as pg:
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import subprocess
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -13,7 +13,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
zenith_cli.run(["branch", "test_branch_behind", "empty"])
|
||||
|
||||
pgmain = postgres.create_start('test_branch_behind')
|
||||
log.info("postgres is running on 'test_branch_behind' branch")
|
||||
print("postgres is running on 'test_branch_behind' branch")
|
||||
|
||||
main_pg_conn = pgmain.connect()
|
||||
main_cur = main_pg_conn.cursor()
|
||||
@@ -27,7 +27,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
''')
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_a = main_cur.fetchone()[0]
|
||||
log.info(f'LSN after 100 rows: {lsn_a}')
|
||||
print('LSN after 100 rows: ' + lsn_a)
|
||||
|
||||
# Insert some more rows. (This generates enough WAL to fill a few segments.)
|
||||
main_cur.execute('''
|
||||
@@ -37,7 +37,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
''')
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_b = main_cur.fetchone()[0]
|
||||
log.info(f'LSN after 200100 rows: {lsn_b}')
|
||||
print('LSN after 200100 rows: ' + lsn_b)
|
||||
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
zenith_cli.run(["branch", "test_branch_behind_hundred", "test_branch_behind@" + lsn_a])
|
||||
@@ -52,7 +52,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_c = main_cur.fetchone()[0]
|
||||
log.info(f'LSN after 400100 rows: {lsn_c}')
|
||||
print('LSN after 400100 rows: ' + lsn_c)
|
||||
|
||||
# Branch at the point where only 200100 rows were inserted
|
||||
zenith_cli.run(["branch", "test_branch_behind_more", "test_branch_behind@" + lsn_b])
|
||||
@@ -86,10 +86,7 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
assert cur.fetchone() == (1, )
|
||||
|
||||
# branch at pre-initdb lsn
|
||||
#
|
||||
# FIXME: This works currently, but probably shouldn't be allowed
|
||||
try:
|
||||
zenith_cli.run(["branch", "test_branch_preinitdb", "test_branch_behind@0/42"])
|
||||
# FIXME: assert false, "branch with invalid LSN should have failed"
|
||||
except subprocess.CalledProcessError:
|
||||
log.info("Branch creation with pre-initdb LSN failed (as expected)")
|
||||
print("Branch creation with pre-initdb LSN failed (as expected)")
|
||||
|
||||
@@ -4,7 +4,6 @@ import os
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -18,17 +17,14 @@ def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
|
||||
# set agressive autovacuum to make sure that truncation will happen
|
||||
config = [
|
||||
'autovacuum_max_workers=10',
|
||||
'autovacuum_vacuum_threshold=0',
|
||||
'autovacuum_vacuum_insert_threshold=0',
|
||||
'autovacuum_vacuum_cost_delay=0',
|
||||
'autovacuum_vacuum_cost_limit=10000',
|
||||
'autovacuum_naptime =1s',
|
||||
'autovacuum_max_workers=10', 'autovacuum_vacuum_threshold=0',
|
||||
'autovacuum_vacuum_insert_threshold=0', 'autovacuum_vacuum_cost_delay=0',
|
||||
'autovacuum_vacuum_cost_limit=10000', 'autovacuum_naptime =1s',
|
||||
'autovacuum_freeze_max_age=100000'
|
||||
]
|
||||
|
||||
pg = postgres.create_start('test_clog_truncate', config_lines=config)
|
||||
log.info('postgres is running on test_clog_truncate branch')
|
||||
print('postgres is running on test_clog_truncate branch')
|
||||
|
||||
# Install extension containing function needed for test
|
||||
pg.safe_psql('CREATE EXTENSION zenith_test_utils')
|
||||
@@ -37,22 +33,22 @@ def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('select test_consume_xids(1000*1000*10);')
|
||||
log.info('xids consumed')
|
||||
print('xids consumed')
|
||||
|
||||
# call a checkpoint to trigger TruncateSubtrans
|
||||
cur.execute('CHECKPOINT;')
|
||||
|
||||
# ensure WAL flush
|
||||
cur.execute('select txid_current()')
|
||||
log.info(cur.fetchone())
|
||||
print(cur.fetchone())
|
||||
|
||||
# wait for autovacuum to truncate the pg_xact
|
||||
# XXX Is it worth to add a timeout here?
|
||||
pg_xact_0000_path = os.path.join(pg.pg_xact_dir_path(), '0000')
|
||||
log.info(f"pg_xact_0000_path = {pg_xact_0000_path}")
|
||||
print("pg_xact_0000_path = " + pg_xact_0000_path)
|
||||
|
||||
while os.path.isfile(pg_xact_0000_path):
|
||||
log.info(f"file exists. wait for truncation. " "pg_xact_0000_path = {pg_xact_0000_path}")
|
||||
print("file exists. wait for truncation. " "pg_xact_0000_path = " + pg_xact_0000_path)
|
||||
time.sleep(5)
|
||||
|
||||
# checkpoint to advance latest lsn
|
||||
@@ -63,14 +59,14 @@ def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
lsn_after_truncation = cur.fetchone()[0]
|
||||
|
||||
# create new branch after clog truncation and start a compute node on it
|
||||
log.info(f'create branch at lsn_after_truncation {lsn_after_truncation}')
|
||||
print('create branch at lsn_after_truncation ' + lsn_after_truncation)
|
||||
zenith_cli.run(
|
||||
["branch", "test_clog_truncate_new", "test_clog_truncate@" + lsn_after_truncation])
|
||||
|
||||
pg2 = postgres.create_start('test_clog_truncate_new')
|
||||
log.info('postgres is running on test_clog_truncate_new branch')
|
||||
print('postgres is running on test_clog_truncate_new branch')
|
||||
|
||||
# check that new node doesn't contain truncated segment
|
||||
pg_xact_0000_path_new = os.path.join(pg2.pg_xact_dir_path(), '0000')
|
||||
log.info(f"pg_xact_0000_path_new = {pg_xact_0000_path_new}")
|
||||
print("pg_xact_0000_path_new = " + pg_xact_0000_path_new)
|
||||
assert os.path.isfile(pg_xact_0000_path_new) is False
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -15,7 +14,7 @@ def test_config(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFact
|
||||
|
||||
# change config
|
||||
pg = postgres.create_start('test_config', config_lines=['log_min_messages=debug1'])
|
||||
log.info('postgres is running on test_config branch')
|
||||
print('postgres is running on test_config branch')
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -3,7 +3,6 @@ import pathlib
|
||||
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory, ZenithCli, check_restored_datadir_content
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -20,7 +19,7 @@ def test_createdb(
|
||||
zenith_cli.run(["branch", "test_createdb", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_createdb')
|
||||
log.info("postgres is running on 'test_createdb' branch")
|
||||
print("postgres is running on 'test_createdb' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -41,7 +40,6 @@ def test_createdb(
|
||||
for db in (pg, pg2):
|
||||
db.connect(dbname='foodb').close()
|
||||
|
||||
|
||||
#
|
||||
# Test DROP DATABASE
|
||||
#
|
||||
@@ -50,12 +48,12 @@ def test_dropdb(
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
test_output_dir,
|
||||
test_output_dir
|
||||
):
|
||||
zenith_cli.run(["branch", "test_dropdb", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_dropdb')
|
||||
log.info("postgres is running on 'test_dropdb' branch")
|
||||
print("postgres is running on 'test_dropdb' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -67,6 +65,7 @@ def test_dropdb(
|
||||
cur.execute("SELECT oid FROM pg_database WHERE datname='foodb';")
|
||||
dboid = cur.fetchone()[0]
|
||||
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute('DROP DATABASE foodb')
|
||||
@@ -76,6 +75,7 @@ def test_dropdb(
|
||||
cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_after_drop = cur.fetchone()[0]
|
||||
|
||||
|
||||
# Create two branches before and after database drop.
|
||||
zenith_cli.run(["branch", "test_before_dropdb", "test_dropdb@" + lsn_before_drop])
|
||||
pg_before = postgres.create_start('test_before_dropdb')
|
||||
@@ -88,13 +88,13 @@ def test_dropdb(
|
||||
|
||||
# Test that database subdir exists on the branch before drop
|
||||
dbpath = pathlib.Path(pg_before.pgdata_dir) / 'base' / str(dboid)
|
||||
log.info(dbpath)
|
||||
print(dbpath)
|
||||
|
||||
assert os.path.isdir(dbpath) == True
|
||||
|
||||
# Test that database subdir doesn't exist on the branch after drop
|
||||
dbpath = pathlib.Path(pg_after.pgdata_dir) / 'base' / str(dboid)
|
||||
log.info(dbpath)
|
||||
print(dbpath)
|
||||
|
||||
assert os.path.isdir(dbpath) == False
|
||||
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -13,7 +12,7 @@ def test_createuser(zenith_cli, pageserver: ZenithPageserver, postgres: Postgres
|
||||
zenith_cli.run(["branch", "test_createuser", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_createuser')
|
||||
log.info("postgres is running on 'test_createuser' branch")
|
||||
print("postgres is running on 'test_createuser' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_restored_datadir_content
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -10,17 +9,13 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
# it only checks next_multixact_id field in restored pg_control,
|
||||
# since we don't have functions to check multixact internals.
|
||||
#
|
||||
def test_multixact(pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
base_dir,
|
||||
test_output_dir):
|
||||
def test_multixact(pageserver: ZenithPageserver, postgres: PostgresFactory,
|
||||
pg_bin, zenith_cli, base_dir, test_output_dir):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_multixact", "empty"])
|
||||
pg = postgres.create_start('test_multixact')
|
||||
|
||||
log.info("postgres is running on 'test_multixact' branch")
|
||||
print("postgres is running on 'test_multixact' branch")
|
||||
pg_conn = pg.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
@@ -60,7 +55,7 @@ def test_multixact(pageserver: ZenithPageserver,
|
||||
zenith_cli.run(["branch", "test_multixact_new", "test_multixact@" + lsn])
|
||||
pg_new = postgres.create_start('test_multixact_new')
|
||||
|
||||
log.info("postgres is running on 'test_multixact_new' branch")
|
||||
print("postgres is running on 'test_multixact_new' branch")
|
||||
pg_new_conn = pg_new.connect()
|
||||
cur_new = pg_new_conn.cursor()
|
||||
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
#
|
||||
# Test where Postgres generates a lot of WAL, and it's garbage collected away, but
|
||||
# no pages are evicted so that Postgres uses an old LSN in a GetPage request.
|
||||
@@ -16,14 +14,11 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
# just a hint that the page hasn't been modified since that LSN, and the page
|
||||
# server should return the latest page version regardless of the LSN.
|
||||
#
|
||||
def test_old_request_lsn(zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin):
|
||||
def test_old_request_lsn(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_old_request_lsn", "empty"])
|
||||
pg = postgres.create_start('test_old_request_lsn')
|
||||
log.info('postgres is running on test_old_request_lsn branch')
|
||||
print('postgres is running on test_old_request_lsn branch')
|
||||
|
||||
pg_conn = pg.connect()
|
||||
cur = pg_conn.cursor()
|
||||
@@ -51,20 +46,20 @@ def test_old_request_lsn(zenith_cli,
|
||||
from pg_settings where name = 'shared_buffers'
|
||||
''')
|
||||
row = cur.fetchone()
|
||||
log.info(f'shared_buffers is {row[0]}, table size {row[1]}')
|
||||
print(f'shared_buffers is {row[0]}, table size {row[1]}');
|
||||
assert int(row[0]) < int(row[1])
|
||||
|
||||
cur.execute('VACUUM foo')
|
||||
cur.execute('VACUUM foo');
|
||||
|
||||
# Make a lot of updates on a single row, generating a lot of WAL. Trigger
|
||||
# garbage collections so that the page server will remove old page versions.
|
||||
for i in range(10):
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
for j in range(100):
|
||||
cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;')
|
||||
cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;');
|
||||
|
||||
# All (or at least most of) the updates should've been on the same page, so
|
||||
# that we haven't had to evict any dirty pages for a long time. Now run
|
||||
# a query that sends GetPage@LSN requests with the old LSN.
|
||||
cur.execute("SELECT COUNT(*), SUM(val) FROM foo")
|
||||
cur.execute("SELECT COUNT(*), SUM(val) FROM foo");
|
||||
assert cur.fetchone() == (100000, 101000)
|
||||
|
||||
@@ -63,8 +63,7 @@ def test_tenant_list_psql(pageserver: ZenithPageserver, zenith_cli):
|
||||
cur = conn.cursor()
|
||||
|
||||
# check same tenant cannot be created twice
|
||||
with pytest.raises(psycopg2.DatabaseError,
|
||||
match=f'tenant {pageserver.initial_tenant} already exists'):
|
||||
with pytest.raises(psycopg2.DatabaseError, match=f'tenant {pageserver.initial_tenant} already exists'):
|
||||
cur.execute(f'tenant_create {pageserver.initial_tenant}')
|
||||
|
||||
# create one more tenant
|
||||
@@ -103,6 +102,5 @@ def test_pageserver_http_api_client(pageserver: ZenithPageserver):
|
||||
|
||||
|
||||
def test_pageserver_http_api_client_auth_enabled(pageserver_auth_enabled: ZenithPageserver):
|
||||
client = pageserver_auth_enabled.http_client(
|
||||
auth_token=pageserver_auth_enabled.auth_keys.generate_management_token())
|
||||
client = pageserver_auth_enabled.http_client(auth_token=pageserver_auth_enabled.auth_keys.generate_management_token())
|
||||
check_client(client, pageserver_auth_enabled.initial_tenant)
|
||||
|
||||
@@ -5,24 +5,20 @@ import time
|
||||
from contextlib import closing
|
||||
from multiprocessing import Process, Value
|
||||
from fixtures.zenith_fixtures import WalAcceptorFactory, ZenithPageserver, PostgresFactory
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
# Check that dead minority doesn't prevent the commits: execute insert n_inserts
|
||||
# times, with fault_probability chance of getting a wal acceptor down or up
|
||||
# along the way. 2 of 3 are always alive, so the work keeps going.
|
||||
def test_pageserver_restart(zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
wa_factory: WalAcceptorFactory):
|
||||
def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory: WalAcceptorFactory):
|
||||
|
||||
# One safekeeper is enough for this test.
|
||||
wa_factory.start_n_new(1)
|
||||
|
||||
zenith_cli.run(["branch", "test_pageserver_restart", "empty"])
|
||||
pg = postgres.create_start('test_pageserver_restart', wal_acceptors=wa_factory.get_connstrs())
|
||||
pg = postgres.create_start('test_pageserver_restart',
|
||||
wal_acceptors=wa_factory.get_connstrs())
|
||||
|
||||
pg_conn = pg.connect()
|
||||
cur = pg_conn.cursor()
|
||||
@@ -44,14 +40,14 @@ def test_pageserver_restart(zenith_cli,
|
||||
from pg_settings where name = 'shared_buffers'
|
||||
''')
|
||||
row = cur.fetchone()
|
||||
log.info(f"shared_buffers is {row[0]}, table size {row[1]}")
|
||||
print("shared_buffers is {}, table size {}", row[0], row[1]);
|
||||
assert int(row[0]) < int(row[1])
|
||||
|
||||
# Stop and restart pageserver. This is a more or less graceful shutdown, although
|
||||
# the page server doesn't currently have a shutdown routine so there's no difference
|
||||
# between stopping and crashing.
|
||||
pageserver.stop()
|
||||
pageserver.start()
|
||||
pageserver.stop();
|
||||
pageserver.start();
|
||||
|
||||
# Stopping the pageserver breaks the connection from the postgres backend to
|
||||
# the page server, and causes the next query on the connection to fail. Start a new
|
||||
@@ -65,5 +61,6 @@ def test_pageserver_restart(zenith_cli,
|
||||
assert cur.fetchone() == (100000, )
|
||||
|
||||
# Stop the page server by force, and restart it
|
||||
pageserver.stop()
|
||||
pageserver.start()
|
||||
pageserver.stop();
|
||||
pageserver.start();
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from fixtures.zenith_fixtures import PostgresFactory
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -9,7 +8,7 @@ def test_pgbench(postgres: PostgresFactory, pg_bin, zenith_cli):
|
||||
zenith_cli.run(["branch", "test_pgbench", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_pgbench')
|
||||
log.info("postgres is running on 'test_pgbench' branch")
|
||||
print("postgres is running on 'test_pgbench' branch")
|
||||
|
||||
connstr = pg.connstr()
|
||||
|
||||
|
||||
@@ -1,89 +0,0 @@
|
||||
import subprocess
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
#
|
||||
# Create read-only compute nodes, anchored at historical points in time.
|
||||
#
|
||||
# This is very similar to the 'test_branch_behind' test, but instead of
|
||||
# creating branches, creates read-only nodes.
|
||||
#
|
||||
def test_readonly_node(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
|
||||
zenith_cli.run(["branch", "test_readonly_node", "empty"])
|
||||
|
||||
pgmain = postgres.create_start('test_readonly_node')
|
||||
print("postgres is running on 'test_readonly_node' branch")
|
||||
|
||||
main_pg_conn = pgmain.connect()
|
||||
main_cur = main_pg_conn.cursor()
|
||||
|
||||
# Create table, and insert the first 100 rows
|
||||
main_cur.execute('CREATE TABLE foo (t text)')
|
||||
main_cur.execute('''
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
FROM generate_series(1, 100) g
|
||||
''')
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_a = main_cur.fetchone()[0]
|
||||
print('LSN after 100 rows: ' + lsn_a)
|
||||
|
||||
# Insert some more rows. (This generates enough WAL to fill a few segments.)
|
||||
main_cur.execute('''
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
FROM generate_series(1, 200000) g
|
||||
''')
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_b = main_cur.fetchone()[0]
|
||||
print('LSN after 200100 rows: ' + lsn_b)
|
||||
|
||||
# Insert many more rows. This generates enough WAL to fill a few segments.
|
||||
main_cur.execute('''
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
FROM generate_series(1, 200000) g
|
||||
''')
|
||||
|
||||
main_cur.execute('SELECT pg_current_wal_insert_lsn()')
|
||||
lsn_c = main_cur.fetchone()[0]
|
||||
print('LSN after 400100 rows: ' + lsn_c)
|
||||
|
||||
# Create first read-only node at the point where only 100 rows were inserted
|
||||
pg_hundred = postgres.create_start("test_readonly_node_hundred",
|
||||
branch=f'test_readonly_node@{lsn_a}')
|
||||
|
||||
# And another at the point where 200100 rows were inserted
|
||||
pg_more = postgres.create_start("test_readonly_node_more", branch=f'test_readonly_node@{lsn_b}')
|
||||
|
||||
# On the 'hundred' node, we should see only 100 rows
|
||||
hundred_pg_conn = pg_hundred.connect()
|
||||
hundred_cur = hundred_pg_conn.cursor()
|
||||
hundred_cur.execute('SELECT count(*) FROM foo')
|
||||
assert hundred_cur.fetchone() == (100, )
|
||||
|
||||
# On the 'more' node, we should see 100200 rows
|
||||
more_pg_conn = pg_more.connect()
|
||||
more_cur = more_pg_conn.cursor()
|
||||
more_cur.execute('SELECT count(*) FROM foo')
|
||||
assert more_cur.fetchone() == (200100, )
|
||||
|
||||
# All the rows are visible on the main branch
|
||||
main_cur.execute('SELECT count(*) FROM foo')
|
||||
assert main_cur.fetchone() == (400100, )
|
||||
|
||||
# Check creating a node at segment boundary
|
||||
pg = postgres.create_start("test_branch_segment_boundary",
|
||||
branch="test_readonly_node@0/3000000")
|
||||
cur = pg.connect().cursor()
|
||||
cur.execute('SELECT 1')
|
||||
assert cur.fetchone() == (1, )
|
||||
|
||||
# Create node at pre-initdb lsn
|
||||
try:
|
||||
zenith_cli.run(["pg", "start", "test_branch_preinitdb", "test_readonly_node@0/42"])
|
||||
assert false, "compute node startup with invalid LSN should have failed"
|
||||
except Exception:
|
||||
print("Node creation with pre-initdb LSN failed (as expected)")
|
||||
@@ -2,7 +2,6 @@ import pytest
|
||||
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -12,13 +11,13 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
#
|
||||
@pytest.mark.parametrize('with_wal_acceptors', [False, True])
|
||||
def test_restart_compute(
|
||||
zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
wa_factory,
|
||||
with_wal_acceptors: bool,
|
||||
):
|
||||
zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
wa_factory,
|
||||
with_wal_acceptors: bool,
|
||||
):
|
||||
wal_acceptor_connstrs = None
|
||||
zenith_cli.run(["branch", "test_restart_compute", "empty"])
|
||||
|
||||
@@ -26,8 +25,9 @@ def test_restart_compute(
|
||||
wa_factory.start_n_new(3)
|
||||
wal_acceptor_connstrs = wa_factory.get_connstrs()
|
||||
|
||||
pg = postgres.create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs)
|
||||
log.info("postgres is running on 'test_restart_compute' branch")
|
||||
pg = postgres.create_start('test_restart_compute',
|
||||
wal_acceptors=wal_acceptor_connstrs)
|
||||
print("postgres is running on 'test_restart_compute' branch")
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -36,10 +36,12 @@ def test_restart_compute(
|
||||
cur.execute('SELECT sum(key) FROM t')
|
||||
r = cur.fetchone()
|
||||
assert r == (5000050000, )
|
||||
log.info(f"res = {r}")
|
||||
print("res = ", r)
|
||||
|
||||
# Remove data directory and restart
|
||||
pg.stop_and_destroy().create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs)
|
||||
pg.stop_and_destroy().create_start('test_restart_compute',
|
||||
wal_acceptors=wal_acceptor_connstrs)
|
||||
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -47,7 +49,7 @@ def test_restart_compute(
|
||||
cur.execute('SELECT sum(key) FROM t')
|
||||
r = cur.fetchone()
|
||||
assert r == (5000050000, )
|
||||
log.info(f"res = {r}")
|
||||
print("res = ", r)
|
||||
|
||||
# Insert another row
|
||||
cur.execute("INSERT INTO t VALUES (100001, 'payload2')")
|
||||
@@ -55,10 +57,11 @@ def test_restart_compute(
|
||||
|
||||
r = cur.fetchone()
|
||||
assert r == (100001, )
|
||||
log.info(f"res = {r}")
|
||||
print("res = ", r)
|
||||
|
||||
# Again remove data directory and restart
|
||||
pg.stop_and_destroy().create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs)
|
||||
pg.stop_and_destroy().create_start('test_restart_compute',
|
||||
wal_acceptors=wal_acceptor_connstrs)
|
||||
|
||||
# That select causes lots of FPI's and increases probability of wakeepers
|
||||
# lagging behind after query completion
|
||||
@@ -69,10 +72,11 @@ def test_restart_compute(
|
||||
|
||||
r = cur.fetchone()
|
||||
assert r == (100001, )
|
||||
log.info(f"res = {r}")
|
||||
print("res = ", r)
|
||||
|
||||
# And again remove data directory and restart
|
||||
pg.stop_and_destroy().create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs)
|
||||
pg.stop_and_destroy().create_start('test_restart_compute',
|
||||
wal_acceptors=wal_acceptor_connstrs)
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -81,4 +85,4 @@ def test_restart_compute(
|
||||
|
||||
r = cur.fetchone()
|
||||
assert r == (100001, )
|
||||
log.info(f"res = {r}")
|
||||
print("res = ", r)
|
||||
|
||||
@@ -1,19 +1,13 @@
|
||||
from contextlib import closing
|
||||
import psycopg2.extras
|
||||
import time
|
||||
from fixtures.log_helper import log
|
||||
import time;
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def print_gc_result(row):
|
||||
log.info("GC duration {elapsed} ms".format_map(row))
|
||||
log.info(
|
||||
" REL total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}"
|
||||
.format_map(row))
|
||||
log.info(
|
||||
" NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}"
|
||||
.format_map(row))
|
||||
print("GC duration {elapsed} ms".format_map(row));
|
||||
print(" REL total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}".format_map(row))
|
||||
print(" NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}".format_map(row))
|
||||
|
||||
|
||||
#
|
||||
@@ -29,7 +23,7 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
with closing(pageserver.connect()) as psconn:
|
||||
with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur:
|
||||
with psconn.cursor(cursor_factory = psycopg2.extras.DictCursor) as pscur:
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
cur.execute("SHOW zenith.zenith_timeline")
|
||||
@@ -39,9 +33,9 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
cur.execute("CREATE TABLE foo(x integer)")
|
||||
cur.execute("INSERT INTO foo VALUES (1)")
|
||||
|
||||
cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass")
|
||||
row = cur.fetchone()
|
||||
log.info(f"relfilenode is {row[0]}")
|
||||
cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass");
|
||||
row = cur.fetchone();
|
||||
print("relfilenode is {}", row[0]);
|
||||
|
||||
# Run GC, to clear out any garbage left behind in the catalogs by
|
||||
# the CREATE TABLE command. We want to have a clean slate with no garbage
|
||||
@@ -56,23 +50,22 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
# update to confuse our numbers either.
|
||||
cur.execute("DELETE FROM foo")
|
||||
|
||||
log.info("Running GC before test")
|
||||
print("Running GC before test")
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row)
|
||||
print_gc_result(row);
|
||||
# remember the number of files
|
||||
layer_relfiles_remain = (row['layer_relfiles_total'] -
|
||||
row['layer_relfiles_removed'])
|
||||
layer_relfiles_remain = row['layer_relfiles_total'] - row['layer_relfiles_removed']
|
||||
assert layer_relfiles_remain > 0
|
||||
|
||||
# Insert a row and run GC. Checkpoint should freeze the layer
|
||||
# so that there is only the most recent image layer left for the rel,
|
||||
# removing the old image and delta layer.
|
||||
log.info("Inserting one row and running GC")
|
||||
print("Inserting one row and running GC")
|
||||
cur.execute("INSERT INTO foo VALUES (1)")
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row)
|
||||
print_gc_result(row);
|
||||
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
||||
assert row['layer_relfiles_removed'] == 2
|
||||
assert row['layer_relfiles_dropped'] == 0
|
||||
@@ -80,34 +73,34 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
# Insert two more rows and run GC.
|
||||
# This should create new image and delta layer file with the new contents, and
|
||||
# then remove the old one image and the just-created delta layer.
|
||||
log.info("Inserting two more rows and running GC")
|
||||
print("Inserting two more rows and running GC")
|
||||
cur.execute("INSERT INTO foo VALUES (2)")
|
||||
cur.execute("INSERT INTO foo VALUES (3)")
|
||||
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row)
|
||||
print_gc_result(row);
|
||||
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
||||
assert row['layer_relfiles_removed'] == 2
|
||||
assert row['layer_relfiles_dropped'] == 0
|
||||
|
||||
# Do it again. Should again create two new layer files and remove old ones.
|
||||
log.info("Inserting two more rows and running GC")
|
||||
print("Inserting two more rows and running GC")
|
||||
cur.execute("INSERT INTO foo VALUES (2)")
|
||||
cur.execute("INSERT INTO foo VALUES (3)")
|
||||
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row)
|
||||
print_gc_result(row);
|
||||
assert row['layer_relfiles_total'] == layer_relfiles_remain + 2
|
||||
assert row['layer_relfiles_removed'] == 2
|
||||
assert row['layer_relfiles_dropped'] == 0
|
||||
|
||||
# Run GC again, with no changes in the database. Should not remove anything.
|
||||
log.info("Run GC again, with nothing to do")
|
||||
print("Run GC again, with nothing to do")
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row)
|
||||
print_gc_result(row);
|
||||
assert row['layer_relfiles_total'] == layer_relfiles_remain
|
||||
assert row['layer_relfiles_removed'] == 0
|
||||
assert row['layer_relfiles_dropped'] == 0
|
||||
@@ -115,12 +108,12 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin):
|
||||
#
|
||||
# Test DROP TABLE checks that relation data and metadata was deleted by GC from object storage
|
||||
#
|
||||
log.info("Drop table and run GC again")
|
||||
print("Drop table and run GC again");
|
||||
cur.execute("DROP TABLE foo")
|
||||
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
row = pscur.fetchone()
|
||||
print_gc_result(row)
|
||||
print_gc_result(row);
|
||||
|
||||
# We still cannot remove the latest layers
|
||||
# because they serve as tombstones for earlier layers.
|
||||
|
||||
@@ -21,30 +21,18 @@ def test_tenants_normal_work(
|
||||
tenant_1 = tenant_factory.create()
|
||||
tenant_2 = tenant_factory.create()
|
||||
|
||||
zenith_cli.run([
|
||||
"branch",
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
"main",
|
||||
f"--tenantid={tenant_1}"
|
||||
])
|
||||
zenith_cli.run([
|
||||
"branch",
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
"main",
|
||||
f"--tenantid={tenant_2}"
|
||||
])
|
||||
zenith_cli.run(["branch", f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", "main", f"--tenantid={tenant_1}"])
|
||||
zenith_cli.run(["branch", f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", "main", f"--tenantid={tenant_2}"])
|
||||
if with_wal_acceptors:
|
||||
wa_factory.start_n_new(3)
|
||||
|
||||
pg_tenant1 = postgres.create_start(
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant_1,
|
||||
wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
|
||||
)
|
||||
pg_tenant2 = postgres.create_start(
|
||||
f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant_2,
|
||||
wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None,
|
||||
)
|
||||
@@ -57,4 +45,4 @@ def test_tenants_normal_work(
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
cur.execute("SELECT sum(key) FROM t")
|
||||
assert cur.fetchone() == (5000050000, )
|
||||
assert cur.fetchone() == (5000050000,)
|
||||
|
||||
@@ -2,10 +2,11 @@ from contextlib import closing
|
||||
from uuid import UUID
|
||||
import psycopg2.extras
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def test_timeline_size(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin):
|
||||
def test_timeline_size(
|
||||
zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin
|
||||
):
|
||||
# Branch at the point where only 100 rows were inserted
|
||||
zenith_cli.run(["branch", "test_timeline_size", "empty"])
|
||||
|
||||
@@ -14,7 +15,7 @@ def test_timeline_size(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
|
||||
pgmain = postgres.create_start("test_timeline_size")
|
||||
log.info("postgres is running on 'test_timeline_size' branch")
|
||||
print("postgres is running on 'test_timeline_size' branch")
|
||||
|
||||
with closing(pgmain.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
@@ -22,11 +23,13 @@ def test_timeline_size(zenith_cli, pageserver: ZenithPageserver, postgres: Postg
|
||||
|
||||
# Create table, and insert the first 100 rows
|
||||
cur.execute("CREATE TABLE foo (t text)")
|
||||
cur.execute("""
|
||||
cur.execute(
|
||||
"""
|
||||
INSERT INTO foo
|
||||
SELECT 'long string to consume some space' || g
|
||||
FROM generate_series(1, 10) g
|
||||
""")
|
||||
"""
|
||||
)
|
||||
|
||||
res = client.branch_detail(UUID(pageserver.initial_tenant), "test_timeline_size")
|
||||
assert res["current_logical_size"] == res["current_logical_size_non_incremental"]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, PgBin
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
@@ -9,14 +9,11 @@ pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
#
|
||||
# Test branching, when a transaction is in prepared state
|
||||
#
|
||||
def test_twophase(zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin: PgBin):
|
||||
def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin: PgBin):
|
||||
zenith_cli.run(["branch", "test_twophase", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5'])
|
||||
log.info("postgres is running on 'test_twophase' branch")
|
||||
print("postgres is running on 'test_twophase' branch")
|
||||
|
||||
conn = pg.connect()
|
||||
cur = conn.cursor()
|
||||
@@ -48,7 +45,7 @@ def test_twophase(zenith_cli,
|
||||
cur.execute('CHECKPOINT')
|
||||
|
||||
twophase_files = os.listdir(pg.pg_twophase_dir_path())
|
||||
log.info(twophase_files)
|
||||
print(twophase_files)
|
||||
assert len(twophase_files) == 4
|
||||
|
||||
cur.execute("COMMIT PREPARED 'insert_three'")
|
||||
@@ -56,7 +53,7 @@ def test_twophase(zenith_cli,
|
||||
cur.execute('CHECKPOINT')
|
||||
|
||||
twophase_files = os.listdir(pg.pg_twophase_dir_path())
|
||||
log.info(twophase_files)
|
||||
print(twophase_files)
|
||||
assert len(twophase_files) == 2
|
||||
|
||||
# Create a branch with the transaction in prepared state
|
||||
@@ -70,7 +67,7 @@ def test_twophase(zenith_cli,
|
||||
|
||||
# Check that we restored only needed twophase files
|
||||
twophase_files2 = os.listdir(pg2.pg_twophase_dir_path())
|
||||
log.info(twophase_files2)
|
||||
print(twophase_files2)
|
||||
assert twophase_files2.sort() == twophase_files.sort()
|
||||
|
||||
conn2 = pg2.connect()
|
||||
@@ -82,8 +79,8 @@ def test_twophase(zenith_cli,
|
||||
cur2.execute("ROLLBACK PREPARED 'insert_two'")
|
||||
|
||||
cur2.execute('SELECT * FROM foo')
|
||||
assert cur2.fetchall() == [('one', ), ('three', )]
|
||||
assert cur2.fetchall() == [('one',), ('three',)]
|
||||
|
||||
# Only one committed insert is visible on the original branch
|
||||
cur.execute('SELECT * FROM foo')
|
||||
assert cur.fetchall() == [('three', )]
|
||||
assert cur.fetchall() == [('three',)]
|
||||
|
||||
@@ -1,23 +1,17 @@
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
#
|
||||
# Test that the VM bit is cleared correctly at a HEAP_DELETE and
|
||||
# HEAP_UPDATE record.
|
||||
#
|
||||
def test_vm_bit_clear(pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
base_dir):
|
||||
def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, base_dir):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_vm_bit_clear", "empty"])
|
||||
pg = postgres.create_start('test_vm_bit_clear')
|
||||
|
||||
log.info("postgres is running on 'test_vm_bit_clear' branch")
|
||||
print("postgres is running on 'test_vm_bit_clear' branch")
|
||||
pg_conn = pg.connect()
|
||||
cur = pg_conn.cursor()
|
||||
|
||||
@@ -54,12 +48,13 @@ def test_vm_bit_clear(pageserver: ZenithPageserver,
|
||||
''')
|
||||
|
||||
cur.execute('SELECT * FROM vmtest_delete WHERE id = 1')
|
||||
assert (cur.fetchall() == [])
|
||||
assert(cur.fetchall() == []);
|
||||
cur.execute('SELECT * FROM vmtest_update WHERE id = 1')
|
||||
assert (cur.fetchall() == [])
|
||||
assert(cur.fetchall() == []);
|
||||
|
||||
cur.close()
|
||||
|
||||
|
||||
# Check the same thing on the branch that we created right after the DELETE
|
||||
#
|
||||
# As of this writing, the code in smgrwrite() creates a full-page image whenever
|
||||
@@ -68,7 +63,7 @@ def test_vm_bit_clear(pageserver: ZenithPageserver,
|
||||
# server at the right point-in-time avoids that full-page image.
|
||||
pg_new = postgres.create_start('test_vm_bit_clear_new')
|
||||
|
||||
log.info("postgres is running on 'test_vm_bit_clear_new' branch")
|
||||
print("postgres is running on 'test_vm_bit_clear_new' branch")
|
||||
pg_new_conn = pg_new.connect()
|
||||
cur_new = pg_new_conn.cursor()
|
||||
|
||||
@@ -79,6 +74,6 @@ def test_vm_bit_clear(pageserver: ZenithPageserver,
|
||||
''')
|
||||
|
||||
cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1')
|
||||
assert (cur_new.fetchall() == [])
|
||||
assert(cur_new.fetchall() == []);
|
||||
cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1')
|
||||
assert (cur_new.fetchall() == [])
|
||||
assert(cur_new.fetchall() == []);
|
||||
|
||||
@@ -9,17 +9,13 @@ from contextlib import closing
|
||||
from multiprocessing import Process, Value
|
||||
from fixtures.zenith_fixtures import WalAcceptorFactory, ZenithPageserver, PostgresFactory, PgBin
|
||||
from fixtures.utils import lsn_to_hex, mkdir_if_needed
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
# basic test, write something in setup with wal acceptors, ensure that commits
|
||||
# succeed and data is written
|
||||
def test_normal_work(zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
wa_factory):
|
||||
def test_normal_work(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
|
||||
zenith_cli.run(["branch", "test_wal_acceptors_normal_work", "empty"])
|
||||
wa_factory.start_n_new(3)
|
||||
pg = postgres.create_start('test_wal_acceptors_normal_work',
|
||||
@@ -37,10 +33,7 @@ def test_normal_work(zenith_cli,
|
||||
|
||||
# Run page server and multiple acceptors, and multiple compute nodes running
|
||||
# against different timelines.
|
||||
def test_many_timelines(zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
wa_factory):
|
||||
def test_many_timelines(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory):
|
||||
n_timelines = 2
|
||||
|
||||
wa_factory.start_n_new(3)
|
||||
@@ -72,10 +65,7 @@ def test_many_timelines(zenith_cli,
|
||||
# Check that dead minority doesn't prevent the commits: execute insert n_inserts
|
||||
# times, with fault_probability chance of getting a wal acceptor down or up
|
||||
# along the way. 2 of 3 are always alive, so the work keeps going.
|
||||
def test_restarts(zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
wa_factory: WalAcceptorFactory):
|
||||
def test_restarts(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory: WalAcceptorFactory):
|
||||
fault_probability = 0.01
|
||||
n_inserts = 1000
|
||||
n_acceptors = 3
|
||||
@@ -186,11 +176,7 @@ def stop_value():
|
||||
|
||||
|
||||
# do inserts while concurrently getting up/down subsets of acceptors
|
||||
def test_race_conditions(zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
wa_factory,
|
||||
stop_value):
|
||||
def test_race_conditions(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory, stop_value):
|
||||
|
||||
wa_factory.start_n_new(3)
|
||||
|
||||
@@ -217,7 +203,6 @@ def test_race_conditions(zenith_cli,
|
||||
stop_value.value = 1
|
||||
proc.join()
|
||||
|
||||
|
||||
class ProposerPostgres:
|
||||
"""Object for running safekeepers sync with walproposer"""
|
||||
def __init__(self, pgdata_dir: str, pg_bin: PgBin, timeline_id: str, tenant_id: str):
|
||||
@@ -299,37 +284,10 @@ def test_sync_safekeepers(repo_dir: str, pg_bin: PgBin, wa_factory: WalAcceptorF
|
||||
)
|
||||
lsn_hex = lsn_to_hex(res["inserted_wal"]["end_lsn"])
|
||||
lsn_after_append.append(lsn_hex)
|
||||
log.info(f"safekeeper[{i}] lsn after append: {lsn_hex}")
|
||||
print(f"safekeeper[{i}] lsn after append: {lsn_hex}")
|
||||
|
||||
# run sync safekeepers
|
||||
lsn_after_sync = pg.sync_safekeepers()
|
||||
log.info(f"lsn after sync = {lsn_after_sync}")
|
||||
print(f"lsn after sync = {lsn_after_sync}")
|
||||
|
||||
assert all(lsn_after_sync == lsn for lsn in lsn_after_append)
|
||||
|
||||
|
||||
def test_timeline_status(zenith_cli, pageserver, postgres, wa_factory: WalAcceptorFactory):
|
||||
wa_factory.start_n_new(1)
|
||||
|
||||
zenith_cli.run(["branch", "test_timeline_status", "empty"])
|
||||
pg = postgres.create_start('test_timeline_status', wal_acceptors=wa_factory.get_connstrs())
|
||||
|
||||
wa = wa_factory.instances[0]
|
||||
wa_http_cli = wa.http_client()
|
||||
wa_http_cli.check_status()
|
||||
|
||||
# learn zenith timeline from compute
|
||||
tenant_id = pg.safe_psql("show zenith.zenith_tenant")[0][0]
|
||||
timeline_id = pg.safe_psql("show zenith.zenith_timeline")[0][0]
|
||||
|
||||
# fetch something sensible from status
|
||||
epoch = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch
|
||||
|
||||
pg.safe_psql("create table t(i int)")
|
||||
|
||||
# ensure epoch goes up after reboot
|
||||
pg.stop().start()
|
||||
pg.safe_psql("insert into t values(10)")
|
||||
|
||||
epoch_after_reboot = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch
|
||||
assert epoch_after_reboot > epoch
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
import asyncio
|
||||
import asyncpg
|
||||
import random
|
||||
import time
|
||||
|
||||
from fixtures.zenith_fixtures import WalAcceptor, WalAcceptorFactory, ZenithPageserver, PostgresFactory, Postgres
|
||||
from fixtures.log_helper import getLogger
|
||||
from fixtures.utils import lsn_from_hex, lsn_to_hex
|
||||
from typing import List
|
||||
from fixtures.utils import debug_print
|
||||
|
||||
log = getLogger('root.wal_acceptor_async')
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
@@ -21,16 +18,13 @@ class BankClient(object):
|
||||
async def initdb(self):
|
||||
await self.conn.execute('DROP TABLE IF EXISTS bank_accs')
|
||||
await self.conn.execute('CREATE TABLE bank_accs(uid int primary key, amount int)')
|
||||
await self.conn.execute(
|
||||
'''
|
||||
await self.conn.execute('''
|
||||
INSERT INTO bank_accs
|
||||
SELECT *, $1 FROM generate_series(0, $2)
|
||||
''',
|
||||
self.init_amount,
|
||||
self.n_accounts - 1)
|
||||
''', self.init_amount, self.n_accounts - 1)
|
||||
await self.conn.execute('DROP TABLE IF EXISTS bank_log')
|
||||
await self.conn.execute('CREATE TABLE bank_log(from_uid int, to_uid int, amount int)')
|
||||
|
||||
|
||||
# TODO: Remove when https://github.com/zenithdb/zenith/issues/644 is fixed
|
||||
await self.conn.execute('ALTER TABLE bank_accs SET (autovacuum_enabled = false)')
|
||||
await self.conn.execute('ALTER TABLE bank_log SET (autovacuum_enabled = false)')
|
||||
@@ -39,7 +33,6 @@ class BankClient(object):
|
||||
row = await self.conn.fetchrow('SELECT sum(amount) AS sum FROM bank_accs')
|
||||
assert row['sum'] == self.n_accounts * self.init_amount
|
||||
|
||||
|
||||
async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
|
||||
# avoid deadlocks by sorting uids
|
||||
if from_uid > to_uid:
|
||||
@@ -48,22 +41,16 @@ async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount):
|
||||
async with conn.transaction():
|
||||
await conn.execute(
|
||||
'UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2',
|
||||
amount,
|
||||
to_uid,
|
||||
amount, to_uid,
|
||||
)
|
||||
await conn.execute(
|
||||
'UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2',
|
||||
amount,
|
||||
from_uid,
|
||||
amount, from_uid,
|
||||
)
|
||||
await conn.execute(
|
||||
'INSERT INTO bank_log VALUES ($1, $2, $3)',
|
||||
from_uid,
|
||||
to_uid,
|
||||
amount,
|
||||
await conn.execute('INSERT INTO bank_log VALUES ($1, $2, $3)',
|
||||
from_uid, to_uid, amount,
|
||||
)
|
||||
|
||||
|
||||
class WorkerStats(object):
|
||||
def __init__(self, n_workers):
|
||||
self.counters = [0] * n_workers
|
||||
@@ -76,18 +63,18 @@ class WorkerStats(object):
|
||||
self.counters[worker_id] += 1
|
||||
|
||||
def check_progress(self):
|
||||
log.debug("Workers progress: {}".format(self.counters))
|
||||
debug_print("Workers progress: {}".format(self.counters))
|
||||
|
||||
# every worker should finish at least one tx
|
||||
assert all(cnt > 0 for cnt in self.counters)
|
||||
|
||||
progress = sum(self.counters)
|
||||
log.info('All workers made {} transactions'.format(progress))
|
||||
print('All workers made {} transactions'.format(progress))
|
||||
|
||||
|
||||
async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accounts, max_transfer):
|
||||
pg_conn = await pg.connect_async()
|
||||
log.debug('Started worker {}'.format(worker_id))
|
||||
debug_print('Started worker {}'.format(worker_id))
|
||||
|
||||
while stats.running:
|
||||
from_uid = random.randint(0, n_accounts - 1)
|
||||
@@ -97,45 +84,13 @@ async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accou
|
||||
await bank_transfer(pg_conn, from_uid, to_uid, amount)
|
||||
stats.inc_progress(worker_id)
|
||||
|
||||
log.debug('Executed transfer({}) {} => {}'.format(amount, from_uid, to_uid))
|
||||
debug_print('Executed transfer({}) {} => {}'.format(amount, from_uid, to_uid))
|
||||
|
||||
log.debug('Finished worker {}'.format(worker_id))
|
||||
debug_print('Finished worker {}'.format(worker_id))
|
||||
|
||||
await pg_conn.close()
|
||||
|
||||
|
||||
async def wait_for_lsn(safekeeper: WalAcceptor,
|
||||
tenant_id: str,
|
||||
timeline_id: str,
|
||||
wait_lsn: str,
|
||||
polling_interval=1,
|
||||
timeout=600):
|
||||
"""
|
||||
Poll flush_lsn from safekeeper until it's greater or equal than
|
||||
provided wait_lsn. To do that, timeline_status is fetched from
|
||||
safekeeper every polling_interval seconds.
|
||||
"""
|
||||
|
||||
started_at = time.time()
|
||||
client = safekeeper.http_client()
|
||||
|
||||
flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn
|
||||
log.info(
|
||||
f'Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}'
|
||||
)
|
||||
|
||||
while lsn_from_hex(wait_lsn) > lsn_from_hex(flush_lsn):
|
||||
elapsed = time.time() - started_at
|
||||
if elapsed > timeout:
|
||||
raise RuntimeError(
|
||||
f"timed out waiting for safekeeper at port {safekeeper.port.pg} to reach {wait_lsn}, current lsn is {flush_lsn}"
|
||||
)
|
||||
|
||||
await asyncio.sleep(polling_interval)
|
||||
flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn
|
||||
log.debug(f'safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}')
|
||||
|
||||
|
||||
# This test will run several iterations and check progress in each of them.
|
||||
# On each iteration 1 acceptor is stopped, and 2 others should allow
|
||||
# background workers execute transactions. In the end, state should remain
|
||||
@@ -148,9 +103,6 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
|
||||
iterations = 6
|
||||
|
||||
pg_conn = await pg.connect_async()
|
||||
tenant_id = await pg_conn.fetchval("show zenith.zenith_tenant")
|
||||
timeline_id = await pg_conn.fetchval("show zenith.zenith_timeline")
|
||||
|
||||
bank = BankClient(pg_conn, n_accounts=n_accounts, init_amount=init_amount)
|
||||
# create tables and initial balances
|
||||
await bank.initdb()
|
||||
@@ -161,19 +113,19 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
|
||||
worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer)
|
||||
workers.append(asyncio.create_task(worker))
|
||||
|
||||
|
||||
for it in range(iterations):
|
||||
victim_idx = it % len(acceptors)
|
||||
victim = acceptors[victim_idx]
|
||||
victim = acceptors[it % len(acceptors)]
|
||||
victim.stop()
|
||||
|
||||
flush_lsn = await pg_conn.fetchval('SELECT pg_current_wal_flush_lsn()')
|
||||
flush_lsn = lsn_to_hex(flush_lsn)
|
||||
log.info(f'Postgres flush_lsn {flush_lsn}')
|
||||
|
||||
# Wait until alive safekeepers catch up with postgres
|
||||
for idx, safekeeper in enumerate(acceptors):
|
||||
if idx != victim_idx:
|
||||
await wait_for_lsn(safekeeper, tenant_id, timeline_id, flush_lsn)
|
||||
# Wait till previous victim recovers so it is ready for the next
|
||||
# iteration by making any writing xact.
|
||||
conn = await pg.connect_async()
|
||||
await conn.execute(
|
||||
'UPDATE bank_accs SET amount = amount WHERE uid = 1',
|
||||
timeout=120
|
||||
)
|
||||
await conn.close()
|
||||
|
||||
stats.reset()
|
||||
await asyncio.sleep(period_time)
|
||||
@@ -182,7 +134,7 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
|
||||
|
||||
victim.start()
|
||||
|
||||
log.info('Iterations are finished, exiting coroutines...')
|
||||
print('Iterations are finished, exiting coroutines...')
|
||||
stats.running = False
|
||||
# await all workers
|
||||
await asyncio.gather(*workers)
|
||||
@@ -192,9 +144,7 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_
|
||||
|
||||
|
||||
# restart acceptors one by one, while executing and validating bank transactions
|
||||
def test_restarts_under_load(zenith_cli,
|
||||
pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
def test_restarts_under_load(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory,
|
||||
wa_factory: WalAcceptorFactory):
|
||||
|
||||
wa_factory.start_n_new(3)
|
||||
|
||||
@@ -23,11 +23,8 @@ def helper_compare_branch_list(page_server_cur, zenith_cli, initial_tenant: str)
|
||||
|
||||
res = zenith_cli.run(["branch", f"--tenantid={initial_tenant}"])
|
||||
res.check_returncode()
|
||||
branches_cli_with_tenant_arg = sorted(
|
||||
map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
branches_cli_with_tenant_arg = [
|
||||
b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')
|
||||
]
|
||||
branches_cli_with_tenant_arg = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n")))
|
||||
branches_cli_with_tenant_arg = [b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')]
|
||||
|
||||
assert branches_api == branches_cli == branches_cli_with_tenant_arg
|
||||
|
||||
@@ -57,7 +54,6 @@ def test_cli_branch_list(pageserver: ZenithPageserver, zenith_cli):
|
||||
assert 'test_cli_branch_list_main' in branches_cli
|
||||
assert 'test_cli_branch_list_nested' in branches_cli
|
||||
|
||||
|
||||
def helper_compare_tenant_list(page_server_cur, zenith_cli: ZenithCli):
|
||||
page_server_cur.execute(f'tenant_list')
|
||||
tenants_api = sorted(json.loads(page_server_cur.fetchone()[0]))
|
||||
|
||||
@@ -6,14 +6,8 @@ from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_isolation(pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
test_output_dir,
|
||||
pg_distrib_dir,
|
||||
base_dir,
|
||||
capsys):
|
||||
def test_isolation(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
|
||||
base_dir, capsys):
|
||||
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_isolation", "empty"])
|
||||
|
||||
@@ -6,14 +6,8 @@ from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_re
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_pg_regress(pageserver: ZenithPageserver,
|
||||
postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
test_output_dir,
|
||||
pg_distrib_dir,
|
||||
base_dir,
|
||||
capsys):
|
||||
def test_pg_regress(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
|
||||
base_dir, capsys):
|
||||
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_pg_regress", "empty"])
|
||||
|
||||
@@ -2,19 +2,12 @@ import os
|
||||
|
||||
from fixtures.utils import mkdir_if_needed
|
||||
from fixtures.zenith_fixtures import PageserverPort, PostgresFactory, check_restored_datadir_content
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
|
||||
|
||||
def test_zenith_regress(postgres: PostgresFactory,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
test_output_dir,
|
||||
pg_distrib_dir,
|
||||
base_dir,
|
||||
capsys,
|
||||
pageserver_port: PageserverPort):
|
||||
def test_zenith_regress(postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir,
|
||||
base_dir, capsys, pageserver_port: PageserverPort):
|
||||
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_zenith_regress", "empty"])
|
||||
@@ -45,7 +38,7 @@ def test_zenith_regress(postgres: PostgresFactory,
|
||||
'--inputdir={}'.format(src_path),
|
||||
]
|
||||
|
||||
log.info(pg_regress_command)
|
||||
print(pg_regress_command)
|
||||
env = {
|
||||
'PGPORT': str(pg.port),
|
||||
'PGUSER': pg.username,
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from pprint import pprint
|
||||
|
||||
import os
|
||||
import re
|
||||
import timeit
|
||||
@@ -24,6 +26,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast
|
||||
from typing_extensions import Literal
|
||||
|
||||
from .utils import (get_self_dir, mkdir_if_needed, subprocess_capture)
|
||||
|
||||
"""
|
||||
This file contains fixtures for micro-benchmarks.
|
||||
|
||||
@@ -54,6 +57,7 @@ in the test initialization, or measure disk usage after the test query.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
# All the results are collected in this list, as a tuple:
|
||||
# (test_name: str, metric_name: str, metric_value: float, unit: str)
|
||||
#
|
||||
@@ -63,7 +67,6 @@ in the test initialization, or measure disk usage after the test query.
|
||||
global zenbenchmark_results
|
||||
zenbenchmark_results = []
|
||||
|
||||
|
||||
class ZenithBenchmarkResults:
|
||||
""" An object for recording benchmark results. """
|
||||
def __init__(self):
|
||||
@@ -76,7 +79,6 @@ class ZenithBenchmarkResults:
|
||||
|
||||
self.results.append((test_name, metric_name, metric_value, unit))
|
||||
|
||||
|
||||
# Session scope fixture that initializes the results object
|
||||
@pytest.fixture(autouse=True, scope='session')
|
||||
def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]:
|
||||
@@ -88,7 +90,6 @@ def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]:
|
||||
|
||||
yield zenbenchmark_results
|
||||
|
||||
|
||||
class ZenithBenchmarker:
|
||||
"""
|
||||
An object for recording benchmark results. This is created for each test
|
||||
@@ -104,6 +105,7 @@ class ZenithBenchmarker:
|
||||
"""
|
||||
self.results.record(self.request.node.name, metric_name, metric_value, unit)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def record_duration(self, metric_name):
|
||||
"""
|
||||
@@ -134,8 +136,7 @@ class ZenithBenchmarker:
|
||||
# The metric should be an integer, as it's a number of bytes. But in general
|
||||
# all prometheus metrics are floats. So to be pedantic, read it as a float
|
||||
# and round to integer.
|
||||
matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$',
|
||||
all_metrics,
|
||||
matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$', all_metrics,
|
||||
re.MULTILINE)
|
||||
return int(round(float(matches.group(1))))
|
||||
|
||||
@@ -146,7 +147,8 @@ class ZenithBenchmarker:
|
||||
# Fetch all the exposed prometheus metrics from page server
|
||||
all_metrics = pageserver.http_client().get_metrics()
|
||||
# See comment in get_io_writes()
|
||||
matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics, re.MULTILINE)
|
||||
matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics,
|
||||
re.MULTILINE)
|
||||
return int(round(float(matches.group(1))))
|
||||
|
||||
def get_timeline_size(self, repo_dir: str, tenantid: str, timelineid: str):
|
||||
@@ -171,11 +173,7 @@ class ZenithBenchmarker:
|
||||
yield
|
||||
after = self.get_io_writes(pageserver)
|
||||
|
||||
self.results.record(self.request.node.name,
|
||||
metric_name,
|
||||
round((after - before) / (1024 * 1024)),
|
||||
'MB')
|
||||
|
||||
self.results.record(self.request.node.name, metric_name, round((after - before) / (1024 * 1024)), 'MB')
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]:
|
||||
@@ -189,7 +187,9 @@ def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]:
|
||||
|
||||
# Hook to print the results at the end
|
||||
@pytest.hookimpl(hookwrapper=True)
|
||||
def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, config: Config):
|
||||
def pytest_terminal_summary(
|
||||
terminalreporter: TerminalReporter, exitstatus: int, config: Config
|
||||
):
|
||||
yield
|
||||
|
||||
global zenbenchmark_results
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
import logging
|
||||
import logging.config
|
||||
"""
|
||||
This file configures logging to use in python tests.
|
||||
Logs are automatically captured and shown in their
|
||||
own section after all tests are executed.
|
||||
|
||||
To see logs for all (even successful) tests, run
|
||||
pytest with the following command:
|
||||
- `pipenv run pytest -n8 -rA`
|
||||
|
||||
Other log config can be set in pytest.ini file.
|
||||
You can add `log_cli = true` to it to watch
|
||||
logs in real time.
|
||||
|
||||
To get more info about logging with pytest, see
|
||||
https://docs.pytest.org/en/6.2.x/logging.html
|
||||
"""
|
||||
|
||||
# this config is only used for default log levels,
|
||||
# log format is specified in pytest.ini file
|
||||
LOGGING = {
|
||||
"version": 1,
|
||||
"loggers": {
|
||||
"root": {
|
||||
"level": "INFO"
|
||||
},
|
||||
"root.wal_acceptor_async": {
|
||||
"level": "INFO" # a lot of logs on DEBUG level
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def getLogger(name='root') -> logging.Logger:
|
||||
"""Method to get logger for tests.
|
||||
|
||||
Should be used to get correctly initialized logger. """
|
||||
return logging.getLogger(name)
|
||||
|
||||
|
||||
# default logger for tests
|
||||
log = getLogger()
|
||||
|
||||
logging.config.dictConfig(LOGGING)
|
||||
@@ -2,7 +2,6 @@ import os
|
||||
import subprocess
|
||||
|
||||
from typing import Any, List
|
||||
from fixtures.log_helper import log
|
||||
|
||||
|
||||
def get_self_dir() -> str:
|
||||
@@ -40,7 +39,7 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
|
||||
|
||||
with open(stdout_filename, 'w') as stdout_f:
|
||||
with open(stderr_filename, 'w') as stderr_f:
|
||||
log.info('(capturing output to "{}.stdout")'.format(base))
|
||||
print('(capturing output to "{}.stdout")'.format(base))
|
||||
subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
|
||||
|
||||
return basepath
|
||||
@@ -59,13 +58,14 @@ def global_counter() -> int:
|
||||
_global_counter += 1
|
||||
return _global_counter
|
||||
|
||||
def debug_print(*args, **kwargs) -> None:
|
||||
""" Print to the console if TEST_DEBUG_PRINT is set in env.
|
||||
|
||||
All parameters are passed to print().
|
||||
"""
|
||||
if os.environ.get('TEST_DEBUG_PRINT') is not None:
|
||||
print(*args, **kwargs)
|
||||
|
||||
def lsn_to_hex(num: int) -> str:
|
||||
""" Convert lsn from int to standard hex notation. """
|
||||
return "{:X}/{:X}".format(num >> 32, num & 0xffffffff)
|
||||
|
||||
|
||||
def lsn_from_hex(lsn_hex: str) -> int:
|
||||
""" Convert lsn from hex notation to int. """
|
||||
l, r = lsn_hex.split('/')
|
||||
return (int(l, 16) << 32) + int(r, 16)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from dataclasses import dataclass
|
||||
from cached_property import cached_property
|
||||
from functools import cached_property
|
||||
import asyncpg
|
||||
import os
|
||||
import pathlib
|
||||
@@ -13,8 +13,9 @@ import signal
|
||||
import subprocess
|
||||
import time
|
||||
import filecmp
|
||||
import difflib
|
||||
|
||||
from contextlib import closing, suppress
|
||||
from contextlib import closing
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
|
||||
@@ -26,7 +27,6 @@ from typing_extensions import Literal
|
||||
import requests
|
||||
|
||||
from .utils import (get_self_dir, mkdir_if_needed, subprocess_capture)
|
||||
from fixtures.log_helper import log
|
||||
"""
|
||||
This file contains pytest fixtures. A fixture is a test resource that can be
|
||||
summoned by placing its name in the test's arguments.
|
||||
@@ -54,18 +54,17 @@ DEFAULT_POSTGRES_DIR = 'tmp_install'
|
||||
BASE_PORT = 15000
|
||||
WORKER_PORT_NUM = 100
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
"""
|
||||
Ensure that no unwanted daemons are running before we start testing.
|
||||
Check that we do not owerflow available ports range.
|
||||
"""
|
||||
numprocesses = config.getoption('numprocesses')
|
||||
if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports
|
||||
raise Exception('Too many workers configured. Cannot distrubute ports for services.')
|
||||
if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports
|
||||
raise Exception('Too many workers configured. Cannot distrubute ports for services.')
|
||||
|
||||
# does not use -c as it is not supported on macOS
|
||||
cmd = ['pgrep', 'pageserver|postgres|safekeeper']
|
||||
cmd = ['pgrep', 'pageserver|postgres|wal_acceptor']
|
||||
result = subprocess.run(cmd, stdout=subprocess.DEVNULL)
|
||||
if result.returncode == 0:
|
||||
# returncode of 0 means it found something.
|
||||
@@ -73,7 +72,7 @@ def pytest_configure(config):
|
||||
# result of the test.
|
||||
# NOTE this shows as an internal pytest error, there might be a better way
|
||||
raise Exception(
|
||||
'Found interfering processes running. Stop all Zenith pageservers, nodes, safekeepers, as well as stand-alone Postgres.'
|
||||
'Found interfering processes running. Stop all Zenith pageservers, nodes, WALs, as well as stand-alone Postgres.'
|
||||
)
|
||||
|
||||
|
||||
@@ -106,11 +105,7 @@ class PgProtocol:
|
||||
self.port = port
|
||||
self.username = username or "zenith_admin"
|
||||
|
||||
def connstr(self,
|
||||
*,
|
||||
dbname: str = 'postgres',
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None) -> str:
|
||||
def connstr(self, *, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> str:
|
||||
"""
|
||||
Build a libpq connection string for the Postgres instance.
|
||||
"""
|
||||
@@ -122,12 +117,7 @@ class PgProtocol:
|
||||
return f'{res} password={password}'
|
||||
|
||||
# autocommit=True here by default because that's what we need most of the time
|
||||
def connect(self,
|
||||
*,
|
||||
autocommit=True,
|
||||
dbname: str = 'postgres',
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None) -> PgConnection:
|
||||
def connect(self, *, autocommit=True, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> PgConnection:
|
||||
"""
|
||||
Connect to the node.
|
||||
Returns psycopg2's connection object.
|
||||
@@ -143,11 +133,7 @@ class PgProtocol:
|
||||
conn.autocommit = autocommit
|
||||
return conn
|
||||
|
||||
async def connect_async(self,
|
||||
*,
|
||||
dbname: str = 'postgres',
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None) -> asyncpg.Connection:
|
||||
async def connect_async(self, *, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> asyncpg.Connection:
|
||||
"""
|
||||
Connect to the node from async python.
|
||||
Returns asyncpg's connection object.
|
||||
@@ -202,22 +188,22 @@ class ZenithCli:
|
||||
|
||||
>>> result = zenith_cli.run(...)
|
||||
>>> assert result.stderr == ""
|
||||
>>> log.info(result.stdout)
|
||||
>>> print(result.stdout)
|
||||
"""
|
||||
|
||||
assert type(arguments) == list
|
||||
|
||||
args = [self.bin_zenith] + arguments
|
||||
log.info('Running command "{}"'.format(' '.join(args)))
|
||||
print('Running command "{}"'.format(' '.join(args)))
|
||||
|
||||
# Interceipt CalledProcessError and print more info
|
||||
try:
|
||||
res = subprocess.run(args,
|
||||
env=self.env,
|
||||
check=True,
|
||||
universal_newlines=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
env=self.env,
|
||||
check=True,
|
||||
universal_newlines=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
# this way command output will be in recorded and shown in CI in failure message
|
||||
msg = f"""\
|
||||
@@ -225,7 +211,7 @@ class ZenithCli:
|
||||
stdout: {exc.stdout}
|
||||
stderr: {exc.stderr}
|
||||
"""
|
||||
log.info(msg)
|
||||
print(msg)
|
||||
|
||||
raise Exception(msg) from exc
|
||||
|
||||
@@ -255,17 +241,21 @@ class ZenithPageserverHttpClient(requests.Session):
|
||||
return res.json()
|
||||
|
||||
def branch_create(self, tenant_id: uuid.UUID, name: str, start_point: str) -> Dict:
|
||||
res = self.post(f"http://localhost:{self.port}/v1/branch",
|
||||
json={
|
||||
'tenant_id': tenant_id.hex,
|
||||
'name': name,
|
||||
'start_point': start_point,
|
||||
})
|
||||
res = self.post(
|
||||
f"http://localhost:{self.port}/v1/branch",
|
||||
json={
|
||||
'tenant_id': tenant_id.hex,
|
||||
'name': name,
|
||||
'start_point': start_point,
|
||||
}
|
||||
)
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
def branch_detail(self, tenant_id: uuid.UUID, name: str) -> Dict:
|
||||
res = self.get(f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}/{name}", )
|
||||
res = self.get(
|
||||
f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}/{name}",
|
||||
)
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
@@ -307,11 +297,7 @@ class AuthKeys:
|
||||
return token
|
||||
|
||||
def generate_tenant_token(self, tenant_id):
|
||||
token = jwt.encode({
|
||||
"scope": "tenant", "tenant_id": tenant_id
|
||||
},
|
||||
self.priv,
|
||||
algorithm="RS256")
|
||||
token = jwt.encode({"scope": "tenant", "tenant_id": tenant_id}, self.priv, algorithm="RS256")
|
||||
|
||||
if isinstance(token, bytes):
|
||||
token = token.decode()
|
||||
@@ -336,7 +322,6 @@ def worker_base_port(worker_seq_no: int):
|
||||
# so workers have disjoint set of ports for services
|
||||
return BASE_PORT + worker_seq_no * WORKER_PORT_NUM
|
||||
|
||||
|
||||
class PortDistributor:
|
||||
def __init__(self, base_port: int, port_number: int) -> None:
|
||||
self.iterator = iter(range(base_port, base_port + port_number))
|
||||
@@ -345,15 +330,13 @@ class PortDistributor:
|
||||
try:
|
||||
return next(self.iterator)
|
||||
except StopIteration:
|
||||
raise RuntimeError(
|
||||
'port range configured for test is exhausted, consider enlarging the range')
|
||||
raise RuntimeError('port range configured for test is exhausted, consider enlarging the range')
|
||||
|
||||
|
||||
@zenfixture
|
||||
def port_distributor(worker_base_port):
|
||||
return PortDistributor(base_port=worker_base_port, port_number=WORKER_PORT_NUM)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PageserverPort:
|
||||
pg: int
|
||||
@@ -368,18 +351,14 @@ class ZenithPageserver(PgProtocol):
|
||||
self.running = False
|
||||
self.initial_tenant = None
|
||||
self.repo_dir = repo_dir
|
||||
self.service_port = port # do not shadow PgProtocol.port which is just int
|
||||
self.service_port = port # do not shadow PgProtocol.port which is just int
|
||||
|
||||
def init(self, enable_auth: bool = False) -> 'ZenithPageserver':
|
||||
"""
|
||||
Initialize the repository, i.e. run "zenith init".
|
||||
Returns self.
|
||||
"""
|
||||
cmd = [
|
||||
'init',
|
||||
f'--pageserver-pg-port={self.service_port.pg}',
|
||||
f'--pageserver-http-port={self.service_port.http}'
|
||||
]
|
||||
cmd = ['init', f'--pageserver-pg-port={self.service_port.pg}', f'--pageserver-http-port={self.service_port.http}']
|
||||
if enable_auth:
|
||||
cmd.append('--enable-auth')
|
||||
self.zenith_cli.run(cmd)
|
||||
@@ -396,7 +375,6 @@ class ZenithPageserver(PgProtocol):
|
||||
Start the page server.
|
||||
Returns self.
|
||||
"""
|
||||
assert self.running == False
|
||||
|
||||
self.zenith_cli.run(['start'])
|
||||
self.running = True
|
||||
@@ -404,18 +382,14 @@ class ZenithPageserver(PgProtocol):
|
||||
self.initial_tenant = self.zenith_cli.run(['tenant', 'list']).stdout.strip()
|
||||
return self
|
||||
|
||||
def stop(self, immediate=False) -> 'ZenithPageserver':
|
||||
def stop(self) -> 'ZenithPageserver':
|
||||
"""
|
||||
Stop the page server.
|
||||
Returns self.
|
||||
"""
|
||||
cmd = ['stop']
|
||||
if immediate:
|
||||
cmd.append('immediate')
|
||||
|
||||
log.info(f"Stopping pageserver with {cmd}")
|
||||
if self.running:
|
||||
self.zenith_cli.run(cmd)
|
||||
self.zenith_cli.run(['stop'])
|
||||
self.running = False
|
||||
|
||||
return self
|
||||
@@ -424,7 +398,7 @@ class ZenithPageserver(PgProtocol):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
self.stop(True)
|
||||
self.stop()
|
||||
|
||||
@cached_property
|
||||
def auth_keys(self) -> AuthKeys:
|
||||
@@ -439,17 +413,18 @@ class ZenithPageserver(PgProtocol):
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
@zenfixture
|
||||
def pageserver_port(port_distributor: PortDistributor) -> PageserverPort:
|
||||
pg = port_distributor.get_port()
|
||||
http = port_distributor.get_port()
|
||||
log.info(f"pageserver_port: pg={pg} http={http}")
|
||||
print(f"pageserver_port: pg={pg} http={http}")
|
||||
return PageserverPort(pg=pg, http=http)
|
||||
|
||||
|
||||
@zenfixture
|
||||
def pageserver(zenith_cli: ZenithCli, repo_dir: str,
|
||||
pageserver_port: PageserverPort) -> Iterator[ZenithPageserver]:
|
||||
def pageserver(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: PageserverPort) -> Iterator[ZenithPageserver]:
|
||||
"""
|
||||
The 'pageserver' fixture provides a Page Server that's up and running.
|
||||
|
||||
@@ -461,17 +436,15 @@ def pageserver(zenith_cli: ZenithCli, repo_dir: str,
|
||||
By convention, the test branches are named after the tests. For example,
|
||||
test called 'test_foo' would create and use branches with the 'test_foo' prefix.
|
||||
"""
|
||||
ps = ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir,
|
||||
port=pageserver_port).init().start()
|
||||
ps = ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir, port=pageserver_port).init().start()
|
||||
# For convenience in tests, create a branch from the freshly-initialized cluster.
|
||||
zenith_cli.run(["branch", "empty", "main"])
|
||||
|
||||
yield ps
|
||||
|
||||
# After the yield comes any cleanup code we need.
|
||||
log.info('Starting pageserver cleanup')
|
||||
ps.stop(True)
|
||||
|
||||
print('Starting pageserver cleanup')
|
||||
ps.stop()
|
||||
|
||||
class PgBin:
|
||||
""" A helper class for executing postgres binaries """
|
||||
@@ -508,7 +481,7 @@ class PgBin:
|
||||
"""
|
||||
|
||||
self._fixpath(command)
|
||||
log.info('Running command "{}"'.format(' '.join(command)))
|
||||
print('Running command "{}"'.format(' '.join(command)))
|
||||
env = self._build_env(env)
|
||||
subprocess.run(command, env=env, cwd=cwd, check=True)
|
||||
|
||||
@@ -525,7 +498,7 @@ class PgBin:
|
||||
"""
|
||||
|
||||
self._fixpath(command)
|
||||
log.info('Running command "{}"'.format(' '.join(command)))
|
||||
print('Running command "{}"'.format(' '.join(command)))
|
||||
env = self._build_env(env)
|
||||
return subprocess_capture(self.log_dir, command, env=env, cwd=cwd, check=True, **kwargs)
|
||||
|
||||
@@ -534,11 +507,9 @@ class PgBin:
|
||||
def pg_bin(test_output_dir: str, pg_distrib_dir: str) -> PgBin:
|
||||
return PgBin(test_output_dir, pg_distrib_dir)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pageserver_auth_enabled(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: PageserverPort):
|
||||
with ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir,
|
||||
port=pageserver_port).init(enable_auth=True).start() as ps:
|
||||
with ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir, port=pageserver_port).init(enable_auth=True).start() as ps:
|
||||
# For convenience in tests, create a branch from the freshly-initialized cluster.
|
||||
zenith_cli.run(["branch", "empty", "main"])
|
||||
yield ps
|
||||
@@ -546,27 +517,21 @@ def pageserver_auth_enabled(zenith_cli: ZenithCli, repo_dir: str, pageserver_por
|
||||
|
||||
class Postgres(PgProtocol):
|
||||
""" An object representing a running postgres daemon. """
|
||||
def __init__(self,
|
||||
zenith_cli: ZenithCli,
|
||||
repo_dir: str,
|
||||
pg_bin: PgBin,
|
||||
tenant_id: str,
|
||||
port: int):
|
||||
def __init__(self, zenith_cli: ZenithCli, repo_dir: str, pg_bin: PgBin, tenant_id: str, port: int):
|
||||
super().__init__(host='localhost', port=port)
|
||||
|
||||
self.zenith_cli = zenith_cli
|
||||
self.running = False
|
||||
self.repo_dir = repo_dir
|
||||
self.node_name: Optional[str] = None # dubious, see asserts below
|
||||
self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
|
||||
self.branch: Optional[str] = None # dubious, see asserts below
|
||||
self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
|
||||
self.tenant_id = tenant_id
|
||||
self.pg_bin = pg_bin
|
||||
# path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<node_name>/postgresql.conf
|
||||
# path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<branch_name>/postgresql.conf
|
||||
|
||||
def create(
|
||||
self,
|
||||
node_name: str,
|
||||
branch: Optional[str] = None,
|
||||
branch: str,
|
||||
wal_acceptors: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> 'Postgres':
|
||||
@@ -580,19 +545,9 @@ class Postgres(PgProtocol):
|
||||
if not config_lines:
|
||||
config_lines = []
|
||||
|
||||
if branch is None:
|
||||
branch = node_name
|
||||
|
||||
self.zenith_cli.run([
|
||||
'pg',
|
||||
'create',
|
||||
f'--tenantid={self.tenant_id}',
|
||||
f'--port={self.port}',
|
||||
node_name,
|
||||
branch
|
||||
])
|
||||
self.node_name = node_name
|
||||
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name
|
||||
self.zenith_cli.run(['pg', 'create', branch, f'--tenantid={self.tenant_id}', f'--port={self.port}'])
|
||||
self.branch = branch
|
||||
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.branch
|
||||
self.pgdata_dir = os.path.join(self.repo_dir, path)
|
||||
|
||||
if wal_acceptors is not None:
|
||||
@@ -609,21 +564,20 @@ class Postgres(PgProtocol):
|
||||
Returns self.
|
||||
"""
|
||||
|
||||
assert self.node_name is not None
|
||||
assert self.branch is not None
|
||||
|
||||
log.info(f"Starting postgres node {self.node_name}")
|
||||
print(f"Starting postgres on branch {self.branch}")
|
||||
|
||||
run_result = self.zenith_cli.run(
|
||||
['pg', 'start', f'--tenantid={self.tenant_id}', f'--port={self.port}', self.node_name])
|
||||
run_result = self.zenith_cli.run(['pg', 'start', self.branch, f'--tenantid={self.tenant_id}', f'--port={self.port}'])
|
||||
self.running = True
|
||||
|
||||
log.info(f"stdout: {run_result.stdout}")
|
||||
print(f"stdout: {run_result.stdout}")
|
||||
|
||||
return self
|
||||
|
||||
def pg_data_dir_path(self) -> str:
|
||||
""" Path to data directory """
|
||||
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name
|
||||
path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.branch
|
||||
return os.path.join(self.repo_dir, path)
|
||||
|
||||
def pg_xact_dir_path(self) -> str:
|
||||
@@ -680,8 +634,8 @@ class Postgres(PgProtocol):
|
||||
"""
|
||||
|
||||
if self.running:
|
||||
assert self.node_name is not None
|
||||
self.zenith_cli.run(['pg', 'stop', self.node_name, f'--tenantid={self.tenant_id}'])
|
||||
assert self.branch is not None
|
||||
self.zenith_cli.run(['pg', 'stop', self.branch, f'--tenantid={self.tenant_id}'])
|
||||
self.running = False
|
||||
|
||||
return self
|
||||
@@ -692,17 +646,15 @@ class Postgres(PgProtocol):
|
||||
Returns self.
|
||||
"""
|
||||
|
||||
assert self.node_name is not None
|
||||
assert self.branch is not None
|
||||
assert self.tenant_id is not None
|
||||
self.zenith_cli.run(
|
||||
['pg', 'stop', '--destroy', self.node_name, f'--tenantid={self.tenant_id}'])
|
||||
self.zenith_cli.run(['pg', 'stop', '--destroy', self.branch, f'--tenantid={self.tenant_id}'])
|
||||
|
||||
return self
|
||||
|
||||
def create_start(
|
||||
self,
|
||||
node_name: str,
|
||||
branch: Optional[str] = None,
|
||||
branch: str,
|
||||
wal_acceptors: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None,
|
||||
) -> 'Postgres':
|
||||
@@ -713,7 +665,6 @@ class Postgres(PgProtocol):
|
||||
"""
|
||||
|
||||
self.create(
|
||||
node_name=node_name,
|
||||
branch=branch,
|
||||
wal_acceptors=wal_acceptors,
|
||||
config_lines=config_lines,
|
||||
@@ -727,15 +678,9 @@ class Postgres(PgProtocol):
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
self.stop()
|
||||
|
||||
|
||||
class PostgresFactory:
|
||||
""" An object representing multiple running postgres daemons. """
|
||||
def __init__(self,
|
||||
zenith_cli: ZenithCli,
|
||||
repo_dir: str,
|
||||
pg_bin: PgBin,
|
||||
initial_tenant: str,
|
||||
port_distributor: PortDistributor):
|
||||
def __init__(self, zenith_cli: ZenithCli, repo_dir: str, pg_bin: PgBin, initial_tenant: str, port_distributor: PortDistributor):
|
||||
self.zenith_cli = zenith_cli
|
||||
self.repo_dir = repo_dir
|
||||
self.num_instances = 0
|
||||
@@ -744,13 +689,13 @@ class PostgresFactory:
|
||||
self.port_distributor = port_distributor
|
||||
self.pg_bin = pg_bin
|
||||
|
||||
def create_start(self,
|
||||
node_name: str = "main",
|
||||
branch: Optional[str] = None,
|
||||
tenant_id: Optional[str] = None,
|
||||
wal_acceptors: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None) -> Postgres:
|
||||
|
||||
def create_start(
|
||||
self,
|
||||
branch: str = "main",
|
||||
tenant_id: Optional[str] = None,
|
||||
wal_acceptors: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None
|
||||
) -> Postgres:
|
||||
pg = Postgres(
|
||||
zenith_cli=self.zenith_cli,
|
||||
repo_dir=self.repo_dir,
|
||||
@@ -762,18 +707,18 @@ class PostgresFactory:
|
||||
self.instances.append(pg)
|
||||
|
||||
return pg.create_start(
|
||||
node_name=node_name,
|
||||
branch=branch,
|
||||
wal_acceptors=wal_acceptors,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
|
||||
def create(self,
|
||||
node_name: str = "main",
|
||||
branch: Optional[str] = None,
|
||||
tenant_id: Optional[str] = None,
|
||||
wal_acceptors: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None) -> Postgres:
|
||||
def create(
|
||||
self,
|
||||
branch: str = "main",
|
||||
tenant_id: Optional[str] = None,
|
||||
wal_acceptors: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None
|
||||
) -> Postgres:
|
||||
|
||||
pg = Postgres(
|
||||
zenith_cli=self.zenith_cli,
|
||||
@@ -787,17 +732,18 @@ class PostgresFactory:
|
||||
self.instances.append(pg)
|
||||
|
||||
return pg.create(
|
||||
node_name=node_name,
|
||||
branch=branch,
|
||||
wal_acceptors=wal_acceptors,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
|
||||
def config(self,
|
||||
node_name: str = "main",
|
||||
tenant_id: Optional[str] = None,
|
||||
wal_acceptors: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None) -> Postgres:
|
||||
def config(
|
||||
self,
|
||||
branch: str = "main",
|
||||
tenant_id: Optional[str] = None,
|
||||
wal_acceptors: Optional[str] = None,
|
||||
config_lines: Optional[List[str]] = None
|
||||
) -> Postgres:
|
||||
|
||||
pg = Postgres(
|
||||
zenith_cli=self.zenith_cli,
|
||||
@@ -811,7 +757,7 @@ class PostgresFactory:
|
||||
self.instances.append(pg)
|
||||
|
||||
return pg.config(
|
||||
node_name=node_name,
|
||||
branch=branch,
|
||||
wal_acceptors=wal_acceptors,
|
||||
config_lines=config_lines,
|
||||
)
|
||||
@@ -822,18 +768,13 @@ class PostgresFactory:
|
||||
|
||||
return self
|
||||
|
||||
|
||||
@zenfixture
|
||||
def initial_tenant(pageserver: ZenithPageserver):
|
||||
return pageserver.initial_tenant
|
||||
|
||||
|
||||
@zenfixture
|
||||
def postgres(zenith_cli: ZenithCli,
|
||||
initial_tenant: str,
|
||||
repo_dir: str,
|
||||
pg_bin: PgBin,
|
||||
port_distributor: PortDistributor) -> Iterator[PostgresFactory]:
|
||||
def postgres(zenith_cli: ZenithCli, initial_tenant: str, repo_dir: str, pg_bin: PgBin, port_distributor: PortDistributor) -> Iterator[PostgresFactory]:
|
||||
pgfactory = PostgresFactory(
|
||||
zenith_cli=zenith_cli,
|
||||
repo_dir=repo_dir,
|
||||
@@ -845,10 +786,9 @@ def postgres(zenith_cli: ZenithCli,
|
||||
yield pgfactory
|
||||
|
||||
# After the yield comes any cleanup code we need.
|
||||
log.info('Starting postgres cleanup')
|
||||
print('Starting postgres cleanup')
|
||||
pgfactory.stop_all()
|
||||
|
||||
|
||||
def read_pid(path: Path):
|
||||
""" Read content of file into number """
|
||||
return int(path.read_text())
|
||||
@@ -859,22 +799,20 @@ class WalAcceptorPort:
|
||||
pg: int
|
||||
http: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class WalAcceptor:
|
||||
""" An object representing a running wal acceptor daemon. """
|
||||
wa_bin_path: Path
|
||||
data_dir: Path
|
||||
port: WalAcceptorPort
|
||||
num: int # identifier for logging
|
||||
num: int # identifier for logging
|
||||
pageserver_port: int
|
||||
auth_token: Optional[str] = None
|
||||
|
||||
def start(self) -> 'WalAcceptor':
|
||||
# create data directory if not exists
|
||||
self.data_dir.mkdir(parents=True, exist_ok=True)
|
||||
with suppress(FileNotFoundError):
|
||||
self.pidfile.unlink()
|
||||
self.pidfile.unlink(missing_ok=True)
|
||||
|
||||
cmd = [str(self.wa_bin_path)]
|
||||
cmd.extend(["-D", str(self.data_dir)])
|
||||
@@ -885,29 +823,22 @@ class WalAcceptor:
|
||||
# Tell page server it can receive WAL from this WAL safekeeper
|
||||
cmd.extend(["--pageserver", f"localhost:{self.pageserver_port}"])
|
||||
cmd.extend(["--recall", "1 second"])
|
||||
log.info('Running command "{}"'.format(' '.join(cmd)))
|
||||
print('Running command "{}"'.format(' '.join(cmd)))
|
||||
env = {'PAGESERVER_AUTH_TOKEN': self.auth_token} if self.auth_token else None
|
||||
subprocess.run(cmd, check=True, env=env)
|
||||
|
||||
# wait for wal acceptor start by checking its status
|
||||
started_at = time.time()
|
||||
while True:
|
||||
try:
|
||||
http_cli = self.http_client()
|
||||
http_cli.check_status()
|
||||
except Exception as e:
|
||||
elapsed = time.time() - started_at
|
||||
if elapsed > 3:
|
||||
raise RuntimeError(
|
||||
f"timed out waiting {elapsed:.0f}s for wal acceptor start: {e}")
|
||||
time.sleep(0.5)
|
||||
else:
|
||||
break # success
|
||||
return self
|
||||
# wait for wal acceptor start by checkking that pid is readable
|
||||
for _ in range(3):
|
||||
pid = self.get_pid()
|
||||
if pid is not None:
|
||||
return self
|
||||
time.sleep(0.5)
|
||||
|
||||
raise RuntimeError("cannot get wal acceptor pid")
|
||||
|
||||
@property
|
||||
def pidfile(self) -> Path:
|
||||
return self.data_dir / "safekeeper.pid"
|
||||
return self.data_dir / "wal_acceptor.pid"
|
||||
|
||||
def get_pid(self) -> Optional[int]:
|
||||
if not self.pidfile.exists():
|
||||
@@ -921,21 +852,20 @@ class WalAcceptor:
|
||||
return pid
|
||||
|
||||
def stop(self) -> 'WalAcceptor':
|
||||
log.info('Stopping wal acceptor {}'.format(self.num))
|
||||
print('Stopping wal acceptor {}'.format(self.num))
|
||||
pid = self.get_pid()
|
||||
if pid is None:
|
||||
log.info("Wal acceptor {} is not running".format(self.num))
|
||||
print("Wal acceptor {} is not running".format(self.num))
|
||||
return self
|
||||
|
||||
try:
|
||||
os.kill(pid, signal.SIGTERM)
|
||||
except Exception:
|
||||
# TODO: cleanup pid file on exit in wal acceptor
|
||||
pass # pidfile might be obsolete
|
||||
pass # pidfile might be obsolete
|
||||
return self
|
||||
|
||||
def append_logical_message(self, tenant_id: str, timeline_id: str,
|
||||
request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def append_logical_message(self, tenant_id: str, timeline_id: str, request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Send JSON_CTRL query to append LogicalMessage to WAL and modify
|
||||
safekeeper state. It will construct LogicalMessage from provided
|
||||
@@ -951,24 +881,16 @@ class WalAcceptor:
|
||||
conn.autocommit = True
|
||||
with conn.cursor() as cur:
|
||||
request_json = json.dumps(request)
|
||||
log.info(f"JSON_CTRL request on port {self.port.pg}: {request_json}")
|
||||
print(f"JSON_CTRL request on port {self.port.pg}: {request_json}")
|
||||
cur.execute("JSON_CTRL " + request_json)
|
||||
all = cur.fetchall()
|
||||
log.info(f"JSON_CTRL response: {all[0][0]}")
|
||||
print(f"JSON_CTRL response: {all[0][0]}")
|
||||
return json.loads(all[0][0])
|
||||
|
||||
def http_client(self):
|
||||
return WalAcceptorHttpClient(port=self.port.http)
|
||||
|
||||
|
||||
class WalAcceptorFactory:
|
||||
""" An object representing multiple running wal acceptors. """
|
||||
def __init__(self,
|
||||
zenith_binpath: Path,
|
||||
data_dir: Path,
|
||||
pageserver_port: int,
|
||||
port_distributor: PortDistributor):
|
||||
self.wa_bin_path = zenith_binpath / 'safekeeper'
|
||||
def __init__(self, zenith_binpath: Path, data_dir: Path, pageserver_port: int, port_distributor: PortDistributor):
|
||||
self.wa_bin_path = zenith_binpath / 'wal_acceptor'
|
||||
self.data_dir = data_dir
|
||||
self.instances: List[WalAcceptor] = []
|
||||
self.port_distributor = port_distributor
|
||||
@@ -1013,10 +935,7 @@ class WalAcceptorFactory:
|
||||
|
||||
|
||||
@zenfixture
|
||||
def wa_factory(zenith_binpath: str,
|
||||
repo_dir: str,
|
||||
pageserver_port: PageserverPort,
|
||||
port_distributor: PortDistributor) -> Iterator[WalAcceptorFactory]:
|
||||
def wa_factory(zenith_binpath: str, repo_dir: str, pageserver_port: PageserverPort, port_distributor: PortDistributor) -> Iterator[WalAcceptorFactory]:
|
||||
""" Gives WalAcceptorFactory providing wal acceptors. """
|
||||
wafactory = WalAcceptorFactory(
|
||||
zenith_binpath=Path(zenith_binpath),
|
||||
@@ -1026,38 +945,16 @@ def wa_factory(zenith_binpath: str,
|
||||
)
|
||||
yield wafactory
|
||||
# After the yield comes any cleanup code we need.
|
||||
log.info('Starting wal acceptors cleanup')
|
||||
print('Starting wal acceptors cleanup')
|
||||
wafactory.stop_all()
|
||||
|
||||
|
||||
@dataclass
|
||||
class SafekeeperTimelineStatus:
|
||||
acceptor_epoch: int
|
||||
flush_lsn: str
|
||||
|
||||
|
||||
class WalAcceptorHttpClient(requests.Session):
|
||||
def __init__(self, port: int) -> None:
|
||||
super().__init__()
|
||||
self.port = port
|
||||
|
||||
def check_status(self):
|
||||
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
|
||||
|
||||
def timeline_status(self, tenant_id: str, timeline_id: str) -> SafekeeperTimelineStatus:
|
||||
res = self.get(f"http://localhost:{self.port}/v1/timeline/{tenant_id}/{timeline_id}")
|
||||
res.raise_for_status()
|
||||
resj = res.json()
|
||||
return SafekeeperTimelineStatus(acceptor_epoch=resj['acceptor_state']['epoch'],
|
||||
flush_lsn=resj['flush_lsn'])
|
||||
|
||||
|
||||
@zenfixture
|
||||
def base_dir() -> str:
|
||||
""" find the base directory (currently this is the git root) """
|
||||
|
||||
base_dir = os.path.normpath(os.path.join(get_self_dir(), '../..'))
|
||||
log.info(f'base_dir is {base_dir}')
|
||||
print('\nbase_dir is', base_dir)
|
||||
return base_dir
|
||||
|
||||
|
||||
@@ -1086,7 +983,7 @@ def test_output_dir(request: Any, top_output_dir: str) -> str:
|
||||
test_name = 'shared'
|
||||
|
||||
test_output_dir = os.path.join(top_output_dir, test_name)
|
||||
log.info(f'test_output_dir is {test_output_dir}')
|
||||
print('test_output_dir is', test_output_dir)
|
||||
shutil.rmtree(test_output_dir, ignore_errors=True)
|
||||
mkdir_if_needed(test_output_dir)
|
||||
return test_output_dir
|
||||
@@ -1128,7 +1025,7 @@ def pg_distrib_dir(base_dir: str) -> str:
|
||||
pg_dir = env_postgres_bin
|
||||
else:
|
||||
pg_dir = os.path.normpath(os.path.join(base_dir, DEFAULT_POSTGRES_DIR))
|
||||
log.info(f'postgres dir is {pg_dir}')
|
||||
print('postgres dir is', pg_dir)
|
||||
if not os.path.exists(os.path.join(pg_dir, 'bin/postgres')):
|
||||
raise Exception('postgres not found at "{}"'.format(pg_dir))
|
||||
return pg_dir
|
||||
@@ -1150,7 +1047,6 @@ class TenantFactory:
|
||||
def tenant_factory(zenith_cli: ZenithCli):
|
||||
return TenantFactory(zenith_cli)
|
||||
|
||||
|
||||
#
|
||||
# Test helpers
|
||||
#
|
||||
@@ -1161,29 +1057,18 @@ def list_files_to_compare(pgdata_dir: str):
|
||||
rel_dir = os.path.relpath(root, pgdata_dir)
|
||||
# Skip some dirs and files we don't want to compare
|
||||
skip_dirs = ['pg_wal', 'pg_stat', 'pg_stat_tmp', 'pg_subtrans', 'pg_logical']
|
||||
skip_files = [
|
||||
'pg_internal.init',
|
||||
'pg.log',
|
||||
'zenith.signal',
|
||||
'postgresql.conf',
|
||||
'postmaster.opts',
|
||||
'postmaster.pid',
|
||||
'pg_control'
|
||||
]
|
||||
skip_files = ['pg_internal.init', 'pg.log', 'zenith.signal', 'postgresql.conf',
|
||||
'postmaster.opts', 'postmaster.pid', 'pg_control']
|
||||
if rel_dir not in skip_dirs and filename not in skip_files:
|
||||
rel_file = os.path.join(rel_dir, filename)
|
||||
pgdata_files.append(rel_file)
|
||||
|
||||
pgdata_files.sort()
|
||||
log.info(pgdata_files)
|
||||
print(pgdata_files)
|
||||
return pgdata_files
|
||||
|
||||
|
||||
# pg is the existing and running compute node, that we want to compare with a basebackup
|
||||
def check_restored_datadir_content(zenith_cli: ZenithCli,
|
||||
test_output_dir: str,
|
||||
pg: Postgres,
|
||||
pageserver_pg_port: int):
|
||||
def check_restored_datadir_content(zenith_cli: ZenithCli, test_output_dir: str, pg: Postgres, pageserver_pg_port: int):
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'basebackup' command
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -1195,7 +1080,7 @@ def check_restored_datadir_content(zenith_cli: ZenithCli,
|
||||
pg.stop()
|
||||
|
||||
# Take a basebackup from pageserver
|
||||
restored_dir_path = os.path.join(test_output_dir, f"{pg.node_name}_restored_datadir")
|
||||
restored_dir_path = os.path.join(test_output_dir, f"{pg.branch}_restored_datadir")
|
||||
mkdir_if_needed(restored_dir_path)
|
||||
|
||||
psql_path = os.path.join(pg.pg_bin.pg_bin_path, 'psql')
|
||||
@@ -1225,7 +1110,9 @@ def check_restored_datadir_content(zenith_cli: ZenithCli,
|
||||
restored_dir_path,
|
||||
pgdata_files,
|
||||
shallow=False)
|
||||
log.info(f'filecmp result mismatch and error lists:\n\t mismatch={mismatch}\n\t error={error}')
|
||||
print('filecmp result mismatch and error lists:')
|
||||
print(mismatch)
|
||||
print(error)
|
||||
|
||||
for f in mismatch:
|
||||
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
while true; do
|
||||
echo -n "==== CURRENT TIME:" >> /tmp/test_output/netstat.stdout
|
||||
date +"%T.%N" >> /tmp/test_output/netstat.stdout
|
||||
sudo netstat -vpnoa | grep tcp | sort >> /tmp/test_output/netstat.stdout
|
||||
sleep 0.5
|
||||
done
|
||||
@@ -1,11 +1,9 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
|
||||
#
|
||||
# Run bulk INSERT test.
|
||||
#
|
||||
@@ -16,21 +14,16 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
# 3. Disk space used
|
||||
# 4. Peak memory usage
|
||||
#
|
||||
def test_bulk_insert(postgres: PostgresFactory,
|
||||
pageserver: ZenithPageserver,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
zenbenchmark,
|
||||
repo_dir: str):
|
||||
def test_bulk_insert(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_bulk_insert", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_bulk_insert')
|
||||
log.info("postgres is running on 'test_bulk_insert' branch")
|
||||
print("postgres is running on 'test_bulk_insert' branch")
|
||||
|
||||
# Open a connection directly to the page server that we'll use to force
|
||||
# flushing the layers to disk
|
||||
psconn = pageserver.connect()
|
||||
psconn = pageserver.connect();
|
||||
pscur = psconn.cursor()
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
@@ -54,7 +47,5 @@ def test_bulk_insert(postgres: PostgresFactory,
|
||||
zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB')
|
||||
|
||||
# Report disk space used by the repository
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir,
|
||||
pageserver.initial_tenant,
|
||||
timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
|
||||
|
||||
@@ -37,9 +37,7 @@ def test_bulk_tenant_create(
|
||||
|
||||
tenant = tenant_factory.create()
|
||||
zenith_cli.run([
|
||||
"branch",
|
||||
f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
|
||||
"main",
|
||||
"branch", f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}", "main",
|
||||
f"--tenantid={tenant}"
|
||||
])
|
||||
|
||||
@@ -48,7 +46,6 @@ def test_bulk_tenant_create(
|
||||
|
||||
pg_tenant = postgres.create_start(
|
||||
f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}",
|
||||
None, # branch name, None means same as node name
|
||||
tenant,
|
||||
wal_acceptors=wa_factory.get_connstrs() if use_wal_acceptors == 'with_wa' else None,
|
||||
)
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
|
||||
#
|
||||
# Test buffering GisT build. It WAL-logs the whole relation, in 32-page chunks.
|
||||
# As of this writing, we're duplicate those giant WAL records for each page,
|
||||
# which makes the delta layer about 32x larger than it needs to be.
|
||||
#
|
||||
def test_gist_buffering_build(postgres: PostgresFactory,
|
||||
pageserver: ZenithPageserver,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
zenbenchmark,
|
||||
repo_dir: str):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_gist_buffering_build", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_gist_buffering_build')
|
||||
log.info("postgres is running on 'test_gist_buffering_build' branch")
|
||||
|
||||
# Open a connection directly to the page server that we'll use to force
|
||||
# flushing the layers to disk
|
||||
psconn = pageserver.connect()
|
||||
pscur = psconn.cursor()
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
with closing(pg.connect()) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SHOW zenith.zenith_timeline")
|
||||
timeline = cur.fetchone()[0]
|
||||
|
||||
# Create test table.
|
||||
cur.execute("create table gist_point_tbl(id int4, p point)")
|
||||
cur.execute(
|
||||
"insert into gist_point_tbl select g, point(g, g) from generate_series(1, 1000000) g;"
|
||||
)
|
||||
|
||||
# Build the index.
|
||||
with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'):
|
||||
with zenbenchmark.record_duration('build'):
|
||||
cur.execute(
|
||||
"create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)"
|
||||
)
|
||||
|
||||
# Flush the layers from memory to disk. This is included in the reported
|
||||
# time and I/O
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 1000000")
|
||||
|
||||
# Record peak memory usage
|
||||
zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB')
|
||||
|
||||
# Report disk space used by the repository
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir,
|
||||
pageserver.initial_tenant,
|
||||
timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
|
||||
@@ -1,11 +1,9 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
|
||||
#
|
||||
# Run a very short pgbench test.
|
||||
#
|
||||
@@ -15,21 +13,16 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
# 2. Time to run 5000 pgbench transactions
|
||||
# 3. Disk space used
|
||||
#
|
||||
def test_pgbench(postgres: PostgresFactory,
|
||||
pageserver: ZenithPageserver,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
zenbenchmark,
|
||||
repo_dir: str):
|
||||
def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_pgbench_perf", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_pgbench_perf')
|
||||
log.info("postgres is running on 'test_pgbench_perf' branch")
|
||||
print("postgres is running on 'test_pgbench_perf' branch")
|
||||
|
||||
# Open a connection directly to the page server that we'll use to force
|
||||
# flushing the layers to disk
|
||||
psconn = pageserver.connect()
|
||||
psconn = pageserver.connect();
|
||||
pscur = psconn.cursor()
|
||||
|
||||
# Get the timeline ID of our branch. We need it for the 'do_gc' command
|
||||
@@ -59,4 +52,4 @@ def test_pgbench(postgres: PostgresFactory,
|
||||
|
||||
# Report disk space used by the repository
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
|
||||
zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
|
||||
|
||||
@@ -13,26 +13,19 @@
|
||||
import os
|
||||
from contextlib import closing
|
||||
from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture")
|
||||
|
||||
|
||||
def test_write_amplification(postgres: PostgresFactory,
|
||||
pageserver: ZenithPageserver,
|
||||
pg_bin,
|
||||
zenith_cli,
|
||||
zenbenchmark,
|
||||
repo_dir: str):
|
||||
def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str):
|
||||
# Create a branch for us
|
||||
zenith_cli.run(["branch", "test_write_amplification", "empty"])
|
||||
|
||||
pg = postgres.create_start('test_write_amplification')
|
||||
log.info("postgres is running on 'test_write_amplification' branch")
|
||||
print("postgres is running on 'test_write_amplification' branch")
|
||||
|
||||
# Open a connection directly to the page server that we'll use to force
|
||||
# flushing the layers to disk
|
||||
psconn = pageserver.connect()
|
||||
psconn = pageserver.connect();
|
||||
pscur = psconn.cursor()
|
||||
|
||||
with closing(pg.connect()) as conn:
|
||||
@@ -77,7 +70,5 @@ def test_write_amplification(postgres: PostgresFactory,
|
||||
pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0")
|
||||
|
||||
# Report disk space used by the repository
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir,
|
||||
pageserver.initial_tenant,
|
||||
timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB')
|
||||
timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline)
|
||||
zenbenchmark.record('size', timeline_size / (1024*1024), 'MB')
|
||||
|
||||
@@ -1,4 +1,2 @@
|
||||
[pytest]
|
||||
minversion = 6.0
|
||||
log_format = %(asctime)s.%(msecs)-3d %(levelname)s [%(filename)s:%(lineno)d] %(message)s
|
||||
log_date_format = %Y-%m-%d %H:%M:%S
|
||||
|
||||
@@ -10,7 +10,6 @@ max-line-length = 100
|
||||
[yapf]
|
||||
based_on_style = pep8
|
||||
column_limit = 100
|
||||
split_all_top_level_comma_separated_values = true
|
||||
|
||||
[mypy]
|
||||
# some tests don't typecheck when this flag is set
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import pytest
|
||||
import os
|
||||
|
||||
from fixtures.log_helper import log
|
||||
|
||||
pytest_plugins = ("fixtures.zenith_fixtures")
|
||||
"""
|
||||
Use this test to see what happens when tests fail.
|
||||
@@ -24,7 +22,7 @@ def test_broken(zenith_cli, pageserver, postgres, pg_bin):
|
||||
zenith_cli.run(["branch", "test_broken", "empty"])
|
||||
|
||||
postgres.create_start("test_broken")
|
||||
log.info('postgres is running')
|
||||
print('postgres is running')
|
||||
|
||||
log.info('THIS NEXT COMMAND WILL FAIL:')
|
||||
print('THIS NEXT COMMAND WILL FAIL:')
|
||||
pg_bin.run('pgbench -i_am_a_broken_test'.split())
|
||||
|
||||
2
vendor/postgres
vendored
2
vendor/postgres
vendored
Submodule vendor/postgres updated: 9160deb05a...5387eb4a3b
@@ -11,8 +11,6 @@ regex = "1.4.5"
|
||||
bincode = "1.3"
|
||||
bytes = "1.0.1"
|
||||
byteorder = "1.4.3"
|
||||
hyper = "0.14"
|
||||
routerify = "2"
|
||||
fs2 = "0.4.3"
|
||||
lazy_static = "1.4.0"
|
||||
serde_json = "1"
|
||||
|
||||
@@ -89,12 +89,12 @@ A: Page Server is a single server which can be lost. As our primary
|
||||
|
||||
Q: What if the compute node evicts a page, needs it back, but the page is yet
|
||||
to reach the Page Server?
|
||||
A: If the compute node has evicted a page, changes to it have been WAL-logged
|
||||
(that's why it is called Write Ahead logging; there are some exceptions like
|
||||
index builds, but these are exceptions). These WAL records will eventually
|
||||
reach the Page Server. The Page Server notes that the compute note requests
|
||||
pages with a very recent LSN and will not respond to the compute node until a
|
||||
corresponding WAL is received from WAL safekeepers.
|
||||
A: If the compute node has evicted a page, all changes from that page are
|
||||
already committed, i.e. they are saved on majority of WAL safekeepers. These
|
||||
WAL records will eventually reach the Page Server. The Page Server notes
|
||||
that the compute note requests pages with a very recent LSN and will not
|
||||
respond to the compute node until it a corresponding WAL is received from WAL
|
||||
safekeepers.
|
||||
|
||||
Q: How long may Page Server wait for?
|
||||
A: Not too long, hopefully. If a page is evicted, it probably was not used for
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//
|
||||
// Main entry point for the safekeeper executable
|
||||
// Main entry point for the wal_acceptor executable
|
||||
//
|
||||
use anyhow::Result;
|
||||
use clap::{App, Arg};
|
||||
@@ -14,21 +14,20 @@ use zenith_utils::http::endpoint;
|
||||
use zenith_utils::logging;
|
||||
|
||||
use walkeeper::defaults::{DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR};
|
||||
use walkeeper::http;
|
||||
use walkeeper::s3_offload;
|
||||
use walkeeper::wal_service;
|
||||
use walkeeper::WalAcceptorConf;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
zenith_metrics::set_common_metrics_prefix("safekeeper");
|
||||
let arg_matches = App::new("Zenith safekeeper")
|
||||
let arg_matches = App::new("Zenith wal_acceptor")
|
||||
.about("Store WAL stream to local file system and push it to WAL receivers")
|
||||
.arg(
|
||||
Arg::with_name("datadir")
|
||||
.short("D")
|
||||
.long("dir")
|
||||
.takes_value(true)
|
||||
.help("Path to the safekeeper data directory"),
|
||||
.help("Path to the WAL acceptor data directory"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("listen-pg")
|
||||
@@ -129,20 +128,14 @@ fn main() -> Result<()> {
|
||||
}
|
||||
|
||||
fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
|
||||
let log_filename = conf.data_dir.join("safekeeper.log");
|
||||
let log_file = logging::init(log_filename, conf.daemonize)?;
|
||||
let log_filename = conf.data_dir.join("wal_acceptor.log");
|
||||
let (_scope_guard, log_file) = logging::init(log_filename, conf.daemonize)?;
|
||||
|
||||
let http_listener = TcpListener::bind(conf.listen_http_addr.clone()).map_err(|e| {
|
||||
error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
|
||||
e
|
||||
})?;
|
||||
|
||||
info!("Starting safekeeper on {}", conf.listen_pg_addr);
|
||||
let pg_listener = TcpListener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
||||
error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
|
||||
e
|
||||
})?;
|
||||
|
||||
if conf.daemonize {
|
||||
info!("daemonizing...");
|
||||
|
||||
@@ -152,7 +145,7 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
|
||||
let stderr = log_file;
|
||||
|
||||
let daemonize = Daemonize::new()
|
||||
.pid_file("safekeeper.pid")
|
||||
.pid_file("wal_acceptor.pid")
|
||||
.working_directory(Path::new("."))
|
||||
.stdout(stdout)
|
||||
.stderr(stderr);
|
||||
@@ -165,12 +158,11 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
|
||||
|
||||
let mut threads = Vec::new();
|
||||
|
||||
let conf_cloned = conf.clone();
|
||||
let http_endpoint_thread = thread::Builder::new()
|
||||
.name("http_endpoint_thread".into())
|
||||
.spawn(|| {
|
||||
// TODO authentication
|
||||
let router = http::make_router(conf_cloned);
|
||||
.spawn(move || {
|
||||
// No authentication at all: read-only metrics only, early stage.
|
||||
let router = endpoint::make_router();
|
||||
endpoint::serve_thread_main(router, http_listener).unwrap();
|
||||
})
|
||||
.unwrap();
|
||||
@@ -192,7 +184,7 @@ fn start_wal_acceptor(conf: WalAcceptorConf) -> Result<()> {
|
||||
.name("WAL acceptor thread".into())
|
||||
.spawn(|| {
|
||||
// thread code
|
||||
let thread_result = wal_service::thread_main(conf, pg_listener);
|
||||
let thread_result = wal_service::thread_main(conf);
|
||||
if let Err(e) = thread_result {
|
||||
info!("wal_service thread terminated: {}", e);
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
pub mod routes;
|
||||
pub use routes::make_router;
|
||||
@@ -1,92 +0,0 @@
|
||||
use hyper::{Body, Request, Response, StatusCode};
|
||||
use routerify::ext::RequestExt;
|
||||
use routerify::RouterBuilder;
|
||||
use serde::Serialize;
|
||||
use serde::Serializer;
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
|
||||
use crate::safekeeper::AcceptorState;
|
||||
use crate::timeline::CreateControlFile;
|
||||
use crate::timeline::GlobalTimelines;
|
||||
use crate::WalAcceptorConf;
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::http::error::ApiError;
|
||||
use zenith_utils::http::json::json_response;
|
||||
use zenith_utils::http::request::parse_request_param;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
/// Healthcheck handler.
|
||||
async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
Ok(json_response(StatusCode::OK, "")?)
|
||||
}
|
||||
|
||||
fn get_conf(request: &Request<Body>) -> &WalAcceptorConf {
|
||||
request
|
||||
.data::<Arc<WalAcceptorConf>>()
|
||||
.expect("unknown state type")
|
||||
.as_ref()
|
||||
}
|
||||
|
||||
fn display_serialize<S, F>(z: &F, s: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
F: Display,
|
||||
{
|
||||
s.serialize_str(&format!("{}", z))
|
||||
}
|
||||
|
||||
/// Info about timeline on safekeeper ready for reporting.
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TimelineStatus {
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
tenant_id: ZTenantId,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
timeline_id: ZTimelineId,
|
||||
acceptor_state: AcceptorState,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
commit_lsn: Lsn,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
truncate_lsn: Lsn,
|
||||
#[serde(serialize_with = "display_serialize")]
|
||||
flush_lsn: Lsn,
|
||||
}
|
||||
|
||||
/// Report info about timeline.
|
||||
async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
|
||||
let tli = GlobalTimelines::get(
|
||||
get_conf(&request),
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
CreateControlFile::False,
|
||||
)
|
||||
.map_err(ApiError::from_err)?;
|
||||
let sk_state = tli.get_info();
|
||||
let (flush_lsn, _) = tli.get_end_of_wal();
|
||||
|
||||
let status = TimelineStatus {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
acceptor_state: sk_state.acceptor_state,
|
||||
commit_lsn: sk_state.commit_lsn,
|
||||
truncate_lsn: sk_state.truncate_lsn,
|
||||
flush_lsn,
|
||||
};
|
||||
Ok(json_response(StatusCode::OK, status)?)
|
||||
}
|
||||
|
||||
/// Safekeeper http router.
|
||||
pub fn make_router(conf: WalAcceptorConf) -> RouterBuilder<hyper::Body, ApiError> {
|
||||
let router = endpoint::make_router();
|
||||
router
|
||||
.data(Arc::new(conf))
|
||||
.get("/v1/status", status_handler)
|
||||
.get(
|
||||
"/v1/timeline/:tenant_id/:timeline_id",
|
||||
timeline_status_handler,
|
||||
)
|
||||
}
|
||||
@@ -2,7 +2,6 @@
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
pub mod http;
|
||||
pub mod json_ctrl;
|
||||
pub mod receive_wal;
|
||||
pub mod replication;
|
||||
|
||||
@@ -290,13 +290,13 @@ lazy_static! {
|
||||
"Current flush_lsn, grouped by timeline",
|
||||
&["ztli"]
|
||||
)
|
||||
.expect("Failed to register safekeeper_flush_lsn gauge vec");
|
||||
.expect("Failed to register safekeeper_flush_lsn int gauge vec");
|
||||
static ref COMMIT_LSN_GAUGE: GaugeVec = register_gauge_vec!(
|
||||
"safekeeper_commit_lsn",
|
||||
"Current commit_lsn (not necessarily persisted to disk), grouped by timeline",
|
||||
&["ztli"]
|
||||
)
|
||||
.expect("Failed to register safekeeper_commit_lsn gauge vec");
|
||||
.expect("Failed to register safekeeper_commit_lsn int gauge vec");
|
||||
}
|
||||
|
||||
struct SafeKeeperMetrics {
|
||||
@@ -312,13 +312,6 @@ impl SafeKeeperMetrics {
|
||||
commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&[&ztli_str]),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_noname() -> SafeKeeperMetrics {
|
||||
SafeKeeperMetrics {
|
||||
flush_lsn: FLUSH_LSN_GAUGE.with_label_values(&["n/a"]),
|
||||
commit_lsn: COMMIT_LSN_GAUGE.with_label_values(&["n/a"]),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// SafeKeeper which consumes events (messages from compute) and provides
|
||||
@@ -329,7 +322,7 @@ pub struct SafeKeeper<ST: Storage> {
|
||||
pub flush_lsn: Lsn,
|
||||
pub tli: u32,
|
||||
// Cached metrics so we don't have to recompute labels on each update.
|
||||
metrics: SafeKeeperMetrics,
|
||||
metrics: Option<SafeKeeperMetrics>,
|
||||
/// not-yet-flushed pairs of same named fields in s.*
|
||||
pub commit_lsn: Lsn,
|
||||
pub truncate_lsn: Lsn,
|
||||
@@ -348,7 +341,7 @@ where
|
||||
SafeKeeper {
|
||||
flush_lsn,
|
||||
tli,
|
||||
metrics: SafeKeeperMetrics::new_noname(),
|
||||
metrics: None,
|
||||
commit_lsn: state.commit_lsn,
|
||||
truncate_lsn: state.truncate_lsn,
|
||||
storage,
|
||||
@@ -400,7 +393,7 @@ where
|
||||
self.s.server.wal_seg_size = msg.wal_seg_size;
|
||||
self.storage.persist(&self.s, true)?;
|
||||
|
||||
self.metrics = SafeKeeperMetrics::new(self.s.server.ztli);
|
||||
self.metrics = Some(SafeKeeperMetrics::new(self.s.server.ztli));
|
||||
|
||||
info!(
|
||||
"processed greeting from proposer {:?}, sending term {:?}",
|
||||
@@ -525,7 +518,11 @@ where
|
||||
}
|
||||
if last_rec_lsn > self.flush_lsn {
|
||||
self.flush_lsn = last_rec_lsn;
|
||||
self.metrics.flush_lsn.set(u64::from(self.flush_lsn) as f64);
|
||||
self.metrics
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.flush_lsn
|
||||
.set(u64::from(self.flush_lsn) as f64);
|
||||
}
|
||||
|
||||
// Advance commit_lsn taking into account what we have locally. xxx this
|
||||
@@ -544,6 +541,8 @@ where
|
||||
commit_lsn >= msg.h.epoch_start_lsn && self.s.commit_lsn < msg.h.epoch_start_lsn;
|
||||
self.commit_lsn = commit_lsn;
|
||||
self.metrics
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.commit_lsn
|
||||
.set(u64::from(self.commit_lsn) as f64);
|
||||
}
|
||||
|
||||
@@ -13,13 +13,14 @@ use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use zenith_utils::postgres_backend;
|
||||
use zenith_utils::postgres_backend::PostgresBackend;
|
||||
use zenith_utils::pq_proto::{BeMessage, FeStartupMessage, RowDescriptor, INT4_OID, TEXT_OID};
|
||||
use zenith_utils::pq_proto::{BeMessage, FeStartupMessage, RowDescriptor};
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
|
||||
use crate::timeline::CreateControlFile;
|
||||
|
||||
/// Handler for streaming WAL from acceptor
|
||||
pub struct SendWalHandler {
|
||||
/// wal acceptor configuration
|
||||
pub conf: WalAcceptorConf,
|
||||
/// assigned application name
|
||||
pub appname: Option<String>,
|
||||
@@ -71,16 +72,19 @@ impl postgres_backend::Handler for SendWalHandler {
|
||||
}
|
||||
if query_string.starts_with(b"IDENTIFY_SYSTEM") {
|
||||
self.handle_identify_system(pgb)?;
|
||||
Ok(())
|
||||
} else if query_string.starts_with(b"START_REPLICATION") {
|
||||
ReplicationConn::new(pgb).run(self, pgb, &query_string)?;
|
||||
Ok(())
|
||||
} else if query_string.starts_with(b"START_WAL_PUSH") {
|
||||
ReceiveWalConn::new(pgb)?.run(self)?;
|
||||
Ok(())
|
||||
} else if query_string.starts_with(b"JSON_CTRL") {
|
||||
handle_json_ctrl(self, pgb, &query_string)?;
|
||||
Ok(())
|
||||
} else {
|
||||
bail!("Unexpected command {:?}", query_string);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -110,25 +114,25 @@ impl SendWalHandler {
|
||||
pgb.write_message_noflush(&BeMessage::RowDescription(&[
|
||||
RowDescriptor {
|
||||
name: b"systemid",
|
||||
typoid: TEXT_OID,
|
||||
typoid: 25,
|
||||
typlen: -1,
|
||||
..Default::default()
|
||||
},
|
||||
RowDescriptor {
|
||||
name: b"timeline",
|
||||
typoid: INT4_OID,
|
||||
typoid: 23,
|
||||
typlen: 4,
|
||||
..Default::default()
|
||||
},
|
||||
RowDescriptor {
|
||||
name: b"xlogpos",
|
||||
typoid: TEXT_OID,
|
||||
typoid: 25,
|
||||
typlen: -1,
|
||||
..Default::default()
|
||||
},
|
||||
RowDescriptor {
|
||||
name: b"dbname",
|
||||
typoid: TEXT_OID,
|
||||
typoid: 25,
|
||||
typlen: -1,
|
||||
..Default::default()
|
||||
},
|
||||
|
||||
@@ -112,7 +112,7 @@ impl SharedState {
|
||||
}
|
||||
match opts.open(&control_file_path) {
|
||||
Ok(mut file) => {
|
||||
// Lock file to prevent two or more active safekeepers
|
||||
// Lock file to prevent two or more active wal_acceptors
|
||||
match file.try_lock_exclusive() {
|
||||
Ok(()) => {}
|
||||
Err(e) => {
|
||||
@@ -289,7 +289,7 @@ lazy_static! {
|
||||
}
|
||||
|
||||
/// A zero-sized struct used to manage access to the global timelines map.
|
||||
pub struct GlobalTimelines;
|
||||
struct GlobalTimelines;
|
||||
|
||||
impl GlobalTimelines {
|
||||
/// Get a timeline with control file loaded from the global TIMELINES map.
|
||||
|
||||
@@ -12,7 +12,13 @@ use crate::WalAcceptorConf;
|
||||
use zenith_utils::postgres_backend::{AuthType, PostgresBackend};
|
||||
|
||||
/// Accept incoming TCP connections and spawn them into a background thread.
|
||||
pub fn thread_main(conf: WalAcceptorConf, listener: TcpListener) -> Result<()> {
|
||||
pub fn thread_main(conf: WalAcceptorConf) -> Result<()> {
|
||||
info!("Starting wal acceptor on {}", conf.listen_pg_addr);
|
||||
let listener = TcpListener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
||||
error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
|
||||
e
|
||||
})?;
|
||||
|
||||
loop {
|
||||
match listener.accept() {
|
||||
Ok((socket, peer_addr)) => {
|
||||
@@ -35,8 +41,8 @@ fn handle_socket(socket: TcpStream, conf: WalAcceptorConf) -> Result<()> {
|
||||
socket.set_nodelay(true)?;
|
||||
|
||||
let mut conn_handler = SendWalHandler::new(conf);
|
||||
let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None, false)?;
|
||||
// libpq replication protocol between safekeeper and replicas/pagers
|
||||
let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None)?;
|
||||
// libpq replication protocol between wal_acceptor and replicas/pagers
|
||||
pgbackend.run(&mut conn_handler)?;
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -32,15 +32,11 @@ struct BranchTreeEl {
|
||||
// * Providing CLI api to the pageserver
|
||||
// * TODO: export/import to/from usual postgres
|
||||
fn main() -> Result<()> {
|
||||
let node_arg = Arg::with_name("node")
|
||||
.index(1)
|
||||
.help("Node name")
|
||||
.required(true);
|
||||
|
||||
let timeline_arg = Arg::with_name("timeline")
|
||||
.index(2)
|
||||
.help("Branch name or a point-in time specification")
|
||||
.required(false);
|
||||
.short("n")
|
||||
.index(1)
|
||||
.help("Timeline name")
|
||||
.required(true);
|
||||
|
||||
let tenantid_arg = Arg::with_name("tenantid")
|
||||
.long("tenantid")
|
||||
@@ -92,12 +88,7 @@ fn main() -> Result<()> {
|
||||
)
|
||||
.subcommand(SubCommand::with_name("status"))
|
||||
.subcommand(SubCommand::with_name("start").about("Start local pageserver"))
|
||||
.subcommand(SubCommand::with_name("stop").about("Stop local pageserver")
|
||||
.arg(Arg::with_name("immediate")
|
||||
.help("Don't flush repository data at shutdown")
|
||||
.required(false)
|
||||
)
|
||||
)
|
||||
.subcommand(SubCommand::with_name("stop").about("Stop local pageserver"))
|
||||
.subcommand(SubCommand::with_name("restart").about("Restart local pageserver"))
|
||||
.subcommand(
|
||||
SubCommand::with_name("pg")
|
||||
@@ -106,10 +97,7 @@ fn main() -> Result<()> {
|
||||
.subcommand(SubCommand::with_name("list").arg(tenantid_arg.clone()))
|
||||
.subcommand(SubCommand::with_name("create")
|
||||
.about("Create a postgres compute node")
|
||||
.arg(node_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(port_arg.clone())
|
||||
.arg(timeline_arg.clone()).arg(tenantid_arg.clone()).arg(port_arg.clone())
|
||||
.arg(
|
||||
Arg::with_name("config-only")
|
||||
.help("Don't do basebackup, create compute node with only config files")
|
||||
@@ -118,13 +106,13 @@ fn main() -> Result<()> {
|
||||
))
|
||||
.subcommand(SubCommand::with_name("start")
|
||||
.about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
|
||||
.arg(node_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(port_arg.clone()))
|
||||
.arg(
|
||||
timeline_arg.clone()
|
||||
).arg(
|
||||
tenantid_arg.clone()
|
||||
).arg(port_arg.clone()))
|
||||
.subcommand(
|
||||
SubCommand::with_name("stop")
|
||||
.arg(node_arg.clone())
|
||||
.arg(timeline_arg.clone())
|
||||
.arg(tenantid_arg.clone())
|
||||
.arg(
|
||||
@@ -208,12 +196,10 @@ fn main() -> Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
("stop", Some(stop_match)) => {
|
||||
("stop", Some(_sub_m)) => {
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
|
||||
let immediate = stop_match.is_present("immediate");
|
||||
|
||||
if let Err(e) = pageserver.stop(immediate) {
|
||||
if let Err(e) = pageserver.stop() {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
@@ -222,8 +208,7 @@ fn main() -> Result<()> {
|
||||
("restart", Some(_sub_m)) => {
|
||||
let pageserver = PageServerNode::from_env(&env);
|
||||
|
||||
//TODO what shutdown strategy should we use here?
|
||||
if let Err(e) = pageserver.stop(false) {
|
||||
if let Err(e) = pageserver.stop() {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
exit(1);
|
||||
}
|
||||
@@ -437,32 +422,25 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let tenantid: ZTenantId = list_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
|
||||
let branch_infos = get_branch_infos(env, &tenantid).unwrap_or_else(|e| {
|
||||
eprintln!("Failed to load branch info: {}", e);
|
||||
HashMap::new()
|
||||
});
|
||||
|
||||
println!("NODE\tADDRESS\t\tBRANCH\tLSN\t\tSTATUS");
|
||||
for ((_, node_name), node) in cplane
|
||||
println!("BRANCH\tADDRESS\t\tLSN\t\tSTATUS");
|
||||
for ((_, timeline_name), node) in cplane
|
||||
.nodes
|
||||
.iter()
|
||||
.filter(|((node_tenantid, _), _)| node_tenantid == &tenantid)
|
||||
{
|
||||
// FIXME: This shows the LSN at the end of the timeline. It's not the
|
||||
// right thing to do for read-only nodes that might be anchored at an
|
||||
// older point in time, or following but lagging behind the primary.
|
||||
let lsn_str = branch_infos
|
||||
.get(&node.timelineid)
|
||||
.map(|bi| bi.latest_valid_lsn.to_string())
|
||||
.unwrap_or_else(|| "?".to_string());
|
||||
|
||||
println!(
|
||||
"{}\t{}\t{}\t{}\t{}",
|
||||
node_name,
|
||||
"{}\t{}\t{}\t{}",
|
||||
timeline_name,
|
||||
node.address,
|
||||
node.timelineid, // FIXME: resolve human-friendly branch name
|
||||
lsn_str,
|
||||
branch_infos
|
||||
.get(&node.timelineid)
|
||||
.map(|bi| bi.latest_valid_lsn.to_string())
|
||||
.unwrap_or_else(|| "?".to_string()),
|
||||
node.status(),
|
||||
);
|
||||
}
|
||||
@@ -471,28 +449,26 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
let tenantid: ZTenantId = create_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
let node_name = create_match.value_of("node").unwrap_or("main");
|
||||
let timeline_name = create_match.value_of("timeline").unwrap_or(node_name);
|
||||
let timeline_name = create_match.value_of("timeline").unwrap_or("main");
|
||||
|
||||
let port: Option<u16> = match create_match.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
cplane.new_node(tenantid, node_name, timeline_name, port)?;
|
||||
cplane.new_node(tenantid, timeline_name, port)?;
|
||||
}
|
||||
("start", Some(start_match)) => {
|
||||
let tenantid: ZTenantId = start_match
|
||||
.value_of("tenantid")
|
||||
.map_or(Ok(env.tenantid), |value| value.parse())?;
|
||||
let node_name = start_match.value_of("node").unwrap_or("main");
|
||||
let timeline_name = start_match.value_of("timeline");
|
||||
let timeline_name = start_match.value_of("timeline").unwrap_or("main");
|
||||
|
||||
let port: Option<u16> = match start_match.value_of("port") {
|
||||
Some(p) => Some(p.parse()?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let node = cplane.nodes.get(&(tenantid, node_name.to_owned()));
|
||||
let node = cplane.nodes.get(&(tenantid, timeline_name.to_owned()));
|
||||
|
||||
let auth_token = if matches!(env.auth_type, AuthType::ZenithJWT) {
|
||||
let claims = Claims::new(Some(tenantid), Scope::Tenant);
|
||||
@@ -501,11 +477,12 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
None
|
||||
};
|
||||
|
||||
println!(
|
||||
"Starting {} postgres on timeline {}...",
|
||||
if node.is_some() { "existing" } else { "new" },
|
||||
timeline_name
|
||||
);
|
||||
if let Some(node) = node {
|
||||
if timeline_name.is_some() {
|
||||
println!("timeline name ignored because node exists already");
|
||||
}
|
||||
println!("Starting existing postgres {}...", node_name);
|
||||
node.start(&auth_token)?;
|
||||
} else {
|
||||
// when used with custom port this results in non obvious behaviour
|
||||
@@ -513,17 +490,12 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
// start --port X
|
||||
// stop
|
||||
// start <-- will also use port X even without explicit port argument
|
||||
let timeline_name = timeline_name.unwrap_or(node_name);
|
||||
println!(
|
||||
"Starting new postgres {} on {}...",
|
||||
node_name, timeline_name
|
||||
);
|
||||
let node = cplane.new_node(tenantid, node_name, timeline_name, port)?;
|
||||
let node = cplane.new_node(tenantid, timeline_name, port)?;
|
||||
node.start(&auth_token)?;
|
||||
}
|
||||
}
|
||||
("stop", Some(stop_match)) => {
|
||||
let node_name = stop_match.value_of("node").unwrap_or("main");
|
||||
let timeline_name = stop_match.value_of("timeline").unwrap_or("main");
|
||||
let destroy = stop_match.is_present("destroy");
|
||||
let tenantid: ZTenantId = stop_match
|
||||
.value_of("tenantid")
|
||||
@@ -531,8 +503,8 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
|
||||
let node = cplane
|
||||
.nodes
|
||||
.get(&(tenantid, node_name.to_owned()))
|
||||
.ok_or_else(|| anyhow!("postgres {} is not found", node_name))?;
|
||||
.get(&(tenantid, timeline_name.to_owned()))
|
||||
.ok_or_else(|| anyhow!("postgres {} is not found", timeline_name))?;
|
||||
node.stop(destroy)?;
|
||||
}
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ pub fn set_common_metrics_prefix(prefix: &'static str) {
|
||||
}
|
||||
|
||||
/// Prepends a prefix to a common metric name so they are distinguished between
|
||||
/// different services, see <https://github.com/zenithdb/zenith/pull/681>
|
||||
/// different services, see https://github.com/zenithdb/zenith/pull/681
|
||||
/// A call to set_common_metrics_prefix() is necessary prior to calling this.
|
||||
pub fn new_common_metric_name(unprefixed_metric_name: &str) -> String {
|
||||
// Not unwrap() because metrics may be initialized after multiple threads have been started.
|
||||
|
||||
@@ -18,9 +18,12 @@ serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
thiserror = "1.0"
|
||||
tokio = "1.11"
|
||||
tracing = "0.1"
|
||||
tracing-log = "0.1"
|
||||
tracing-subscriber = "0.2"
|
||||
|
||||
slog-async = "2.6.0"
|
||||
slog-stdlog = "4.1.0"
|
||||
slog-scope = "4.4.0"
|
||||
slog-term = "2.8.0"
|
||||
slog = "2.7.0"
|
||||
|
||||
zenith_metrics = { path = "../zenith_metrics" }
|
||||
workspace_hack = { path = "../workspace_hack" }
|
||||
|
||||
@@ -12,17 +12,8 @@ use std::net::TcpListener;
|
||||
use zenith_metrics::{new_common_metric_name, register_int_counter, IntCounter};
|
||||
use zenith_metrics::{Encoder, TextEncoder};
|
||||
|
||||
use std::sync::Mutex;
|
||||
use tokio::sync::oneshot::Sender;
|
||||
|
||||
use super::error::ApiError;
|
||||
|
||||
lazy_static! {
|
||||
/// Channel used to send shutdown signal - wrapped in an Option to allow
|
||||
/// it to be taken by value (since oneshot channels consume themselves on send)
|
||||
static ref SHUTDOWN_SENDER: Mutex<Option<Sender<()>>> = Mutex::new(None);
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("serve_metrics_count"),
|
||||
@@ -152,18 +143,11 @@ pub fn check_permission(req: &Request<Body>, tenantid: Option<ZTenantId>) -> Res
|
||||
}
|
||||
}
|
||||
|
||||
// Send shutdown signal
|
||||
pub fn shutdown() {
|
||||
if let Some(tx) = SHUTDOWN_SENDER.lock().unwrap().take() {
|
||||
let _ = tx.send(());
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serve_thread_main(
|
||||
router_builder: RouterBuilder<hyper::Body, ApiError>,
|
||||
listener: TcpListener,
|
||||
) -> anyhow::Result<()> {
|
||||
log::info!("Starting a http endpoint at {}", listener.local_addr()?);
|
||||
log::info!("Starting a http endoint at {}", listener.local_addr()?);
|
||||
|
||||
// Create a Service from the router above to handle incoming requests.
|
||||
let service = RouterService::new(router_builder.build().map_err(|err| anyhow!(err))?).unwrap();
|
||||
@@ -175,14 +159,7 @@ pub fn serve_thread_main(
|
||||
|
||||
let _guard = runtime.enter();
|
||||
|
||||
let (send, recv) = tokio::sync::oneshot::channel::<()>();
|
||||
*SHUTDOWN_SENDER.lock().unwrap() = Some(send);
|
||||
|
||||
let server = Server::from_tcp(listener)?
|
||||
.serve(service)
|
||||
.with_graceful_shutdown(async {
|
||||
recv.await.ok();
|
||||
});
|
||||
let server = Server::from_tcp(listener)?.serve(service);
|
||||
|
||||
runtime.block_on(server)?;
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
pub mod endpoint;
|
||||
pub mod error;
|
||||
pub mod json;
|
||||
pub mod request;
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use super::error::ApiError;
|
||||
use hyper::{Body, Request};
|
||||
use routerify::ext::RequestExt;
|
||||
|
||||
pub fn get_request_param<'a>(
|
||||
request: &'a Request<Body>,
|
||||
param_name: &str,
|
||||
) -> Result<&'a str, ApiError> {
|
||||
match request.param(param_name) {
|
||||
Some(arg) => Ok(arg),
|
||||
None => {
|
||||
return Err(ApiError::BadRequest(format!(
|
||||
"no {} specified in path param",
|
||||
param_name
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_request_param<T: FromStr>(
|
||||
request: &Request<Body>,
|
||||
param_name: &str,
|
||||
) -> Result<T, ApiError> {
|
||||
match get_request_param(request, param_name)?.parse() {
|
||||
Ok(v) => Ok(v),
|
||||
Err(_) => Err(ApiError::BadRequest(format!(
|
||||
"failed to parse {}",
|
||||
param_name
|
||||
))),
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
use slog::{Drain, Level};
|
||||
use std::{
|
||||
fs::{File, OpenOptions},
|
||||
path::Path,
|
||||
@@ -5,12 +6,10 @@ use std::{
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
|
||||
use tracing::subscriber::set_global_default;
|
||||
use tracing_log::LogTracer;
|
||||
use tracing_subscriber::fmt;
|
||||
use tracing_subscriber::{layer::SubscriberExt, EnvFilter, Registry};
|
||||
|
||||
pub fn init(log_filename: impl AsRef<Path>, daemonize: bool) -> Result<File> {
|
||||
pub fn init(
|
||||
log_filename: impl AsRef<Path>,
|
||||
daemonize: bool,
|
||||
) -> Result<(slog_scope::GlobalLoggerGuard, File)> {
|
||||
// Don't open the same file for output multiple times;
|
||||
// the different fds could overwrite each other's output.
|
||||
let log_file = OpenOptions::new()
|
||||
@@ -19,38 +18,30 @@ pub fn init(log_filename: impl AsRef<Path>, daemonize: bool) -> Result<File> {
|
||||
.open(&log_filename)
|
||||
.with_context(|| format!("failed to open {:?}", log_filename.as_ref()))?;
|
||||
|
||||
let default_filter_str = "info";
|
||||
|
||||
// We fall back to printing all spans at info-level or above if
|
||||
// the RUST_LOG environment variable is not set.
|
||||
let env_filter =
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(default_filter_str));
|
||||
|
||||
// we are cloning and returning log file in order to allow redirecting daemonized stdout and stderr to it
|
||||
// if we do not use daemonization (e.g. in docker) it is better to log to stdout directly
|
||||
// for example to be in line with docker log command which expects logs comimg from stdout
|
||||
//
|
||||
// TODO: perhaps use a more human-readable format when !daemonize
|
||||
if daemonize {
|
||||
let x = log_file.try_clone().unwrap();
|
||||
|
||||
let fmt_layer = fmt::layer()
|
||||
.pretty()
|
||||
.with_target(false) // don't include event targets
|
||||
.with_ansi(false) // don't use colors in log file
|
||||
.with_writer(move || x.try_clone().unwrap());
|
||||
let subscriber = Registry::default().with(env_filter).with(fmt_layer);
|
||||
|
||||
set_global_default(subscriber).expect("Failed to set subscriber");
|
||||
let guard = if daemonize {
|
||||
let decorator = slog_term::PlainSyncDecorator::new(log_file.try_clone()?);
|
||||
let drain = slog_term::FullFormat::new(decorator)
|
||||
.build()
|
||||
.filter_level(Level::Info)
|
||||
.fuse();
|
||||
let logger = slog::Logger::root(drain, slog::o!());
|
||||
slog_scope::set_global_logger(logger)
|
||||
} else {
|
||||
let fmt_layer = fmt::layer().with_target(false); // don't include event targets
|
||||
let subscriber = Registry::default().with(env_filter).with(fmt_layer);
|
||||
let decorator = slog_term::TermDecorator::new().build();
|
||||
let drain = slog_term::FullFormat::new(decorator)
|
||||
.build()
|
||||
.filter_level(Level::Info)
|
||||
.fuse();
|
||||
let drain = slog_async::Async::new(drain).chan_size(1000).build().fuse();
|
||||
let logger = slog::Logger::root(drain, slog::o!());
|
||||
slog_scope::set_global_logger(logger)
|
||||
};
|
||||
|
||||
set_global_default(subscriber).expect("Failed to set subscriber");
|
||||
}
|
||||
// initialise forwarding of std log calls
|
||||
slog_stdlog::init()?;
|
||||
|
||||
// Redirect all `log`'s events to our subscriber
|
||||
LogTracer::init().expect("Failed to set logger");
|
||||
|
||||
Ok(log_file)
|
||||
Ok((guard, log_file))
|
||||
}
|
||||
|
||||
@@ -13,11 +13,7 @@ use serde::{Deserialize, Serialize};
|
||||
use std::io::{self, Write};
|
||||
use std::net::{Shutdown, SocketAddr, TcpStream};
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
static PGBACKEND_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
pub trait Handler {
|
||||
/// Handle single query.
|
||||
@@ -139,32 +135,13 @@ pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
|
||||
query_string
|
||||
}
|
||||
|
||||
// Helper function for socket read loops
|
||||
pub fn is_socket_read_timed_out(error: &anyhow::Error) -> bool {
|
||||
for cause in error.chain() {
|
||||
if let Some(io_error) = cause.downcast_ref::<io::Error>() {
|
||||
if io_error.kind() == std::io::ErrorKind::WouldBlock {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
impl PostgresBackend {
|
||||
pub fn new(
|
||||
socket: TcpStream,
|
||||
auth_type: AuthType,
|
||||
tls_config: Option<Arc<rustls::ServerConfig>>,
|
||||
set_read_timeout: bool,
|
||||
) -> io::Result<Self> {
|
||||
let peer_addr = socket.peer_addr()?;
|
||||
if set_read_timeout {
|
||||
socket
|
||||
.set_read_timeout(Some(Duration::from_secs(5)))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
stream: Some(Stream::Bidirectional(BidiStream::from_tcp(socket))),
|
||||
buf_out: BytesMut::with_capacity(10 * 1024),
|
||||
@@ -252,26 +229,12 @@ impl PostgresBackend {
|
||||
|
||||
let mut unnamed_query_string = Bytes::new();
|
||||
|
||||
while !PGBACKEND_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
|
||||
match self.read_message() {
|
||||
Ok(message) => {
|
||||
if let Some(msg) = message {
|
||||
trace!("got message {:?}", msg);
|
||||
while let Some(msg) = self.read_message()? {
|
||||
trace!("got message {:?}", msg);
|
||||
|
||||
match self.process_message(handler, msg, &mut unnamed_query_string)? {
|
||||
ProcessMsgResult::Continue => continue,
|
||||
ProcessMsgResult::Break => break,
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
// If it is a timeout error, continue the loop
|
||||
if !is_socket_read_timed_out(&e) {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
match self.process_message(handler, msg, &mut unnamed_query_string)? {
|
||||
ProcessMsgResult::Continue => continue,
|
||||
ProcessMsgResult::Break => break,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -464,8 +427,3 @@ impl PostgresBackend {
|
||||
Ok(ProcessMsgResult::Continue)
|
||||
}
|
||||
}
|
||||
|
||||
// Set the flag to inform connections to cancel
|
||||
pub fn set_pgbackend_shutdown_requested() {
|
||||
PGBACKEND_SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
@@ -15,9 +15,8 @@ use std::str;
|
||||
pub type Oid = u32;
|
||||
pub type SystemId = u64;
|
||||
|
||||
pub const INT8_OID: Oid = 20;
|
||||
pub const INT4_OID: Oid = 23;
|
||||
pub const TEXT_OID: Oid = 25;
|
||||
pub const INT8_OID: Oid = 20;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum FeMessage {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user