mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-18 10:52:55 +00:00
Compare commits
201 Commits
ps-thread-
...
projects_m
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
263a3ea5e3 | ||
|
|
bb69e0920c | ||
|
|
05f6a1394d | ||
|
|
844832ffe4 | ||
|
|
d29c545b5d | ||
|
|
6abdb12724 | ||
|
|
7898e72990 | ||
|
|
65704708fa | ||
|
|
6100a02d0f | ||
|
|
97fed38213 | ||
|
|
cadaca010c | ||
|
|
f09c09438a | ||
|
|
00fc696606 | ||
|
|
1d0706cf25 | ||
|
|
5ee19b0758 | ||
|
|
cef90d9220 | ||
|
|
4a05413a4c | ||
|
|
dd61f3558f | ||
|
|
8a714f1ebf | ||
|
|
137291dc24 | ||
|
|
eb8926083e | ||
|
|
26bca6ddba | ||
|
|
55192384c3 | ||
|
|
392cd8b1fc | ||
|
|
3cc531d093 | ||
|
|
84b9fcbbd5 | ||
|
|
93e050afe3 | ||
|
|
6d7dc384a5 | ||
|
|
3c2b03cd87 | ||
|
|
7c49abe7d1 | ||
|
|
d059e588a6 | ||
|
|
6222a0012b | ||
|
|
1ca28e6f3c | ||
|
|
6c4d6a2183 | ||
|
|
37465dafe3 | ||
|
|
ec0064c442 | ||
|
|
83c7e6ce52 | ||
|
|
f862373ac0 | ||
|
|
699f46cd84 | ||
|
|
36ee182d26 | ||
|
|
d11c9f9fcb | ||
|
|
d8a37452c8 | ||
|
|
e1336f451d | ||
|
|
a4d8261390 | ||
|
|
e2a5a31595 | ||
|
|
0ac0fba77a | ||
|
|
a001052cdd | ||
|
|
1f1d852204 | ||
|
|
f7b878611a | ||
|
|
a51b2dac9a | ||
|
|
e22d9cee3a | ||
|
|
a01999bc4a | ||
|
|
32e64afd54 | ||
|
|
8a53472e4f | ||
|
|
6e26588d17 | ||
|
|
0b93253b3c | ||
|
|
7dc6beacbd | ||
|
|
6cfebc096f | ||
|
|
fecad1ca34 | ||
|
|
92de8423af | ||
|
|
e442f5357b | ||
|
|
5a723d44cd | ||
|
|
2623193876 | ||
|
|
70a53c4b03 | ||
|
|
9e108102b3 | ||
|
|
9c846a93e8 | ||
|
|
c5007d3916 | ||
|
|
5b06599770 | ||
|
|
1d16ee92d4 | ||
|
|
7933804284 | ||
|
|
a91e0c299d | ||
|
|
b0c4ec0594 | ||
|
|
90e2c9ee1f | ||
|
|
aba5e5f8b5 | ||
|
|
b155fe0e2f | ||
|
|
c71faae2c6 | ||
|
|
de7eda2dc6 | ||
|
|
1188c9a95c | ||
|
|
e5cb727572 | ||
|
|
6623c5b9d5 | ||
|
|
e5a2b0372d | ||
|
|
af6143ea1f | ||
|
|
ff233cf4c2 | ||
|
|
b1b67cc5a0 | ||
|
|
ca10cc12c1 | ||
|
|
c97cd684e0 | ||
|
|
54e163ac03 | ||
|
|
595a6bc1e1 | ||
|
|
c3e0b6c839 | ||
|
|
36281e3b47 | ||
|
|
e014cb6026 | ||
|
|
915e5c9114 | ||
|
|
67d6ff4100 | ||
|
|
6a867bce6d | ||
|
|
751f1191b4 | ||
|
|
3accde613d | ||
|
|
e3b320daab | ||
|
|
4b4d3073b8 | ||
|
|
f1c51a1267 | ||
|
|
500e8772f0 | ||
|
|
b3ec6e0661 | ||
|
|
5d813f9738 | ||
|
|
757746b571 | ||
|
|
cb8bf1beb6 | ||
|
|
75f71a6380 | ||
|
|
54b75248ff | ||
|
|
0e1bd57c53 | ||
|
|
1d71949c51 | ||
|
|
7d565aa4b9 | ||
|
|
72a7220dc8 | ||
|
|
b0d114ee3f | ||
|
|
38f2d165b7 | ||
|
|
5a5737278e | ||
|
|
06f5e017a1 | ||
|
|
887b0e14d9 | ||
|
|
c584d90bb9 | ||
|
|
7997fc2932 | ||
|
|
24d2313d0b | ||
|
|
9ab52e2186 | ||
|
|
6f1f33ef42 | ||
|
|
703f691df8 | ||
|
|
2b265fd6dc | ||
|
|
d32b491a53 | ||
|
|
541ec25875 | ||
|
|
8346aa3a29 | ||
|
|
2aceb6a309 | ||
|
|
3ff5caf786 | ||
|
|
fbedd535c0 | ||
|
|
89e5659f3f | ||
|
|
ef7cdb13e2 | ||
|
|
73187bfef1 | ||
|
|
967eb38e81 | ||
|
|
a124e44866 | ||
|
|
c4b77084af | ||
|
|
c9efdec8db | ||
|
|
12b7c793b3 | ||
|
|
3c6890bf1d | ||
|
|
d97617ed3a | ||
|
|
65cf1a3221 | ||
|
|
a4aef5d8dc | ||
|
|
ffbb9dd155 | ||
|
|
baf7a81dce | ||
|
|
ee3bcf108d | ||
|
|
0da4046704 | ||
|
|
cbd00d7ed9 | ||
|
|
4c30ae8ba3 | ||
|
|
3da4b3165e | ||
|
|
c1b365fdf7 | ||
|
|
fab104d5f3 | ||
|
|
7dd27ecd20 | ||
|
|
bd2979d02c | ||
|
|
5914aab78a | ||
|
|
4a36d89247 | ||
|
|
432907ff5f | ||
|
|
98da0aa159 | ||
|
|
772c2fb4ff | ||
|
|
b9f84f4a83 | ||
|
|
134eeeb096 | ||
|
|
55ea3f262e | ||
|
|
f03779bf1a | ||
|
|
070c255522 | ||
|
|
9ccbb8d331 | ||
|
|
f2881bbd8a | ||
|
|
a884f4cf6b | ||
|
|
9a0fed0880 | ||
|
|
bea84150b2 | ||
|
|
85b5c0e989 | ||
|
|
e4a70faa08 | ||
|
|
c41549f630 | ||
|
|
c700032dd2 | ||
|
|
33cac863d7 | ||
|
|
51ea9c3053 | ||
|
|
a10cac980f | ||
|
|
081d5dac5e | ||
|
|
cded72a580 | ||
|
|
768c846eeb | ||
|
|
a2561f0a78 | ||
|
|
aa7c601eca | ||
|
|
bf899a57d9 | ||
|
|
07b85e7cfc | ||
|
|
22d997049c | ||
|
|
b683308791 | ||
|
|
51c0f9ab2b | ||
|
|
0030da57a8 | ||
|
|
85884a1599 | ||
|
|
ae20751724 | ||
|
|
5812e26b90 | ||
|
|
ec8861b8cc | ||
|
|
4538f1e1b8 | ||
|
|
b10ae195b7 | ||
|
|
b426775aa0 | ||
|
|
5da4f3a4df | ||
|
|
2bde77fced | ||
|
|
c864091035 | ||
|
|
20361395bb | ||
|
|
b338b5dffe | ||
|
|
5bd879f641 | ||
|
|
e6e883eb12 | ||
|
|
d710dff975 | ||
|
|
6cb14b4200 | ||
|
|
87dfa99734 |
@@ -6,5 +6,7 @@ timeout = 30
|
||||
|
||||
[ssh_connection]
|
||||
ssh_args = -F ./ansible.ssh.cfg
|
||||
scp_if_ssh = True
|
||||
# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
|
||||
# and scp neither worked for me
|
||||
transfer_method = piped
|
||||
pipelining = True
|
||||
|
||||
@@ -1,3 +1,7 @@
|
||||
# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
|
||||
# (use pre 8.5 option name to cope with old ssh in CI)
|
||||
PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
|
||||
|
||||
Host tele.zenith.tech
|
||||
User admin
|
||||
Port 3023
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
args:
|
||||
creates: "/storage/pageserver/data/tenants"
|
||||
environment:
|
||||
ZENITH_REPO_DIR: "/storage/pageserver/data"
|
||||
NEON_REPO_DIR: "/storage/pageserver/data"
|
||||
LD_LIBRARY_PATH: "/usr/local/lib"
|
||||
become: true
|
||||
tags:
|
||||
@@ -131,7 +131,7 @@
|
||||
args:
|
||||
creates: "/storage/safekeeper/data/safekeeper.id"
|
||||
environment:
|
||||
ZENITH_REPO_DIR: "/storage/safekeeper/data"
|
||||
NEON_REPO_DIR: "/storage/safekeeper/data"
|
||||
LD_LIBRARY_PATH: "/usr/local/lib"
|
||||
become: true
|
||||
tags:
|
||||
|
||||
@@ -6,7 +6,7 @@ RELEASE=${RELEASE:-false}
|
||||
|
||||
# look at docker hub for latest tag for neon docker image
|
||||
if [ "${RELEASE}" = "true" ]; then
|
||||
echo "search latest relase tag"
|
||||
echo "search latest release tag"
|
||||
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
|
||||
if [ -z "${VERSION}" ]; then
|
||||
echo "no any docker tags found, exiting..."
|
||||
@@ -31,7 +31,7 @@ echo "found ${VERSION}"
|
||||
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
|
||||
mkdir neon_install
|
||||
|
||||
# retrive binaries from docker image
|
||||
# retrieve binaries from docker image
|
||||
echo "getting binaries from docker image"
|
||||
docker pull --quiet neondatabase/neon:${TAG}
|
||||
ID=$(docker create neondatabase/neon:${TAG})
|
||||
|
||||
19
.circleci/ansible/neon-stress.hosts
Normal file
19
.circleci/ansible/neon-stress.hosts
Normal file
@@ -0,0 +1,19 @@
|
||||
[pageservers]
|
||||
neon-stress-ps-1 console_region_id=1
|
||||
neon-stress-ps-2 console_region_id=1
|
||||
|
||||
[safekeepers]
|
||||
neon-stress-sk-1 console_region_id=1
|
||||
neon-stress-sk-2 console_region_id=1
|
||||
neon-stress-sk-3 console_region_id=1
|
||||
|
||||
[storage:children]
|
||||
pageservers
|
||||
safekeepers
|
||||
|
||||
[storage:vars]
|
||||
console_mgmt_base_url = http://neon-stress-console.local
|
||||
bucket_name = neon-storage-ireland
|
||||
bucket_region = eu-west-1
|
||||
etcd_endpoints = etcd-stress.local:2379
|
||||
safekeeper_enable_s3_offload = false
|
||||
@@ -1,5 +1,6 @@
|
||||
[pageservers]
|
||||
zenith-1-ps-1 console_region_id=1
|
||||
#zenith-1-ps-1 console_region_id=1
|
||||
zenith-1-ps-2 console_region_id=1
|
||||
|
||||
[safekeepers]
|
||||
zenith-1-sk-1 console_region_id=1
|
||||
@@ -11,6 +12,7 @@ pageservers
|
||||
safekeepers
|
||||
|
||||
[storage:vars]
|
||||
env_name = prod-1
|
||||
console_mgmt_base_url = http://console-release.local
|
||||
bucket_name = zenith-storage-oregon
|
||||
bucket_region = us-west-2
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
[pageservers]
|
||||
#zenith-us-stage-ps-1 console_region_id=27
|
||||
zenith-us-stage-ps-2 console_region_id=27
|
||||
zenith-us-stage-ps-3 console_region_id=27
|
||||
|
||||
[safekeepers]
|
||||
zenith-us-stage-sk-1 console_region_id=27
|
||||
zenith-us-stage-sk-2 console_region_id=27
|
||||
zenith-us-stage-sk-4 console_region_id=27
|
||||
zenith-us-stage-sk-5 console_region_id=27
|
||||
zenith-us-stage-sk-6 console_region_id=27
|
||||
|
||||
[storage:children]
|
||||
pageservers
|
||||
safekeepers
|
||||
|
||||
[storage:vars]
|
||||
env_name = us-stage
|
||||
console_mgmt_base_url = http://console-staging.local
|
||||
bucket_name = zenith-staging-storage-us-east-1
|
||||
bucket_region = us-east-1
|
||||
|
||||
@@ -5,8 +5,8 @@ After=network.target auditd.service
|
||||
[Service]
|
||||
Type=simple
|
||||
User=pageserver
|
||||
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
|
||||
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /storage/pageserver/data
|
||||
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
|
||||
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=mixed
|
||||
KillSignal=SIGINT
|
||||
|
||||
@@ -5,8 +5,8 @@ After=network.target auditd.service
|
||||
[Service]
|
||||
Type=simple
|
||||
User=safekeeper
|
||||
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
|
||||
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }}
|
||||
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
|
||||
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=mixed
|
||||
KillSignal=SIGINT
|
||||
|
||||
@@ -11,15 +11,6 @@ executors:
|
||||
- image: zimg/rust:1.58
|
||||
|
||||
jobs:
|
||||
check-codestyle-rust:
|
||||
executor: neon-xlarge-executor
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: rustfmt
|
||||
when: always
|
||||
command: cargo fmt --all -- --check
|
||||
|
||||
# A job to build postgres
|
||||
build-postgres:
|
||||
executor: neon-xlarge-executor
|
||||
@@ -109,10 +100,8 @@ jobs:
|
||||
name: Rust build << parameters.build_type >>
|
||||
command: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
|
||||
CARGO_FLAGS=
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
CARGO_FLAGS="--release --features profiling"
|
||||
fi
|
||||
|
||||
@@ -121,7 +110,7 @@ jobs:
|
||||
export RUSTC_WRAPPER=cachepot
|
||||
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
|
||||
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
|
||||
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
||||
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
||||
cachepot -s
|
||||
|
||||
- save_cache:
|
||||
@@ -137,32 +126,24 @@ jobs:
|
||||
name: cargo test
|
||||
command: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
|
||||
CARGO_FLAGS=
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
CARGO_FLAGS=--release
|
||||
fi
|
||||
|
||||
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
|
||||
cargo test $CARGO_FLAGS
|
||||
|
||||
# Install the rust binaries, for use by test jobs
|
||||
- run:
|
||||
name: Install rust binaries
|
||||
command: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
fi
|
||||
|
||||
binaries=$(
|
||||
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
|
||||
cargo metadata --format-version=1 --no-deps |
|
||||
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
||||
)
|
||||
|
||||
test_exe_paths=$(
|
||||
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
|
||||
cargo test --message-format=json --no-run |
|
||||
jq -r '.executable | select(. != null)'
|
||||
)
|
||||
|
||||
@@ -175,34 +156,15 @@ jobs:
|
||||
SRC=target/$BUILD_TYPE/$bin
|
||||
DST=/tmp/zenith/bin/$bin
|
||||
cp $SRC $DST
|
||||
echo $DST >> /tmp/zenith/etc/binaries.list
|
||||
done
|
||||
|
||||
# Install test executables (for code coverage)
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
for bin in $test_exe_paths; do
|
||||
SRC=$bin
|
||||
DST=/tmp/zenith/test_bin/$(basename $bin)
|
||||
cp $SRC $DST
|
||||
echo $DST >> /tmp/zenith/etc/binaries.list
|
||||
done
|
||||
fi
|
||||
|
||||
# Install the postgres binaries, for use by test jobs
|
||||
- run:
|
||||
name: Install postgres binaries
|
||||
command: |
|
||||
cp -a tmp_install /tmp/zenith/pg_install
|
||||
|
||||
- run:
|
||||
name: Merge coverage data
|
||||
command: |
|
||||
# This will speed up workspace uploads
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
|
||||
fi
|
||||
|
||||
# Save the rust binaries and coverage data for other jobs in this workflow.
|
||||
# Save rust binaries for other jobs in the workflow
|
||||
- persist_to_workspace:
|
||||
root: /tmp/zenith
|
||||
paths:
|
||||
@@ -222,6 +184,12 @@ jobs:
|
||||
key: v2-python-deps-{{ checksum "poetry.lock" }}
|
||||
paths:
|
||||
- /home/circleci/.cache/pypoetry/virtualenvs
|
||||
- run:
|
||||
name: Print versions
|
||||
when: always
|
||||
command: |
|
||||
poetry run python --version
|
||||
poetry show
|
||||
- run:
|
||||
name: Run yapf to ensure code format
|
||||
when: always
|
||||
@@ -289,7 +257,7 @@ jobs:
|
||||
# no_output_timeout, specified here.
|
||||
no_output_timeout: 10m
|
||||
environment:
|
||||
- ZENITH_BIN: /tmp/zenith/bin
|
||||
- NEON_BIN: /tmp/zenith/bin
|
||||
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
|
||||
- TEST_OUTPUT: /tmp/test_output
|
||||
# this variable will be embedded in perf test report
|
||||
@@ -317,12 +285,6 @@ jobs:
|
||||
|
||||
export GITHUB_SHA=$CIRCLE_SHA1
|
||||
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
fi
|
||||
|
||||
# Run the tests.
|
||||
#
|
||||
# The junit.xml file allows CircleCI to display more fine-grained test information
|
||||
@@ -333,7 +295,7 @@ jobs:
|
||||
# -n4 uses four processes to run tests via pytest-xdist
|
||||
# -s is not used to prevent pytest from capturing output, because tests are running
|
||||
# in parallel and logs are mixed between different tests
|
||||
"${cov_prefix[@]}" ./scripts/pytest \
|
||||
./scripts/pytest \
|
||||
--junitxml=$TEST_OUTPUT/junit.xml \
|
||||
--tb=short \
|
||||
--verbose \
|
||||
@@ -355,74 +317,19 @@ jobs:
|
||||
when: always
|
||||
command: |
|
||||
du -sh /tmp/test_output/*
|
||||
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" -delete
|
||||
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
|
||||
du -sh /tmp/test_output/*
|
||||
- store_artifacts:
|
||||
path: /tmp/test_output
|
||||
# The store_test_results step tells CircleCI where to find the junit.xml file.
|
||||
- store_test_results:
|
||||
path: /tmp/test_output
|
||||
- run:
|
||||
name: Merge coverage data
|
||||
command: |
|
||||
# This will speed up workspace uploads
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
|
||||
fi
|
||||
# Save coverage data (if any)
|
||||
# Save data (if any)
|
||||
- persist_to_workspace:
|
||||
root: /tmp/zenith
|
||||
paths:
|
||||
- "*"
|
||||
|
||||
coverage-report:
|
||||
executor: neon-xlarge-executor
|
||||
steps:
|
||||
- attach_workspace:
|
||||
at: /tmp/zenith
|
||||
- checkout
|
||||
- restore_cache:
|
||||
name: Restore rust cache
|
||||
keys:
|
||||
# Require an exact match. While an out of date cache might speed up the build,
|
||||
# there's no way to clean out old packages, so the cache grows every time something
|
||||
# changes.
|
||||
- v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
|
||||
- run:
|
||||
name: Build coverage report
|
||||
command: |
|
||||
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
|
||||
|
||||
scripts/coverage \
|
||||
--dir=/tmp/zenith/coverage report \
|
||||
--input-objects=/tmp/zenith/etc/binaries.list \
|
||||
--commit-url=$COMMIT_URL \
|
||||
--format=github
|
||||
- run:
|
||||
name: Upload coverage report
|
||||
command: |
|
||||
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
|
||||
REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
|
||||
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
|
||||
|
||||
scripts/git-upload \
|
||||
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
|
||||
--message="Add code coverage for $COMMIT_URL" \
|
||||
copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
|
||||
|
||||
# Add link to the coverage report to the commit
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "$CI_ACCESS_TOKEN" \
|
||||
--data \
|
||||
"{
|
||||
\"state\": \"success\",
|
||||
\"context\": \"zenith-coverage\",
|
||||
\"description\": \"Coverage report is ready\",
|
||||
\"target_url\": \"$REPORT_URL\"
|
||||
}"
|
||||
|
||||
# Build neondatabase/neon:latest image and push it to Docker hub
|
||||
docker-image:
|
||||
docker:
|
||||
@@ -456,9 +363,6 @@ jobs:
|
||||
- checkout
|
||||
- setup_remote_docker:
|
||||
docker_layer_caching: true
|
||||
# Build neondatabase/compute-tools:latest image and push it to Docker hub
|
||||
# TODO: this should probably also use versioned tag, not just :latest.
|
||||
# XXX: but should it? We build and use it only locally now.
|
||||
- run:
|
||||
name: Build and push compute-tools Docker image
|
||||
command: |
|
||||
@@ -466,7 +370,10 @@ jobs:
|
||||
docker build \
|
||||
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||
--tag neondatabase/compute-tools:latest -f Dockerfile.compute-tools .
|
||||
--tag neondatabase/compute-tools:local \
|
||||
--tag neondatabase/compute-tools:latest \
|
||||
-f Dockerfile.compute-tools .
|
||||
# Only push :latest image
|
||||
docker push neondatabase/compute-tools:latest
|
||||
- run:
|
||||
name: Init postgres submodule
|
||||
@@ -476,7 +383,9 @@ jobs:
|
||||
command: |
|
||||
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||
docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:latest vendor/postgres
|
||||
docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
|
||||
--tag neondatabase/compute-node:latest vendor/postgres \
|
||||
--build-arg COMPUTE_TOOLS_TAG=local
|
||||
docker push neondatabase/compute-node:${DOCKER_TAG}
|
||||
docker push neondatabase/compute-node:latest
|
||||
|
||||
@@ -513,9 +422,6 @@ jobs:
|
||||
- checkout
|
||||
- setup_remote_docker:
|
||||
docker_layer_caching: true
|
||||
# Build neondatabase/compute-tools:release image and push it to Docker hub
|
||||
# TODO: this should probably also use versioned tag, not just :latest.
|
||||
# XXX: but should it? We build and use it only locally now.
|
||||
- run:
|
||||
name: Build and push compute-tools Docker image
|
||||
command: |
|
||||
@@ -523,7 +429,10 @@ jobs:
|
||||
docker build \
|
||||
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||
--tag neondatabase/compute-tools:release -f Dockerfile.compute-tools .
|
||||
--tag neondatabase/compute-tools:release \
|
||||
--tag neondatabase/compute-tools:local \
|
||||
-f Dockerfile.compute-tools .
|
||||
# Only push :release image
|
||||
docker push neondatabase/compute-tools:release
|
||||
- run:
|
||||
name: Init postgres submodule
|
||||
@@ -533,7 +442,9 @@ jobs:
|
||||
command: |
|
||||
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||
docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:release vendor/postgres
|
||||
docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
|
||||
--tag neondatabase/compute-node:release vendor/postgres \
|
||||
--build-arg COMPUTE_TOOLS_TAG=local
|
||||
docker push neondatabase/compute-node:${DOCKER_TAG}
|
||||
docker push neondatabase/compute-node:release
|
||||
|
||||
@@ -587,6 +498,56 @@ jobs:
|
||||
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
|
||||
deploy-neon-stress:
|
||||
docker:
|
||||
- image: cimg/python:3.10
|
||||
steps:
|
||||
- checkout
|
||||
- setup_remote_docker
|
||||
- run:
|
||||
name: Setup ansible
|
||||
command: |
|
||||
pip install --progress-bar off --user ansible boto3
|
||||
- run:
|
||||
name: Redeploy
|
||||
command: |
|
||||
cd "$(pwd)/.circleci/ansible"
|
||||
|
||||
./get_binaries.sh
|
||||
|
||||
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
|
||||
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
|
||||
chmod 0600 ssh-key
|
||||
ssh-add ssh-key
|
||||
rm -f ssh-key ssh-key-cert.pub
|
||||
|
||||
ansible-playbook deploy.yaml -i neon-stress.hosts
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-neon-stress-proxy:
|
||||
docker:
|
||||
- image: cimg/base:2021.04
|
||||
environment:
|
||||
KUBECONFIG: .kubeconfig
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: Store kubeconfig file
|
||||
command: |
|
||||
echo "${NEON_STRESS_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
|
||||
chmod 0600 ${KUBECONFIG}
|
||||
- run:
|
||||
name: Setup helm v3
|
||||
command: |
|
||||
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
- run:
|
||||
name: Re-deploy proxy
|
||||
command: |
|
||||
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||
helm upgrade neon-stress-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
helm upgrade neon-stress-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
|
||||
deploy-release:
|
||||
docker:
|
||||
- image: cimg/python:3.10
|
||||
@@ -629,61 +590,17 @@ jobs:
|
||||
name: Setup helm v3
|
||||
command: |
|
||||
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||
helm repo add zenithdb https://neondatabase.github.io/helm-charts
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
- run:
|
||||
name: Re-deploy proxy
|
||||
command: |
|
||||
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||
helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
|
||||
# Trigger a new remote CI job
|
||||
remote-ci-trigger:
|
||||
docker:
|
||||
- image: cimg/base:2021.04
|
||||
parameters:
|
||||
remote_repo:
|
||||
type: string
|
||||
environment:
|
||||
REMOTE_REPO: << parameters.remote_repo >>
|
||||
steps:
|
||||
- run:
|
||||
name: Set PR's status to pending
|
||||
command: |
|
||||
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
|
||||
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "$CI_ACCESS_TOKEN" \
|
||||
--data \
|
||||
"{
|
||||
\"state\": \"pending\",
|
||||
\"context\": \"neon-cloud-e2e\",
|
||||
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
|
||||
}"
|
||||
- run:
|
||||
name: Request a remote CI test
|
||||
command: |
|
||||
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
|
||||
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "$CI_ACCESS_TOKEN" \
|
||||
--data \
|
||||
"{
|
||||
\"ref\": \"main\",
|
||||
\"inputs\": {
|
||||
\"ci_job_name\": \"neon-cloud-e2e\",
|
||||
\"commit_hash\": \"$CIRCLE_SHA1\",
|
||||
\"remote_repo\": \"$LOCAL_REPO\"
|
||||
}
|
||||
}"
|
||||
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
|
||||
workflows:
|
||||
build_and_test:
|
||||
jobs:
|
||||
- check-codestyle-rust
|
||||
- check-codestyle-python
|
||||
- build-postgres:
|
||||
name: build-postgres-<< matrix.build_type >>
|
||||
@@ -699,7 +616,6 @@ workflows:
|
||||
- build-postgres-<< matrix.build_type >>
|
||||
- run-pytest:
|
||||
name: pg_regress-tests-<< matrix.build_type >>
|
||||
context: PERF_TEST_RESULT_CONNSTR
|
||||
matrix:
|
||||
parameters:
|
||||
build_type: ["debug", "release"]
|
||||
@@ -724,12 +640,6 @@ workflows:
|
||||
save_perf_report: true
|
||||
requires:
|
||||
- build-neon-release
|
||||
- coverage-report:
|
||||
# Context passes credentials for gh api
|
||||
context: CI_ACCESS_TOKEN
|
||||
requires:
|
||||
# TODO: consider adding more
|
||||
- other-tests-debug
|
||||
- docker-image:
|
||||
# Context gives an ability to login
|
||||
context: Docker Hub
|
||||
@@ -771,6 +681,25 @@ workflows:
|
||||
requires:
|
||||
- docker-image
|
||||
|
||||
- deploy-neon-stress:
|
||||
# Context gives an ability to login
|
||||
context: Docker Hub
|
||||
# deploy only for commits to main
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
requires:
|
||||
- docker-image
|
||||
- deploy-neon-stress-proxy:
|
||||
# deploy only for commits to main
|
||||
filters:
|
||||
branches:
|
||||
only:
|
||||
- main
|
||||
requires:
|
||||
- docker-image
|
||||
|
||||
- docker-image-release:
|
||||
# Context gives an ability to login
|
||||
context: Docker Hub
|
||||
@@ -811,14 +740,3 @@ workflows:
|
||||
- release
|
||||
requires:
|
||||
- docker-image-release
|
||||
- remote-ci-trigger:
|
||||
# Context passes credentials for gh api
|
||||
context: CI_ACCESS_TOKEN
|
||||
remote_repo: "neondatabase/cloud"
|
||||
requires:
|
||||
# XXX: Successful build doesn't mean everything is OK, but
|
||||
# the job to be triggered takes so much time to complete (~22 min)
|
||||
# that it's better not to wait for the commented-out steps
|
||||
- build-neon-release
|
||||
# - pg_regress-tests-release
|
||||
# - other-tests-release
|
||||
|
||||
26
.circleci/helm-values/neon-stress.proxy-scram.yaml
Normal file
26
.circleci/helm-values/neon-stress.proxy-scram.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
fullnameOverride: "neon-stress-proxy-scram"
|
||||
|
||||
settings:
|
||||
authBackend: "console"
|
||||
authEndpoint: "http://neon-stress-console.local/management/api/v2"
|
||||
domain: "*.stress.neon.tech"
|
||||
|
||||
podLabels:
|
||||
zenith_service: proxy-scram
|
||||
zenith_env: staging
|
||||
zenith_region: eu-west-1
|
||||
zenith_region_slug: ireland
|
||||
|
||||
exposedService:
|
||||
annotations:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: '*.stress.neon.tech'
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
34
.circleci/helm-values/neon-stress.proxy.yaml
Normal file
34
.circleci/helm-values/neon-stress.proxy.yaml
Normal file
@@ -0,0 +1,34 @@
|
||||
fullnameOverride: "neon-stress-proxy"
|
||||
|
||||
settings:
|
||||
authEndpoint: "https://console.dev.neon.tech/authenticate_proxy_request/"
|
||||
uri: "https://console.dev.neon.tech/psql_session/"
|
||||
|
||||
# -- Additional labels for zenith-proxy pods
|
||||
podLabels:
|
||||
zenith_service: proxy
|
||||
zenith_env: staging
|
||||
zenith_region: eu-west-1
|
||||
zenith_region_slug: ireland
|
||||
|
||||
service:
|
||||
annotations:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
|
||||
external-dns.alpha.kubernetes.io/hostname: neon-stress-proxy.local
|
||||
type: LoadBalancer
|
||||
|
||||
exposedService:
|
||||
annotations:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: connect.dev.neon.tech
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
24
.circleci/helm-values/production.proxy-scram.yaml
Normal file
24
.circleci/helm-values/production.proxy-scram.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
settings:
|
||||
authBackend: "console"
|
||||
authEndpoint: "http://console-release.local/management/api/v2"
|
||||
domain: "*.cloud.neon.tech"
|
||||
|
||||
podLabels:
|
||||
zenith_service: proxy-scram
|
||||
zenith_env: production
|
||||
zenith_region: us-west-2
|
||||
zenith_region_slug: oregon
|
||||
|
||||
exposedService:
|
||||
annotations:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: '*.cloud.neon.tech'
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
@@ -1,9 +1,3 @@
|
||||
# Helm chart values for zenith-proxy.
|
||||
# This is a YAML-formatted file.
|
||||
|
||||
image:
|
||||
repository: neondatabase/neon
|
||||
|
||||
settings:
|
||||
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
|
||||
uri: "https://console.neon.tech/psql_session/"
|
||||
@@ -28,7 +22,7 @@ exposedService:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: start.zenith.tech,connect.neon.tech,pg.neon.tech
|
||||
external-dns.alpha.kubernetes.io/hostname: connect.neon.tech,pg.neon.tech
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
|
||||
@@ -9,8 +9,8 @@ tmp_install
|
||||
tmp_check_cli
|
||||
test_output
|
||||
.vscode
|
||||
.zenith
|
||||
integration_tests/.zenith
|
||||
.neon
|
||||
integration_tests/.neon
|
||||
.mypy_cache
|
||||
|
||||
Dockerfile
|
||||
|
||||
140
.github/actions/run-python-test-set/action.yml
vendored
Normal file
140
.github/actions/run-python-test-set/action.yml
vendored
Normal file
@@ -0,0 +1,140 @@
|
||||
name: 'Run python test'
|
||||
description: 'Runs a Neon python test set, performing all the required preparations before'
|
||||
|
||||
inputs:
|
||||
build_type:
|
||||
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".'
|
||||
required: true
|
||||
rust_toolchain:
|
||||
description: 'Rust toolchain version to fetch the caches'
|
||||
required: true
|
||||
test_selection:
|
||||
description: 'A python test suite to run'
|
||||
required: true
|
||||
extra_params:
|
||||
description: 'Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr'
|
||||
required: false
|
||||
default: ''
|
||||
needs_postgres_source:
|
||||
description: 'Set to true if the test suite requires postgres source checked out'
|
||||
required: false
|
||||
default: 'false'
|
||||
run_in_parallel:
|
||||
description: 'Whether to run tests in parallel'
|
||||
required: false
|
||||
default: 'true'
|
||||
save_perf_report:
|
||||
description: 'Whether to upload the performance report'
|
||||
required: false
|
||||
default: 'false'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Get Neon artifact for restoration
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact
|
||||
path: ./neon-artifact/
|
||||
|
||||
- name: Extract Neon artifact
|
||||
shell: bash -ex {0}
|
||||
run: |
|
||||
mkdir -p /tmp/neon/
|
||||
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
|
||||
rm -rf ./neon-artifact/
|
||||
|
||||
- name: Checkout
|
||||
if: inputs.needs_postgres_source == 'true'
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Cache poetry deps
|
||||
id: cache_poetry
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Install Python deps
|
||||
shell: bash -ex {0}
|
||||
run: ./scripts/pysync
|
||||
|
||||
- name: Run pytest
|
||||
env:
|
||||
NEON_BIN: /tmp/neon/bin
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
# this variable will be embedded in perf test report
|
||||
# and is needed to distinguish different environments
|
||||
PLATFORM: github-actions-selfhosted
|
||||
shell: bash -ex {0}
|
||||
run: |
|
||||
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
|
||||
rm -rf $PERF_REPORT_DIR
|
||||
|
||||
TEST_SELECTION="test_runner/${{ inputs.test_selection }}"
|
||||
EXTRA_PARAMS="${{ inputs.extra_params }}"
|
||||
if [ -z "$TEST_SELECTION" ]; then
|
||||
echo "test_selection must be set"
|
||||
exit 1
|
||||
fi
|
||||
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
|
||||
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
|
||||
fi
|
||||
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
|
||||
if [[ "$GITHUB_REF" == "main" ]]; then
|
||||
mkdir -p "$PERF_REPORT_DIR"
|
||||
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
|
||||
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
|
||||
cov_prefix=()
|
||||
fi
|
||||
|
||||
# Run the tests.
|
||||
#
|
||||
# The junit.xml file allows CircleCI to display more fine-grained test information
|
||||
# in its "Tests" tab in the results page.
|
||||
# --verbose prints name of each test (helpful when there are
|
||||
# multiple tests in one file)
|
||||
# -rA prints summary in the end
|
||||
# -n4 uses four processes to run tests via pytest-xdist
|
||||
# -s is not used to prevent pytest from capturing output, because tests are running
|
||||
# in parallel and logs are mixed between different tests
|
||||
"${cov_prefix[@]}" ./scripts/pytest \
|
||||
--junitxml=$TEST_OUTPUT/junit.xml \
|
||||
--tb=short \
|
||||
--verbose \
|
||||
-m "not remote_cluster" \
|
||||
-rA $TEST_SELECTION $EXTRA_PARAMS
|
||||
|
||||
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
|
||||
if [[ "$GITHUB_REF" == "main" ]]; then
|
||||
export REPORT_FROM="$PERF_REPORT_DIR"
|
||||
export REPORT_TO=local
|
||||
scripts/generate_and_push_perf_report.sh
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Delete all data but logs
|
||||
shell: bash -ex {0}
|
||||
if: always()
|
||||
run: |
|
||||
du -sh /tmp/test_output/*
|
||||
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
|
||||
du -sh /tmp/test_output/*
|
||||
|
||||
- name: Upload python test logs
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
retention-days: 7
|
||||
if-no-files-found: error
|
||||
name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
|
||||
path: /tmp/test_output/
|
||||
17
.github/actions/save-coverage-data/action.yml
vendored
Normal file
17
.github/actions/save-coverage-data/action.yml
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
name: 'Merge and upload coverage data'
|
||||
description: 'Compresses and uploads the coverage data as an artifact'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Merge coverage data
|
||||
shell: bash -ex {0}
|
||||
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
|
||||
|
||||
- name: Upload coverage data
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
retention-days: 7
|
||||
if-no-files-found: error
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage/
|
||||
2
.github/workflows/benchmarking.yml
vendored
2
.github/workflows/benchmarking.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
bench:
|
||||
# this workflow runs on self hosteed runner
|
||||
# it's environment is quite different from usual guthub runner
|
||||
# probably the most important difference is that it doesnt start from clean workspace each time
|
||||
# probably the most important difference is that it doesn't start from clean workspace each time
|
||||
# e g if you install system packages they are not cleaned up since you install them directly in host machine
|
||||
# not a container or something
|
||||
# See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
|
||||
|
||||
389
.github/workflows/build_and_test.yml
vendored
Normal file
389
.github/workflows/build_and_test.yml
vendored
Normal file
@@ -0,0 +1,389 @@
|
||||
name: Test
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -ex {0}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
|
||||
jobs:
|
||||
build-postgres:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [ debug, release ]
|
||||
rust_toolchain: [ 1.58 ]
|
||||
|
||||
env:
|
||||
BUILD_TYPE: ${{ matrix.build_type }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Set pg revision for caching
|
||||
id: pg_ver
|
||||
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
|
||||
|
||||
- name: Cache postgres build
|
||||
id: cache_pg
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: tmp_install/
|
||||
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_ver.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Build postgres
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres -j$(nproc)
|
||||
|
||||
# actions/cache@v3 does not allow concurrently using the same cache across job steps, so use a separate cache
|
||||
- name: Prepare postgres artifact
|
||||
run: tar -C tmp_install/ -czf ./pg.tgz .
|
||||
- name: Upload postgres artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
retention-days: 7
|
||||
if-no-files-found: error
|
||||
name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
|
||||
path: ./pg.tgz
|
||||
|
||||
|
||||
build-neon:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ build-postgres ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [ debug, release ]
|
||||
rust_toolchain: [ 1.58 ]
|
||||
|
||||
env:
|
||||
BUILD_TYPE: ${{ matrix.build_type }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Get postgres artifact for restoration
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
|
||||
path: ./postgres-artifact/
|
||||
- name: Extract postgres artifact
|
||||
run: |
|
||||
mkdir ./tmp_install/
|
||||
tar -xf ./postgres-artifact/pg.tgz -C ./tmp_install/
|
||||
rm -rf ./postgres-artifact/
|
||||
|
||||
- name: Cache cargo deps
|
||||
id: cache_cargo
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry/
|
||||
~/.cargo/git/
|
||||
target/
|
||||
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
|
||||
key: |
|
||||
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
|
||||
|
||||
- name: Run cargo build
|
||||
run: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
|
||||
CARGO_FLAGS=
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
CARGO_FLAGS="--release --features profiling"
|
||||
fi
|
||||
|
||||
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
||||
|
||||
- name: Run cargo test
|
||||
run: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
|
||||
CARGO_FLAGS=
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
CARGO_FLAGS=--release
|
||||
fi
|
||||
|
||||
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
|
||||
|
||||
- name: Install rust binaries
|
||||
run: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
fi
|
||||
|
||||
binaries=$(
|
||||
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
|
||||
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
||||
)
|
||||
|
||||
test_exe_paths=$(
|
||||
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
|
||||
jq -r '.executable | select(. != null)'
|
||||
)
|
||||
|
||||
mkdir -p /tmp/neon/bin/
|
||||
mkdir -p /tmp/neon/test_bin/
|
||||
mkdir -p /tmp/neon/etc/
|
||||
|
||||
# Keep bloated coverage data files away from the rest of the artifact
|
||||
mkdir -p /tmp/coverage/
|
||||
|
||||
# Install target binaries
|
||||
for bin in $binaries; do
|
||||
SRC=target/$BUILD_TYPE/$bin
|
||||
DST=/tmp/neon/bin/$bin
|
||||
cp "$SRC" "$DST"
|
||||
done
|
||||
|
||||
# Install test executables and write list of all binaries (for code coverage)
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
for bin in $binaries; do
|
||||
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
|
||||
done
|
||||
for bin in $test_exe_paths; do
|
||||
SRC=$bin
|
||||
DST=/tmp/neon/test_bin/$(basename $bin)
|
||||
cp "$SRC" "$DST"
|
||||
echo "$DST" >> /tmp/coverage/binaries.list
|
||||
done
|
||||
fi
|
||||
|
||||
- name: Install postgres binaries
|
||||
run: cp -a tmp_install /tmp/neon/pg_install
|
||||
|
||||
- name: Prepare neon artifact
|
||||
run: tar -C /tmp/neon/ -czf ./neon.tgz .
|
||||
|
||||
- name: Upload neon binaries
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
retention-days: 7
|
||||
if-no-files-found: error
|
||||
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
|
||||
path: ./neon.tgz
|
||||
|
||||
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
|
||||
- name: Merge and upload coverage data
|
||||
if: matrix.build_type == 'debug'
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
|
||||
pg_regress-tests:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ build-neon ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [ debug, release ]
|
||||
rust_toolchain: [ 1.58 ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 2
|
||||
|
||||
- name: Pytest regress tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ matrix.build_type }}
|
||||
rust_toolchain: ${{ matrix.rust_toolchain }}
|
||||
test_selection: batch_pg_regress
|
||||
needs_postgres_source: true
|
||||
|
||||
- name: Merge and upload coverage data
|
||||
if: matrix.build_type == 'debug'
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
other-tests:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ build-neon ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [ debug, release ]
|
||||
rust_toolchain: [ 1.58 ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 2
|
||||
|
||||
- name: Pytest other tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ matrix.build_type }}
|
||||
rust_toolchain: ${{ matrix.rust_toolchain }}
|
||||
test_selection: batch_others
|
||||
|
||||
- name: Merge and upload coverage data
|
||||
if: matrix.build_type == 'debug'
|
||||
uses: ./.github/actions/save-coverage-data
|
||||
|
||||
benchmarks:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ build-neon ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [ release ]
|
||||
rust_toolchain: [ 1.58 ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 2
|
||||
|
||||
- name: Pytest benchmarks
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ matrix.build_type }}
|
||||
rust_toolchain: ${{ matrix.rust_toolchain }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: true
|
||||
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
||||
# while coverage is currently collected for the debug ones
|
||||
|
||||
coverage-report:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ other-tests, pg_regress-tests ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [ debug ]
|
||||
rust_toolchain: [ 1.58 ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Restore cargo deps cache
|
||||
id: cache_cargo
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry/
|
||||
~/.cargo/git/
|
||||
target/
|
||||
key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
|
||||
- name: Get Neon artifact for restoration
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
|
||||
path: ./neon-artifact/
|
||||
|
||||
- name: Extract Neon artifact
|
||||
run: |
|
||||
mkdir -p /tmp/neon/
|
||||
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
|
||||
rm -rf ./neon-artifact/
|
||||
|
||||
- name: Restore coverage data
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage/
|
||||
|
||||
- name: Merge coverage data
|
||||
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
|
||||
|
||||
- name: Build and upload coverage report
|
||||
run: |
|
||||
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
|
||||
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
|
||||
COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
|
||||
|
||||
scripts/coverage \
|
||||
--dir=/tmp/coverage report \
|
||||
--input-objects=/tmp/coverage/binaries.list \
|
||||
--commit-url=$COMMIT_URL \
|
||||
--format=github
|
||||
|
||||
REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
|
||||
|
||||
scripts/git-upload \
|
||||
--repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
|
||||
--message="Add code coverage for $COMMIT_URL" \
|
||||
copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
|
||||
|
||||
# Add link to the coverage report to the commit
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
|
||||
--data \
|
||||
"{
|
||||
\"state\": \"success\",
|
||||
\"context\": \"neon-coverage\",
|
||||
\"description\": \"Coverage report is ready\",
|
||||
\"target_url\": \"$REPORT_URL\"
|
||||
}"
|
||||
|
||||
trigger-e2e-tests:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ build-neon ]
|
||||
steps:
|
||||
- name: Set PR's status to pending and request a remote CI test
|
||||
run: |
|
||||
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
|
||||
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
|
||||
|
||||
REMOTE_REPO="${{ github.repository_owner }}/cloud"
|
||||
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
|
||||
--data \
|
||||
"{
|
||||
\"state\": \"pending\",
|
||||
\"context\": \"neon-cloud-e2e\",
|
||||
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
|
||||
}"
|
||||
|
||||
curl -f -X POST \
|
||||
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
|
||||
-H "Accept: application/vnd.github.v3+json" \
|
||||
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
|
||||
--data \
|
||||
"{
|
||||
\"ref\": \"main\",
|
||||
\"inputs\": {
|
||||
\"ci_job_name\": \"neon-cloud-e2e\",
|
||||
\"commit_hash\": \"$COMMIT_SHA\",
|
||||
\"remote_repo\": \"${{ github.repository }}\"
|
||||
}
|
||||
}"
|
||||
133
.github/workflows/codestyle.yml
vendored
Normal file
133
.github/workflows/codestyle.yml
vendored
Normal file
@@ -0,0 +1,133 @@
|
||||
name: Check code style and build
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
pull_request:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -ex {0}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
|
||||
jobs:
|
||||
check-codestyle-rust:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# If we want to duplicate this job for different
|
||||
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
|
||||
rust_toolchain: [1.58]
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
timeout-minutes: 50
|
||||
name: run regression test suite
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 2
|
||||
|
||||
- name: Install rust toolchain ${{ matrix.rust_toolchain }}
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: ${{ matrix.rust_toolchain }}
|
||||
components: rustfmt, clippy
|
||||
override: true
|
||||
|
||||
- name: Check formatting
|
||||
run: cargo fmt --all -- --check
|
||||
|
||||
- name: Install Ubuntu postgres dependencies
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev
|
||||
|
||||
- name: Install macOS postgres dependencies
|
||||
if: matrix.os == 'macos-latest'
|
||||
run: brew install flex bison openssl
|
||||
|
||||
- name: Set pg revision for caching
|
||||
id: pg_ver
|
||||
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
|
||||
|
||||
- name: Cache postgres build
|
||||
id: cache_pg
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: |
|
||||
tmp_install/
|
||||
key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}
|
||||
|
||||
- name: Set extra env for macOS
|
||||
if: matrix.os == 'macos-latest'
|
||||
run: |
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
|
||||
- name: Build postgres
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: make postgres
|
||||
|
||||
# Plain configure output can contain weird errors like 'error: C compiler cannot create executables'
|
||||
# and the real cause will be inside config.log
|
||||
- name: Print configure logs in case of failure
|
||||
if: failure()
|
||||
continue-on-error: true
|
||||
run: |
|
||||
echo '' && echo '=== config.log ===' && echo ''
|
||||
cat tmp_install/build/config.log
|
||||
echo '' && echo '=== configure.log ===' && echo ''
|
||||
cat tmp_install/build/configure.log
|
||||
|
||||
- name: Cache cargo deps
|
||||
id: cache_cargo
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
target
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
|
||||
|
||||
- name: Run cargo clippy
|
||||
run: ./run_clippy.sh
|
||||
|
||||
- name: Ensure all project builds
|
||||
run: cargo build --all --all-targets
|
||||
|
||||
check-codestyle-python:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: false
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Cache poetry deps
|
||||
id: cache_poetry
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Install Python deps
|
||||
run: ./scripts/pysync
|
||||
|
||||
- name: Run yapf to ensure code format
|
||||
run: poetry run yapf --recursive --diff .
|
||||
|
||||
- name: Run mypy to check types
|
||||
run: poetry run mypy .
|
||||
74
.github/workflows/pg_clients.yml
vendored
Normal file
74
.github/workflows/pg_clients.yml
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
name: Test Postgres client libraries
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '23 02 * * *' # run once a day, timezone is utc
|
||||
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
test-postgres-client-libs:
|
||||
runs-on: [ ubuntu-latest ]
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.9
|
||||
|
||||
- name: Install Poetry
|
||||
uses: snok/install-poetry@v1
|
||||
|
||||
- name: Cache poetry deps
|
||||
id: cache_poetry
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.cache/pypoetry/virtualenvs
|
||||
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
|
||||
|
||||
- name: Install Python deps
|
||||
shell: bash -ex {0}
|
||||
run: ./scripts/pysync
|
||||
|
||||
- name: Run pytest
|
||||
env:
|
||||
REMOTE_ENV: 1
|
||||
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
# this variable will be embedded in perf test report
|
||||
# and is needed to distinguish different environments
|
||||
PLATFORM: github-actions-selfhosted
|
||||
shell: bash -ex {0}
|
||||
run: |
|
||||
# Test framework expects we have psql binary;
|
||||
# but since we don't really need it in this test, let's mock it
|
||||
mkdir -p "$POSTGRES_DISTRIB_DIR/bin" && touch "$POSTGRES_DISTRIB_DIR/bin/psql";
|
||||
./scripts/pytest \
|
||||
--junitxml=$TEST_OUTPUT/junit.xml \
|
||||
--tb=short \
|
||||
--verbose \
|
||||
-m "remote_cluster" \
|
||||
-rA "test_runner/pg_clients"
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: failure()
|
||||
id: slack
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
71
.github/workflows/testing.yml
vendored
71
.github/workflows/testing.yml
vendored
@@ -1,71 +0,0 @@
|
||||
name: Build and Test
|
||||
|
||||
on: push
|
||||
|
||||
jobs:
|
||||
regression-check:
|
||||
strategy:
|
||||
matrix:
|
||||
# If we want to duplicate this job for different
|
||||
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
|
||||
rust_toolchain: [stable]
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
timeout-minutes: 30
|
||||
name: run regression test suite
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 2
|
||||
|
||||
- name: install rust toolchain ${{ matrix.rust_toolchain }}
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: ${{ matrix.rust_toolchain }}
|
||||
override: true
|
||||
|
||||
- name: Install Ubuntu postgres dependencies
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
|
||||
|
||||
- name: Install macOs postgres dependencies
|
||||
if: matrix.os == 'macos-latest'
|
||||
run: brew install flex bison
|
||||
|
||||
- name: Set pg revision for caching
|
||||
id: pg_ver
|
||||
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
|
||||
|
||||
- name: Cache postgres build
|
||||
id: cache_pg
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: |
|
||||
tmp_install/
|
||||
key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}
|
||||
|
||||
- name: Build postgres
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: make postgres
|
||||
|
||||
- name: Cache cargo deps
|
||||
id: cache_cargo
|
||||
uses: actions/cache@v2
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
target
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
|
||||
|
||||
- name: Run cargo clippy
|
||||
run: ./run_clippy.sh
|
||||
|
||||
- name: Run cargo test
|
||||
run: cargo test --all --all-targets
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -5,8 +5,9 @@
|
||||
__pycache__/
|
||||
test_output/
|
||||
.vscode
|
||||
/.zenith
|
||||
/integration_tests/.zenith
|
||||
.idea
|
||||
/.neon
|
||||
/integration_tests/.neon
|
||||
|
||||
# Coverage
|
||||
*.profraw
|
||||
|
||||
@@ -6,5 +6,5 @@ target/
|
||||
tmp_install/
|
||||
__pycache__/
|
||||
test_output/
|
||||
.zenith/
|
||||
.neon/
|
||||
.git/
|
||||
|
||||
20
COPYRIGHT
20
COPYRIGHT
@@ -1,20 +0,0 @@
|
||||
This software is licensed under the Apache 2.0 License:
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
Copyright 2021 Zenith Labs, Inc
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
The PostgreSQL submodule in vendor/postgres is licensed under the
|
||||
PostgreSQL license. See vendor/postgres/COPYRIGHT.
|
||||
205
Cargo.lock
generated
205
Cargo.lock
generated
@@ -64,6 +64,45 @@ dependencies = [
|
||||
"nodrop",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30ff05a702273012438132f449575dbc804e27b2f3cbe3069aa237d26c98fa33"
|
||||
dependencies = [
|
||||
"asn1-rs-derive",
|
||||
"asn1-rs-impl",
|
||||
"displaydoc",
|
||||
"nom",
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"time 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs-derive"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db8b7511298d5b7784b40b092d9e9dcd3a627a5707e4b5e507931ab0d44eeebf"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs-impl"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-stream"
|
||||
version = "0.3.3"
|
||||
@@ -166,7 +205,7 @@ dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"miniz_oxide 0.4.4",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
]
|
||||
@@ -363,6 +402,16 @@ dependencies = [
|
||||
"textwrap 0.14.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "close_fds"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bc416f33de9d59e79e57560f450d21ff8393adcf1cdfc3e6d8fb93d5f88a2ed"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.48"
|
||||
@@ -412,6 +461,7 @@ dependencies = [
|
||||
"tar",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"url",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
@@ -702,6 +752,12 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57"
|
||||
|
||||
[[package]]
|
||||
name = "debugid"
|
||||
version = "0.7.3"
|
||||
@@ -711,6 +767,20 @@ dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "der-parser"
|
||||
version = "7.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe398ac75057914d7d07307bf67dc7f3f574a26783b4fc7805a20ffa9f506e82"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
"displaydoc",
|
||||
"nom",
|
||||
"num-bigint",
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.9.0"
|
||||
@@ -752,6 +822,17 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "displaydoc"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.6.1"
|
||||
@@ -801,6 +882,7 @@ name = "etcd_broker"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"etcd-client",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -868,6 +950,18 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "279fb028e20b3c4c320317955b77c5e0c9701f05a1d309905d6fc702cdc5053e"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crc32fast",
|
||||
"libc",
|
||||
"miniz_oxide 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
@@ -1527,6 +1621,15 @@ dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.2"
|
||||
@@ -1582,6 +1685,7 @@ dependencies = [
|
||||
"clap 3.0.14",
|
||||
"comfy-table",
|
||||
"control_plane",
|
||||
"git-version",
|
||||
"pageserver",
|
||||
"postgres",
|
||||
"postgres_ffi",
|
||||
@@ -1699,10 +1803,19 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.9.0"
|
||||
name = "oid-registry"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
|
||||
checksum = "38e20717fa0541f39bd146692035c37bedfa532b3e5071b35761082407546b2a"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
@@ -1767,12 +1880,15 @@ dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
"clap 3.0.14",
|
||||
"close_fds",
|
||||
"const_format",
|
||||
"crc32c",
|
||||
"crossbeam-utils",
|
||||
"daemonize",
|
||||
"etcd_broker",
|
||||
"fail",
|
||||
"futures",
|
||||
"git-version",
|
||||
"hex",
|
||||
"hex-literal",
|
||||
"humantime",
|
||||
@@ -1806,6 +1922,7 @@ dependencies = [
|
||||
"tracing",
|
||||
"url",
|
||||
"utils",
|
||||
"walkdir",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
@@ -2023,15 +2140,18 @@ dependencies = [
|
||||
"bytes",
|
||||
"chrono",
|
||||
"crc32c",
|
||||
"env_logger",
|
||||
"hex",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"memoffset",
|
||||
"postgres",
|
||||
"rand",
|
||||
"regex",
|
||||
"serde",
|
||||
"thiserror",
|
||||
"utils",
|
||||
"wal_generate",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
@@ -2085,6 +2205,20 @@ dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "procfs"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95e344cafeaeefe487300c361654bcfc85db3ac53619eeccced29f5ea18c4c70"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"byteorder",
|
||||
"flate2",
|
||||
"hex",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prometheus"
|
||||
version = "0.13.0"
|
||||
@@ -2094,8 +2228,10 @@ dependencies = [
|
||||
"cfg-if",
|
||||
"fnv",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"memchr",
|
||||
"parking_lot 0.11.2",
|
||||
"procfs",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
@@ -2164,6 +2300,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"clap 3.0.14",
|
||||
"futures",
|
||||
"git-version",
|
||||
"hashbrown",
|
||||
"hex",
|
||||
"hmac 0.12.1",
|
||||
@@ -2193,6 +2330,7 @@ dependencies = [
|
||||
"url",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
"x509-parser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2350,6 +2488,8 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"rusoto_core",
|
||||
"rusoto_s3",
|
||||
"serde",
|
||||
@@ -2357,6 +2497,7 @@ dependencies = [
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-util 0.7.0",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -2561,6 +2702,15 @@ dependencies = [
|
||||
"semver",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rusticata-macros"
|
||||
version = "4.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.20.4"
|
||||
@@ -2608,6 +2758,7 @@ name = "safekeeper"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"clap 3.0.14",
|
||||
@@ -2616,11 +2767,14 @@ dependencies = [
|
||||
"daemonize",
|
||||
"etcd_broker",
|
||||
"fs2",
|
||||
"futures",
|
||||
"git-version",
|
||||
"hex",
|
||||
"humantime",
|
||||
"hyper",
|
||||
"lazy_static",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"postgres-protocol",
|
||||
"postgres_ffi",
|
||||
@@ -2634,6 +2788,7 @@ dependencies = [
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-util 0.7.0",
|
||||
"toml_edit",
|
||||
"tracing",
|
||||
"url",
|
||||
"utils",
|
||||
@@ -2995,6 +3150,18 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8"
|
||||
|
||||
[[package]]
|
||||
name = "synstructure"
|
||||
version = "0.12.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tar"
|
||||
version = "0.4.38"
|
||||
@@ -3585,6 +3752,18 @@ version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wal_generate"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap 3.0.14",
|
||||
"env_logger",
|
||||
"log",
|
||||
"postgres",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.2"
|
||||
@@ -3845,6 +4024,24 @@ dependencies = [
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "x509-parser"
|
||||
version = "0.13.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fb9bace5b5589ffead1afb76e43e34cff39cd0f3ce7e170ae0c29e53b88eb1c"
|
||||
dependencies = [
|
||||
"asn1-rs",
|
||||
"base64",
|
||||
"data-encoding",
|
||||
"der-parser",
|
||||
"lazy_static",
|
||||
"nom",
|
||||
"oid-registry",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"time 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xattr"
|
||||
version = "0.2.2"
|
||||
|
||||
12
Dockerfile
12
Dockerfile
@@ -1,5 +1,5 @@
|
||||
# Build Postgres
|
||||
FROM zimg/rust:1.58 AS pg-build
|
||||
FROM neondatabase/rust:1.58 AS pg-build
|
||||
WORKDIR /pg
|
||||
|
||||
USER root
|
||||
@@ -14,7 +14,7 @@ RUN set -e \
|
||||
&& tar -C tmp_install -czf /postgres_install.tar.gz .
|
||||
|
||||
# Build zenith binaries
|
||||
FROM zimg/rust:1.58 AS build
|
||||
FROM neondatabase/rust:1.58 AS build
|
||||
ARG GIT_VERSION=local
|
||||
|
||||
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||
@@ -25,7 +25,7 @@ COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/inclu
|
||||
COPY . .
|
||||
|
||||
# Show build caching stats to check if it was used in the end.
|
||||
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, loosing the compilation stats.
|
||||
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
|
||||
RUN set -e \
|
||||
&& sudo -E "PATH=$PATH" mold -run cargo build --release \
|
||||
&& cachepot -s
|
||||
@@ -46,9 +46,9 @@ RUN set -e \
|
||||
&& useradd -d /data zenith \
|
||||
&& chown -R zenith:zenith /data
|
||||
|
||||
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/proxy /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy /usr/local/bin
|
||||
|
||||
COPY --from=pg-build /pg/tmp_install/ /usr/local/
|
||||
COPY --from=pg-build /postgres_install.tar.gz /data/
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
#
|
||||
# Docker image for console integration testing.
|
||||
#
|
||||
# We may also reuse it in CI to unify installation process and as a general binaries building
|
||||
# tool for production servers.
|
||||
#
|
||||
# Dynamic linking is used for librocksdb and libstdc++ bacause librocksdb-sys calls
|
||||
# bindgen with "dynamic" feature flag. This also prevents usage of dockerhub alpine-rust
|
||||
# images which are statically linked and have guards against any dlopen. I would rather
|
||||
# prefer all static binaries so we may change the way librocksdb-sys builds or wait until
|
||||
# we will have our own storage and drop rockdb dependency.
|
||||
#
|
||||
# Cargo-chef is used to separate dependencies building from main binaries building. This
|
||||
# way `docker build` will download and install dependencies only of there are changes to
|
||||
# out Cargo.toml files.
|
||||
#
|
||||
|
||||
|
||||
#
|
||||
# build postgres separately -- this layer will be rebuilt only if one of
|
||||
# mentioned paths will get any changes
|
||||
#
|
||||
FROM alpine:3.13 as pg-build
|
||||
RUN apk add --update clang llvm compiler-rt compiler-rt-static lld musl-dev binutils \
|
||||
make bison flex readline-dev zlib-dev perl linux-headers libseccomp-dev
|
||||
WORKDIR zenith
|
||||
COPY ./vendor/postgres vendor/postgres
|
||||
COPY ./Makefile Makefile
|
||||
# Build using clang and lld
|
||||
RUN CC='clang' LD='lld' CFLAGS='-fuse-ld=lld --rtlib=compiler-rt' make postgres -j4
|
||||
|
||||
#
|
||||
# Calculate cargo dependencies.
|
||||
# This will always run, but only generate recipe.json with list of dependencies without
|
||||
# installing them.
|
||||
#
|
||||
FROM alpine:20210212 as cargo-deps-inspect
|
||||
RUN apk add --update rust cargo
|
||||
RUN cargo install cargo-chef
|
||||
WORKDIR zenith
|
||||
COPY . .
|
||||
RUN cargo chef prepare --recipe-path recipe.json
|
||||
|
||||
#
|
||||
# Build cargo dependencies.
|
||||
# This temp cantainner would be build only if recipe.json was changed.
|
||||
#
|
||||
FROM alpine:20210212 as deps-build
|
||||
RUN apk add --update rust cargo openssl-dev clang build-base
|
||||
# rust-rocksdb can be built against system-wide rocksdb -- that saves about
|
||||
# 10 minutes during build. Rocksdb apk package is in testing now, but use it
|
||||
# anyway. In case of any troubles we can download and build rocksdb here manually
|
||||
# (to cache it as a docker layer).
|
||||
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
|
||||
WORKDIR zenith
|
||||
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
|
||||
COPY --from=cargo-deps-inspect /root/.cargo/bin/cargo-chef /root/.cargo/bin/
|
||||
COPY --from=cargo-deps-inspect /zenith/recipe.json recipe.json
|
||||
RUN ROCKSDB_LIB_DIR=/usr/lib/ cargo chef cook --release --recipe-path recipe.json
|
||||
|
||||
#
|
||||
# Build zenith binaries
|
||||
#
|
||||
FROM alpine:20210212 as build
|
||||
RUN apk add --update rust cargo openssl-dev clang build-base
|
||||
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
|
||||
WORKDIR zenith
|
||||
COPY . .
|
||||
# Copy cached dependencies
|
||||
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
|
||||
COPY --from=deps-build /zenith/target target
|
||||
COPY --from=deps-build /root/.cargo /root/.cargo
|
||||
RUN cargo build --release
|
||||
|
||||
#
|
||||
# Copy binaries to resulting image.
|
||||
# build-base hare to provide libstdc++ (it will also bring gcc, but leave it this way until we figure
|
||||
# out how to statically link rocksdb or avoid it at all).
|
||||
#
|
||||
FROM alpine:3.13
|
||||
RUN apk add --update openssl build-base libseccomp-dev
|
||||
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
|
||||
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
|
||||
COPY --from=build /zenith/target/release/proxy /usr/local/bin
|
||||
COPY --from=pg-build /zenith/tmp_install /usr/local
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
|
||||
RUN addgroup zenith && adduser -h /data -D -G zenith zenith
|
||||
VOLUME ["/data"]
|
||||
WORKDIR /data
|
||||
USER zenith
|
||||
EXPOSE 6400
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
CMD ["pageserver"]
|
||||
@@ -1,6 +1,6 @@
|
||||
# First transient image to build compute_tools binaries
|
||||
# NB: keep in sync with rust image version in .circle/config.yml
|
||||
FROM zimg/rust:1.58 AS rust-build
|
||||
FROM neondatabase/rust:1.58 AS rust-build
|
||||
|
||||
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||
ARG AWS_ACCESS_KEY_ID
|
||||
@@ -15,4 +15,4 @@ RUN set -e \
|
||||
# Final image that only has one binary
|
||||
FROM debian:buster-slim
|
||||
|
||||
COPY --from=rust-build /home/circleci/project/target/release/zenith_ctl /usr/local/bin/zenith_ctl
|
||||
COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl
|
||||
|
||||
22
Makefile
22
Makefile
@@ -12,15 +12,21 @@ endif
|
||||
#
|
||||
BUILD_TYPE ?= debug
|
||||
ifeq ($(BUILD_TYPE),release)
|
||||
PG_CONFIGURE_OPTS = --enable-debug
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
|
||||
PG_CFLAGS = -O2 -g3 $(CFLAGS)
|
||||
# Unfortunately, `--profile=...` is a nightly feature
|
||||
CARGO_BUILD_FLAGS += --release
|
||||
else ifeq ($(BUILD_TYPE),debug)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --enable-cassert --enable-depend
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
|
||||
PG_CFLAGS = -O0 -g3 $(CFLAGS)
|
||||
else
|
||||
$(error Bad build type `$(BUILD_TYPE)', see Makefile for options)
|
||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||
endif
|
||||
|
||||
# macOS with brew-installed openssl requires explicit paths
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(HOMEBREW_PREFIX)/opt/openssl/include --with-libraries=$(HOMEBREW_PREFIX)/opt/openssl/lib
|
||||
endif
|
||||
|
||||
# Choose whether we should be silent or verbose
|
||||
@@ -68,16 +74,16 @@ postgres-headers: postgres-configure
|
||||
+@echo "Installing PostgreSQL headers"
|
||||
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
|
||||
|
||||
# Compile and install PostgreSQL and contrib/zenith
|
||||
# Compile and install PostgreSQL and contrib/neon
|
||||
.PHONY: postgres
|
||||
postgres: postgres-configure \
|
||||
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
|
||||
+@echo "Compiling PostgreSQL"
|
||||
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
|
||||
+@echo "Compiling contrib/zenith"
|
||||
$(MAKE) -C tmp_install/build/contrib/zenith install
|
||||
+@echo "Compiling contrib/zenith_test_utils"
|
||||
$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
|
||||
+@echo "Compiling contrib/neon"
|
||||
$(MAKE) -C tmp_install/build/contrib/neon install
|
||||
+@echo "Compiling contrib/neon_test_utils"
|
||||
$(MAKE) -C tmp_install/build/contrib/neon_test_utils install
|
||||
+@echo "Compiling pg_buffercache"
|
||||
$(MAKE) -C tmp_install/build/contrib/pg_buffercache install
|
||||
+@echo "Compiling pageinspect"
|
||||
|
||||
5
NOTICE
Normal file
5
NOTICE
Normal file
@@ -0,0 +1,5 @@
|
||||
Neon
|
||||
Copyright 2022 Neon Inc.
|
||||
|
||||
The PostgreSQL submodule in vendor/postgres is licensed under the
|
||||
PostgreSQL license. See vendor/postgres/COPYRIGHT.
|
||||
100
README.md
100
README.md
@@ -5,6 +5,11 @@ Neon is a serverless open source alternative to AWS Aurora Postgres. It separate
|
||||
The project used to be called "Zenith". Many of the commands and code comments
|
||||
still refer to "zenith", but we are in the process of renaming things.
|
||||
|
||||
## Quick start
|
||||
[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc) or use the online SQL editor.
|
||||
|
||||
Alternatively, compile and run the project [locally](#running-local-installation).
|
||||
|
||||
## Architecture overview
|
||||
|
||||
A Neon installation consists of compute nodes and Neon storage engine.
|
||||
@@ -23,31 +28,74 @@ Pageserver consists of:
|
||||
|
||||
## Running local installation
|
||||
|
||||
|
||||
#### Installing dependencies on Linux
|
||||
1. Install build dependencies and other useful packages
|
||||
|
||||
On Ubuntu or Debian this set of packages should be sufficient to build the code:
|
||||
```text
|
||||
* On Ubuntu or Debian this set of packages should be sufficient to build the code:
|
||||
```bash
|
||||
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
|
||||
libssl-dev clang pkg-config libpq-dev
|
||||
libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
|
||||
```
|
||||
* On Fedora these packages are needed:
|
||||
```bash
|
||||
dnf install flex bison readline-devel zlib-devel openssl-devel \
|
||||
libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
|
||||
```
|
||||
|
||||
[Rust] 1.58 or later is also required.
|
||||
2. [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
```
|
||||
# recommended approach from https://www.rust-lang.org/tools/install
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
```
|
||||
|
||||
#### Installing dependencies on OSX (12.3.1)
|
||||
1. Install XCode and dependencies
|
||||
```
|
||||
xcode-select --install
|
||||
brew install protobuf etcd openssl
|
||||
```
|
||||
|
||||
2. [Install Rust](https://www.rust-lang.org/tools/install)
|
||||
```
|
||||
# recommended approach from https://www.rust-lang.org/tools/install
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
```
|
||||
|
||||
3. Install PostgreSQL Client
|
||||
```
|
||||
# from https://stackoverflow.com/questions/44654216/correct-way-to-install-psql-without-full-postgres-on-macos
|
||||
brew install libpq
|
||||
brew link --force libpq
|
||||
```
|
||||
|
||||
#### Building on Linux and OSX
|
||||
|
||||
1. Build neon and patched postgres
|
||||
```
|
||||
# Note: The path to the neon sources can not contain a space.
|
||||
|
||||
git clone --recursive https://github.com/neondatabase/neon.git
|
||||
cd neon
|
||||
|
||||
# The preferred and default is to make a debug build. This will create a
|
||||
# demonstrably slower build than a release build. If you want to use a release
|
||||
# build, utilize "`BUILD_TYPE=release make -j`nproc``"
|
||||
|
||||
make -j`nproc`
|
||||
```
|
||||
|
||||
#### dependency installation notes
|
||||
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
|
||||
|
||||
To run the integration tests or Python scripts (not required to use the code), install
|
||||
Python (3.7 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.
|
||||
Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.
|
||||
|
||||
2. Build neon and patched postgres
|
||||
```sh
|
||||
git clone --recursive https://github.com/neondatabase/neon.git
|
||||
cd neon
|
||||
make -j5
|
||||
```
|
||||
|
||||
3. Start pageserver and postgres on top of it (should be called from repo root):
|
||||
#### running neon database
|
||||
1. Start pageserver and postgres on top of it (should be called from repo root):
|
||||
```sh
|
||||
# Create repository in .zenith with proper paths to binaries and data
|
||||
# Create repository in .neon with proper paths to binaries and data
|
||||
# Later that would be responsibility of a package install script
|
||||
> ./target/debug/neon_local init
|
||||
initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
|
||||
@@ -57,17 +105,17 @@ pageserver init succeeded
|
||||
|
||||
# start pageserver and safekeeper
|
||||
> ./target/debug/neon_local start
|
||||
Starting pageserver at '127.0.0.1:64000' in '.zenith'
|
||||
Starting pageserver at '127.0.0.1:64000' in '.neon'
|
||||
Pageserver started
|
||||
initializing for sk 1 for 7676
|
||||
Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
|
||||
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
|
||||
Safekeeper started
|
||||
|
||||
# start postgres compute node
|
||||
> ./target/debug/neon_local pg start main
|
||||
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
|
||||
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
|
||||
Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
|
||||
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
|
||||
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
|
||||
|
||||
# check list of running postgres instances
|
||||
> ./target/debug/neon_local pg list
|
||||
@@ -75,9 +123,9 @@ Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=po
|
||||
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
|
||||
```
|
||||
|
||||
4. Now it is possible to connect to postgres and run some queries:
|
||||
2. Now it is possible to connect to postgres and run some queries:
|
||||
```text
|
||||
> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
|
||||
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
|
||||
postgres=# CREATE TABLE t(key int primary key, value text);
|
||||
CREATE TABLE
|
||||
postgres=# insert into t values(1,1);
|
||||
@@ -89,7 +137,7 @@ postgres=# select * from t;
|
||||
(1 row)
|
||||
```
|
||||
|
||||
5. And create branches and run postgres on them:
|
||||
3. And create branches and run postgres on them:
|
||||
```sh
|
||||
# create branch named migration_check
|
||||
> ./target/debug/neon_local timeline branch --branch-name migration_check
|
||||
@@ -103,8 +151,8 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
|
||||
# start postgres on that branch
|
||||
> ./target/debug/neon_local pg start migration_check --branch-name migration_check
|
||||
Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
|
||||
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
|
||||
Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=postgres'
|
||||
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
|
||||
Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
|
||||
|
||||
# check the new list of running postgres instances
|
||||
> ./target/debug/neon_local pg list
|
||||
@@ -114,7 +162,7 @@ Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=po
|
||||
|
||||
# this new postgres instance will have all the data from 'main' postgres,
|
||||
# but all modifications would not affect data in original postgres
|
||||
> psql -p55433 -h 127.0.0.1 -U zenith_admin postgres
|
||||
> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
|
||||
postgres=# select * from t;
|
||||
key | value
|
||||
-----+-------
|
||||
@@ -125,7 +173,7 @@ postgres=# insert into t values(2,2);
|
||||
INSERT 0 1
|
||||
|
||||
# check that the new change doesn't affect the 'main' postgres
|
||||
> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
|
||||
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
|
||||
postgres=# select * from t;
|
||||
key | value
|
||||
-----+-------
|
||||
@@ -133,7 +181,7 @@ postgres=# select * from t;
|
||||
(1 row)
|
||||
```
|
||||
|
||||
6. If you want to run tests afterwards (see below), you have to stop all the running the pageserver, safekeeper and postgres instances
|
||||
4. If you want to run tests afterwards (see below), you have to stop all the running the pageserver, safekeeper and postgres instances
|
||||
you have just started. You can stop them all with one command:
|
||||
```sh
|
||||
> ./target/debug/neon_local stop
|
||||
@@ -163,7 +211,7 @@ Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, wh
|
||||
To get more familiar with this aspect, refer to:
|
||||
|
||||
- [Neon glossary](/docs/glossary.md)
|
||||
- [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
|
||||
- [PostgreSQL glossary](https://www.postgresql.org/docs/14/glossary.html)
|
||||
- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))
|
||||
|
||||
## Join the development
|
||||
|
||||
@@ -18,4 +18,5 @@ serde_json = "1"
|
||||
tar = "0.4"
|
||||
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
url = "2.2.2"
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
# Compute node tools
|
||||
|
||||
Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
|
||||
`ExecStart` option. It will handle all the `zenith` specifics during compute node
|
||||
Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
|
||||
`ExecStart` option. It will handle all the `Neon` specifics during compute node
|
||||
initialization:
|
||||
- `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
|
||||
- `compute_ctl` accepts cluster (compute node) specification as a JSON file.
|
||||
- Every start is a fresh start, so the data directory is removed and
|
||||
initialized again on each run.
|
||||
- Next it will put configuration files into the `PGDATA` directory.
|
||||
@@ -13,18 +13,18 @@ initialization:
|
||||
- Check and alter/drop/create roles and databases.
|
||||
- Hang waiting on the `postmaster` process to exit.
|
||||
|
||||
Also `zenith_ctl` spawns two separate service threads:
|
||||
Also `compute_ctl` spawns two separate service threads:
|
||||
- `compute-monitor` checks the last Postgres activity timestamp and saves it
|
||||
into the shared `ComputeState`;
|
||||
into the shared `ComputeNode`;
|
||||
- `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
|
||||
last activity requests.
|
||||
|
||||
Usage example:
|
||||
```sh
|
||||
zenith_ctl -D /var/db/postgres/compute \
|
||||
-C 'postgresql://zenith_admin@localhost/postgres' \
|
||||
-S /var/db/postgres/specs/current.json \
|
||||
-b /usr/local/bin/postgres
|
||||
compute_ctl -D /var/db/postgres/compute \
|
||||
-C 'postgresql://cloud_admin@localhost/postgres' \
|
||||
-S /var/db/postgres/specs/current.json \
|
||||
-b /usr/local/bin/postgres
|
||||
```
|
||||
|
||||
## Tests
|
||||
|
||||
175
compute_tools/src/bin/compute_ctl.rs
Normal file
175
compute_tools/src/bin/compute_ctl.rs
Normal file
@@ -0,0 +1,175 @@
|
||||
//!
|
||||
//! Postgres wrapper (`compute_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
|
||||
//! `ExecStart` option. It will handle all the `Neon` specifics during compute node
|
||||
//! initialization:
|
||||
//! - `compute_ctl` accepts cluster (compute node) specification as a JSON file.
|
||||
//! - Every start is a fresh start, so the data directory is removed and
|
||||
//! initialized again on each run.
|
||||
//! - Next it will put configuration files into the `PGDATA` directory.
|
||||
//! - Sync safekeepers and get commit LSN.
|
||||
//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
|
||||
//! - Try to start `postgres` and wait until it is ready to accept connections.
|
||||
//! - Check and alter/drop/create roles and databases.
|
||||
//! - Hang waiting on the `postmaster` process to exit.
|
||||
//!
|
||||
//! Also `compute_ctl` spawns two separate service threads:
|
||||
//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
|
||||
//! into the shared `ComputeNode`;
|
||||
//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
|
||||
//! last activity requests.
|
||||
//!
|
||||
//! Usage example:
|
||||
//! ```sh
|
||||
//! compute_ctl -D /var/db/postgres/compute \
|
||||
//! -C 'postgresql://cloud_admin@localhost/postgres' \
|
||||
//! -S /var/db/postgres/specs/current.json \
|
||||
//! -b /usr/local/bin/postgres
|
||||
//! ```
|
||||
//!
|
||||
use std::fs::File;
|
||||
use std::panic;
|
||||
use std::path::Path;
|
||||
use std::process::exit;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::{thread, time::Duration};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Arg;
|
||||
use log::{error, info};
|
||||
|
||||
use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
|
||||
use compute_tools::http::api::launch_http_server;
|
||||
use compute_tools::logger::*;
|
||||
use compute_tools::monitor::launch_monitor;
|
||||
use compute_tools::params::*;
|
||||
use compute_tools::pg_helpers::*;
|
||||
use compute_tools::spec::*;
|
||||
use url::Url;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
// TODO: re-use `utils::logging` later
|
||||
init_logger(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
// Env variable is set by `cargo`
|
||||
let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
|
||||
let matches = clap::App::new("compute_ctl")
|
||||
.version(version.unwrap_or("unknown"))
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
.long("pgbin")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
|
||||
let connstr = matches
|
||||
.value_of("connstr")
|
||||
.expect("Postgres connection string is required");
|
||||
let spec = matches.value_of("spec");
|
||||
let spec_path = matches.value_of("spec-path");
|
||||
|
||||
// Try to use just 'postgres' if no path is provided
|
||||
let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
|
||||
|
||||
let spec: ComputeSpec = match spec {
|
||||
// First, try to get cluster spec from the cli argument
|
||||
Some(json) => serde_json::from_str(json)?,
|
||||
None => {
|
||||
// Second, try to read it from the file if path is provided
|
||||
if let Some(sp) = spec_path {
|
||||
let path = Path::new(sp);
|
||||
let file = File::open(path)?;
|
||||
serde_json::from_reader(file)?
|
||||
} else {
|
||||
panic!("cluster spec should be provided via --spec or --spec-path argument");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let pageserver_connstr = spec
|
||||
.cluster
|
||||
.settings
|
||||
.find("neon.pageserver_connstring")
|
||||
.expect("pageserver connstr should be provided");
|
||||
let tenant = spec
|
||||
.cluster
|
||||
.settings
|
||||
.find("neon.tenant_id")
|
||||
.expect("tenant id should be provided");
|
||||
let timeline = spec
|
||||
.cluster
|
||||
.settings
|
||||
.find("neon.timeline_id")
|
||||
.expect("tenant id should be provided");
|
||||
|
||||
let compute_state = ComputeNode {
|
||||
start_time: Utc::now(),
|
||||
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
|
||||
pgdata: pgdata.to_string(),
|
||||
pgbin: pgbin.to_string(),
|
||||
spec,
|
||||
tenant,
|
||||
timeline,
|
||||
pageserver_connstr,
|
||||
metrics: ComputeMetrics::new(),
|
||||
state: RwLock::new(ComputeState::new()),
|
||||
};
|
||||
let compute = Arc::new(compute_state);
|
||||
|
||||
// Launch service threads first, so we were able to serve availability
|
||||
// requests, while configuration is still in progress.
|
||||
let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
|
||||
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
|
||||
|
||||
// Run compute (Postgres) and hang waiting on it.
|
||||
match compute.prepare_and_run() {
|
||||
Ok(ec) => {
|
||||
let code = ec.code().unwrap_or(1);
|
||||
info!("Postgres exited with code {}, shutting down", code);
|
||||
exit(code)
|
||||
}
|
||||
Err(error) => {
|
||||
error!("could not start the compute node: {}", error);
|
||||
|
||||
let mut state = compute.state.write().unwrap();
|
||||
state.error = Some(format!("{:?}", error));
|
||||
state.status = ComputeStatus::Failed;
|
||||
drop(state);
|
||||
|
||||
// Keep serving HTTP requests, so the cloud control plane was able to
|
||||
// get the actual error.
|
||||
info!("giving control plane 30s to collect the error before shutdown");
|
||||
thread::sleep(Duration::from_secs(30));
|
||||
info!("shutting down");
|
||||
Err(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,252 +0,0 @@
|
||||
//!
|
||||
//! Postgres wrapper (`zenith_ctl`) is intended to be run as a Docker entrypoint or as a `systemd`
|
||||
//! `ExecStart` option. It will handle all the `zenith` specifics during compute node
|
||||
//! initialization:
|
||||
//! - `zenith_ctl` accepts cluster (compute node) specification as a JSON file.
|
||||
//! - Every start is a fresh start, so the data directory is removed and
|
||||
//! initialized again on each run.
|
||||
//! - Next it will put configuration files into the `PGDATA` directory.
|
||||
//! - Sync safekeepers and get commit LSN.
|
||||
//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
|
||||
//! - Try to start `postgres` and wait until it is ready to accept connections.
|
||||
//! - Check and alter/drop/create roles and databases.
|
||||
//! - Hang waiting on the `postmaster` process to exit.
|
||||
//!
|
||||
//! Also `zenith_ctl` spawns two separate service threads:
|
||||
//! - `compute-monitor` checks the last Postgres activity timestamp and saves it
|
||||
//! into the shared `ComputeState`;
|
||||
//! - `http-endpoint` runs a Hyper HTTP API server, which serves readiness and the
|
||||
//! last activity requests.
|
||||
//!
|
||||
//! Usage example:
|
||||
//! ```sh
|
||||
//! zenith_ctl -D /var/db/postgres/compute \
|
||||
//! -C 'postgresql://zenith_admin@localhost/postgres' \
|
||||
//! -S /var/db/postgres/specs/current.json \
|
||||
//! -b /usr/local/bin/postgres
|
||||
//! ```
|
||||
//!
|
||||
use std::fs::File;
|
||||
use std::panic;
|
||||
use std::path::Path;
|
||||
use std::process::{exit, Command, ExitStatus};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Utc;
|
||||
use clap::Arg;
|
||||
use log::info;
|
||||
use postgres::{Client, NoTls};
|
||||
|
||||
use compute_tools::checker::create_writablity_check_data;
|
||||
use compute_tools::config;
|
||||
use compute_tools::http_api::launch_http_server;
|
||||
use compute_tools::logger::*;
|
||||
use compute_tools::monitor::launch_monitor;
|
||||
use compute_tools::params::*;
|
||||
use compute_tools::pg_helpers::*;
|
||||
use compute_tools::spec::*;
|
||||
use compute_tools::zenith::*;
|
||||
|
||||
/// Do all the preparations like PGDATA directory creation, configuration,
|
||||
/// safekeepers sync, basebackup, etc.
|
||||
fn prepare_pgdata(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
|
||||
let state = state.read().unwrap();
|
||||
let spec = &state.spec;
|
||||
let pgdata_path = Path::new(&state.pgdata);
|
||||
let pageserver_connstr = spec
|
||||
.cluster
|
||||
.settings
|
||||
.find("zenith.page_server_connstring")
|
||||
.expect("pageserver connstr should be provided");
|
||||
let tenant = spec
|
||||
.cluster
|
||||
.settings
|
||||
.find("zenith.zenith_tenant")
|
||||
.expect("tenant id should be provided");
|
||||
let timeline = spec
|
||||
.cluster
|
||||
.settings
|
||||
.find("zenith.zenith_timeline")
|
||||
.expect("tenant id should be provided");
|
||||
|
||||
info!(
|
||||
"starting cluster #{}, operation #{}",
|
||||
spec.cluster.cluster_id,
|
||||
spec.operation_uuid.as_ref().unwrap()
|
||||
);
|
||||
|
||||
// Remove/create an empty pgdata directory and put configuration there.
|
||||
create_pgdata(&state.pgdata)?;
|
||||
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
|
||||
|
||||
info!("starting safekeepers syncing");
|
||||
let lsn = sync_safekeepers(&state.pgdata, &state.pgbin)
|
||||
.with_context(|| "failed to sync safekeepers")?;
|
||||
info!("safekeepers synced at LSN {}", lsn);
|
||||
|
||||
info!(
|
||||
"getting basebackup@{} from pageserver {}",
|
||||
lsn, pageserver_connstr
|
||||
);
|
||||
get_basebackup(&state.pgdata, &pageserver_connstr, &tenant, &timeline, &lsn).with_context(
|
||||
|| {
|
||||
format!(
|
||||
"failed to get basebackup@{} from pageserver {}",
|
||||
lsn, pageserver_connstr
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
// Update pg_hba.conf received with basebackup.
|
||||
update_pg_hba(pgdata_path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start Postgres as a child process and manage DBs/roles.
|
||||
/// After that this will hang waiting on the postmaster process to exit.
|
||||
fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
|
||||
let read_state = state.read().unwrap();
|
||||
let pgdata_path = Path::new(&read_state.pgdata);
|
||||
|
||||
// Run postgres as a child process.
|
||||
let mut pg = Command::new(&read_state.pgbin)
|
||||
.args(&["-D", &read_state.pgdata])
|
||||
.spawn()
|
||||
.expect("cannot start postgres process");
|
||||
|
||||
// Try default Postgres port if it is not provided
|
||||
let port = read_state
|
||||
.spec
|
||||
.cluster
|
||||
.settings
|
||||
.find("port")
|
||||
.unwrap_or_else(|| "5432".to_string());
|
||||
wait_for_postgres(&port, pgdata_path)?;
|
||||
|
||||
let mut client = Client::connect(&read_state.connstr, NoTls)?;
|
||||
|
||||
handle_roles(&read_state.spec, &mut client)?;
|
||||
handle_databases(&read_state.spec, &mut client)?;
|
||||
handle_grants(&read_state.spec, &mut client)?;
|
||||
create_writablity_check_data(&mut client)?;
|
||||
|
||||
// 'Close' connection
|
||||
drop(client);
|
||||
|
||||
info!(
|
||||
"finished configuration of cluster #{}",
|
||||
read_state.spec.cluster.cluster_id
|
||||
);
|
||||
|
||||
// Release the read lock.
|
||||
drop(read_state);
|
||||
|
||||
// Get the write lock, update state and release the lock, so HTTP API
|
||||
// was able to serve requests, while we are blocked waiting on
|
||||
// Postgres.
|
||||
let mut state = state.write().unwrap();
|
||||
state.ready = true;
|
||||
drop(state);
|
||||
|
||||
// Wait for child postgres process basically forever. In this state Ctrl+C
|
||||
// will be propagated to postgres and it will be shut down as well.
|
||||
let ecode = pg.wait().expect("failed to wait on postgres");
|
||||
|
||||
Ok(ecode)
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
// TODO: re-use `utils::logging` later
|
||||
init_logger(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
// Env variable is set by `cargo`
|
||||
let version: Option<&str> = option_env!("CARGO_PKG_VERSION");
|
||||
let matches = clap::App::new("zenith_ctl")
|
||||
.version(version.unwrap_or("unknown"))
|
||||
.arg(
|
||||
Arg::new("connstr")
|
||||
.short('C')
|
||||
.long("connstr")
|
||||
.value_name("DATABASE_URL")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgdata")
|
||||
.short('D')
|
||||
.long("pgdata")
|
||||
.value_name("DATADIR")
|
||||
.required(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pgbin")
|
||||
.short('b')
|
||||
.long("pgbin")
|
||||
.value_name("POSTGRES_PATH"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec")
|
||||
.short('s')
|
||||
.long("spec")
|
||||
.value_name("SPEC_JSON"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("spec-path")
|
||||
.short('S')
|
||||
.long("spec-path")
|
||||
.value_name("SPEC_PATH"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let pgdata = matches.value_of("pgdata").expect("PGDATA path is required");
|
||||
let connstr = matches
|
||||
.value_of("connstr")
|
||||
.expect("Postgres connection string is required");
|
||||
let spec = matches.value_of("spec");
|
||||
let spec_path = matches.value_of("spec-path");
|
||||
|
||||
// Try to use just 'postgres' if no path is provided
|
||||
let pgbin = matches.value_of("pgbin").unwrap_or("postgres");
|
||||
|
||||
let spec: ClusterSpec = match spec {
|
||||
// First, try to get cluster spec from the cli argument
|
||||
Some(json) => serde_json::from_str(json)?,
|
||||
None => {
|
||||
// Second, try to read it from the file if path is provided
|
||||
if let Some(sp) = spec_path {
|
||||
let path = Path::new(sp);
|
||||
let file = File::open(path)?;
|
||||
serde_json::from_reader(file)?
|
||||
} else {
|
||||
panic!("cluster spec should be provided via --spec or --spec-path argument");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let compute_state = ComputeState {
|
||||
connstr: connstr.to_string(),
|
||||
pgdata: pgdata.to_string(),
|
||||
pgbin: pgbin.to_string(),
|
||||
spec,
|
||||
ready: false,
|
||||
last_active: Utc::now(),
|
||||
};
|
||||
let compute_state = Arc::new(RwLock::new(compute_state));
|
||||
|
||||
// Launch service threads first, so we were able to serve availability
|
||||
// requests, while configuration is still in progress.
|
||||
let mut _threads = vec![
|
||||
launch_http_server(&compute_state).expect("cannot launch compute monitor thread"),
|
||||
launch_monitor(&compute_state).expect("cannot launch http endpoint thread"),
|
||||
];
|
||||
|
||||
prepare_pgdata(&compute_state)?;
|
||||
|
||||
// Run compute (Postgres) and hang waiting on it. Panic if any error happens,
|
||||
// it will help us to trigger unwind and kill postmaster as well.
|
||||
match run_compute(&compute_state) {
|
||||
Ok(ec) => exit(ec.success() as i32),
|
||||
Err(error) => panic!("cannot start compute node, error: {}", error),
|
||||
}
|
||||
}
|
||||
@@ -1,11 +1,9 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use log::error;
|
||||
use postgres::Client;
|
||||
use tokio_postgres::NoTls;
|
||||
|
||||
use crate::zenith::ComputeState;
|
||||
use crate::compute::ComputeNode;
|
||||
|
||||
pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
|
||||
let query = "
|
||||
@@ -23,9 +21,8 @@ pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn check_writability(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
|
||||
let connstr = state.read().unwrap().connstr.clone();
|
||||
let (client, connection) = tokio_postgres::connect(&connstr, NoTls).await?;
|
||||
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
|
||||
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
|
||||
if client.is_closed() {
|
||||
return Err(anyhow!("connection to postgres closed"));
|
||||
}
|
||||
|
||||
350
compute_tools/src/compute.rs
Normal file
350
compute_tools/src/compute.rs
Normal file
@@ -0,0 +1,350 @@
|
||||
//
|
||||
// XXX: This starts to be scarry similar to the `PostgresNode` from `control_plane`,
|
||||
// but there are several things that makes `PostgresNode` usage inconvenient in the
|
||||
// cloud:
|
||||
// - it inherits from `LocalEnv`, which contains **all-all** the information about
|
||||
// a complete service running
|
||||
// - it uses `PageServerNode` with information about http endpoint, which we do not
|
||||
// need in the cloud again
|
||||
// - many tiny pieces like, for example, we do not use `pg_ctl` in the cloud
|
||||
//
|
||||
// Thus, to use `PostgresNode` in the cloud, we need to 'mock' a bunch of required
|
||||
// attributes (not required for the cloud). Yet, it is still tempting to unify these
|
||||
// `PostgresNode` and `ComputeNode` and use one in both places.
|
||||
//
|
||||
// TODO: stabilize `ComputeNode` and think about using it in the `control_plane`.
|
||||
//
|
||||
use std::fs;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use std::path::Path;
|
||||
use std::process::{Command, ExitStatus, Stdio};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::RwLock;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use log::info;
|
||||
use postgres::{Client, NoTls};
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
use crate::checker::create_writablity_check_data;
|
||||
use crate::config;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::spec::*;
|
||||
|
||||
/// Compute node info shared across several `compute_ctl` threads.
|
||||
pub struct ComputeNode {
|
||||
pub start_time: DateTime<Utc>,
|
||||
// Url type maintains proper escaping
|
||||
pub connstr: url::Url,
|
||||
pub pgdata: String,
|
||||
pub pgbin: String,
|
||||
pub spec: ComputeSpec,
|
||||
pub tenant: String,
|
||||
pub timeline: String,
|
||||
pub pageserver_connstr: String,
|
||||
pub metrics: ComputeMetrics,
|
||||
/// Volatile part of the `ComputeNode` so should be used under `RwLock`
|
||||
/// to allow HTTP API server to serve status requests, while configuration
|
||||
/// is in progress.
|
||||
pub state: RwLock<ComputeState>,
|
||||
}
|
||||
|
||||
fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
x.to_rfc3339().serialize(s)
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub struct ComputeState {
|
||||
pub status: ComputeStatus,
|
||||
/// Timestamp of the last Postgres activity
|
||||
#[serde(serialize_with = "rfc3339_serialize")]
|
||||
pub last_active: DateTime<Utc>,
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
impl ComputeState {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
status: ComputeStatus::Init,
|
||||
last_active: Utc::now(),
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ComputeState {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Copy, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ComputeStatus {
|
||||
Init,
|
||||
Running,
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct ComputeMetrics {
|
||||
pub sync_safekeepers_ms: AtomicU64,
|
||||
pub basebackup_ms: AtomicU64,
|
||||
pub config_ms: AtomicU64,
|
||||
pub total_startup_ms: AtomicU64,
|
||||
}
|
||||
|
||||
impl ComputeMetrics {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
sync_safekeepers_ms: AtomicU64::new(0),
|
||||
basebackup_ms: AtomicU64::new(0),
|
||||
config_ms: AtomicU64::new(0),
|
||||
total_startup_ms: AtomicU64::new(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for ComputeMetrics {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl ComputeNode {
|
||||
pub fn set_status(&self, status: ComputeStatus) {
|
||||
self.state.write().unwrap().status = status;
|
||||
}
|
||||
|
||||
pub fn get_status(&self) -> ComputeStatus {
|
||||
self.state.read().unwrap().status
|
||||
}
|
||||
|
||||
// Remove `pgdata` directory and create it again with right permissions.
|
||||
fn create_pgdata(&self) -> Result<()> {
|
||||
// Ignore removal error, likely it is a 'No such file or directory (os error 2)'.
|
||||
// If it is something different then create_dir() will error out anyway.
|
||||
let _ok = fs::remove_dir_all(&self.pgdata);
|
||||
fs::create_dir(&self.pgdata)?;
|
||||
fs::set_permissions(&self.pgdata, fs::Permissions::from_mode(0o700))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Get basebackup from the libpq connection to pageserver using `connstr` and
|
||||
// unarchive it to `pgdata` directory overriding all its previous content.
|
||||
fn get_basebackup(&self, lsn: &str) -> Result<()> {
|
||||
let start_time = Utc::now();
|
||||
|
||||
let mut client = Client::connect(&self.pageserver_connstr, NoTls)?;
|
||||
let basebackup_cmd = match lsn {
|
||||
"0/0" => format!("basebackup {} {}", &self.tenant, &self.timeline), // First start of the compute
|
||||
_ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
|
||||
};
|
||||
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
|
||||
|
||||
// Read the archive directly from the `CopyOutReader`
|
||||
//
|
||||
// Set `ignore_zeros` so that unpack() reads all the Copy data and
|
||||
// doesn't stop at the end-of-archive marker. Otherwise, if the server
|
||||
// sends an Error after finishing the tarball, we will not notice it.
|
||||
let mut ar = tar::Archive::new(copyreader);
|
||||
ar.set_ignore_zeros(true);
|
||||
ar.unpack(&self.pgdata)?;
|
||||
|
||||
self.metrics.basebackup_ms.store(
|
||||
Utc::now()
|
||||
.signed_duration_since(start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Run `postgres` in a special mode with `--sync-safekeepers` argument
|
||||
// and return the reported LSN back to the caller.
|
||||
fn sync_safekeepers(&self) -> Result<String> {
|
||||
let start_time = Utc::now();
|
||||
|
||||
let sync_handle = Command::new(&self.pgbin)
|
||||
.args(&["--sync-safekeepers"])
|
||||
.env("PGDATA", &self.pgdata) // we cannot use -D in this mode
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
.expect("postgres --sync-safekeepers failed to start");
|
||||
|
||||
// `postgres --sync-safekeepers` will print all log output to stderr and
|
||||
// final LSN to stdout. So we pipe only stdout, while stderr will be automatically
|
||||
// redirected to the caller output.
|
||||
let sync_output = sync_handle
|
||||
.wait_with_output()
|
||||
.expect("postgres --sync-safekeepers failed");
|
||||
if !sync_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"postgres --sync-safekeepers exited with non-zero status: {}",
|
||||
sync_output.status,
|
||||
);
|
||||
}
|
||||
|
||||
self.metrics.sync_safekeepers_ms.store(
|
||||
Utc::now()
|
||||
.signed_duration_since(start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
|
||||
let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
|
||||
|
||||
Ok(lsn)
|
||||
}
|
||||
|
||||
/// Do all the preparations like PGDATA directory creation, configuration,
|
||||
/// safekeepers sync, basebackup, etc.
|
||||
pub fn prepare_pgdata(&self) -> Result<()> {
|
||||
let spec = &self.spec;
|
||||
let pgdata_path = Path::new(&self.pgdata);
|
||||
|
||||
// Remove/create an empty pgdata directory and put configuration there.
|
||||
self.create_pgdata()?;
|
||||
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
|
||||
|
||||
info!("starting safekeepers syncing");
|
||||
let lsn = self
|
||||
.sync_safekeepers()
|
||||
.with_context(|| "failed to sync safekeepers")?;
|
||||
info!("safekeepers synced at LSN {}", lsn);
|
||||
|
||||
info!(
|
||||
"getting basebackup@{} from pageserver {}",
|
||||
lsn, &self.pageserver_connstr
|
||||
);
|
||||
self.get_basebackup(&lsn).with_context(|| {
|
||||
format!(
|
||||
"failed to get basebackup@{} from pageserver {}",
|
||||
lsn, &self.pageserver_connstr
|
||||
)
|
||||
})?;
|
||||
|
||||
// Update pg_hba.conf received with basebackup.
|
||||
update_pg_hba(pgdata_path)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Start Postgres as a child process and manage DBs/roles.
|
||||
/// After that this will hang waiting on the postmaster process to exit.
|
||||
pub fn run(&self) -> Result<ExitStatus> {
|
||||
let start_time = Utc::now();
|
||||
|
||||
let pgdata_path = Path::new(&self.pgdata);
|
||||
|
||||
// Run postgres as a child process.
|
||||
let mut pg = Command::new(&self.pgbin)
|
||||
.args(&["-D", &self.pgdata])
|
||||
.spawn()
|
||||
.expect("cannot start postgres process");
|
||||
|
||||
// Try default Postgres port if it is not provided
|
||||
let port = self
|
||||
.spec
|
||||
.cluster
|
||||
.settings
|
||||
.find("port")
|
||||
.unwrap_or_else(|| "5432".to_string());
|
||||
wait_for_postgres(&mut pg, &port, pgdata_path)?;
|
||||
|
||||
// If connection fails,
|
||||
// it may be the old node with `zenith_admin` superuser.
|
||||
//
|
||||
// In this case we need to connect with old `zenith_admin`name
|
||||
// and create new user. We cannot simply rename connected user,
|
||||
// but we can create a new one and grant it all privileges.
|
||||
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
|
||||
Err(e) => {
|
||||
info!(
|
||||
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
|
||||
e
|
||||
);
|
||||
let mut zenith_admin_connstr = self.connstr.clone();
|
||||
|
||||
zenith_admin_connstr
|
||||
.set_username("zenith_admin")
|
||||
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
|
||||
|
||||
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
|
||||
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
|
||||
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
|
||||
drop(client);
|
||||
|
||||
// reconnect with connsting with expected name
|
||||
Client::connect(self.connstr.as_str(), NoTls)?
|
||||
}
|
||||
Ok(client) => client,
|
||||
};
|
||||
|
||||
handle_roles(&self.spec, &mut client)?;
|
||||
handle_databases(&self.spec, &mut client)?;
|
||||
handle_role_deletions(self, &mut client)?;
|
||||
handle_grants(&self.spec, &mut client)?;
|
||||
create_writablity_check_data(&mut client)?;
|
||||
|
||||
// 'Close' connection
|
||||
drop(client);
|
||||
let startup_end_time = Utc::now();
|
||||
|
||||
self.metrics.config_ms.store(
|
||||
startup_end_time
|
||||
.signed_duration_since(start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
self.metrics.total_startup_ms.store(
|
||||
startup_end_time
|
||||
.signed_duration_since(self.start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64,
|
||||
Ordering::Relaxed,
|
||||
);
|
||||
|
||||
self.set_status(ComputeStatus::Running);
|
||||
|
||||
info!(
|
||||
"finished configuration of compute for project {}",
|
||||
self.spec.cluster.cluster_id
|
||||
);
|
||||
|
||||
// Wait for child Postgres process basically forever. In this state Ctrl+C
|
||||
// will propagate to Postgres and it will be shut down as well.
|
||||
let ecode = pg
|
||||
.wait()
|
||||
.expect("failed to start waiting on Postgres process");
|
||||
|
||||
Ok(ecode)
|
||||
}
|
||||
|
||||
pub fn prepare_and_run(&self) -> Result<ExitStatus> {
|
||||
info!(
|
||||
"starting compute for project {}, operation {}, tenant {}, timeline {}",
|
||||
self.spec.cluster.cluster_id,
|
||||
self.spec.operation_uuid.as_ref().unwrap(),
|
||||
self.tenant,
|
||||
self.timeline,
|
||||
);
|
||||
|
||||
self.prepare_pgdata()?;
|
||||
self.run()
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,7 @@ use std::path::Path;
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::pg_helpers::PgOptionsSerialize;
|
||||
use crate::zenith::ClusterSpec;
|
||||
use crate::spec::ComputeSpec;
|
||||
|
||||
/// Check that `line` is inside a text file and put it there if it is not.
|
||||
/// Create file if it doesn't exist.
|
||||
@@ -32,20 +32,20 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
|
||||
}
|
||||
|
||||
/// Create or completely rewrite configuration file specified by `path`
|
||||
pub fn write_postgres_conf(path: &Path, spec: &ClusterSpec) -> Result<()> {
|
||||
pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
|
||||
// File::create() destroys the file content if it exists.
|
||||
let mut postgres_conf = File::create(path)?;
|
||||
|
||||
write_zenith_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
|
||||
write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Write Postgres config block wrapped with generated comment section
|
||||
fn write_zenith_managed_block(file: &mut File, buf: &str) -> Result<()> {
|
||||
writeln!(file, "# Managed by Zenith: begin")?;
|
||||
fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
|
||||
writeln!(file, "# Managed by compute_ctl: begin")?;
|
||||
writeln!(file, "{}", buf)?;
|
||||
writeln!(file, "# Managed by Zenith: end")?;
|
||||
writeln!(file, "# Managed by compute_ctl: end")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,37 +1,64 @@
|
||||
use std::convert::Infallible;
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
use anyhow::Result;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use log::{error, info};
|
||||
use serde_json;
|
||||
|
||||
use crate::zenith::*;
|
||||
use crate::compute::{ComputeNode, ComputeStatus};
|
||||
|
||||
// Service function to handle all available routes.
|
||||
async fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
|
||||
async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body> {
|
||||
match (req.method(), req.uri().path()) {
|
||||
// Timestamp of the last Postgres activity in the plain text.
|
||||
// DEPRECATED in favour of /status
|
||||
(&Method::GET, "/last_activity") => {
|
||||
info!("serving /last_active GET request");
|
||||
let state = state.read().unwrap();
|
||||
let state = compute.state.read().unwrap();
|
||||
|
||||
// Use RFC3339 format for consistency.
|
||||
Response::new(Body::from(state.last_active.to_rfc3339()))
|
||||
}
|
||||
|
||||
// Has compute setup process finished? -> true/false
|
||||
// Has compute setup process finished? -> true/false.
|
||||
// DEPRECATED in favour of /status
|
||||
(&Method::GET, "/ready") => {
|
||||
info!("serving /ready GET request");
|
||||
let state = state.read().unwrap();
|
||||
Response::new(Body::from(format!("{}", state.ready)))
|
||||
let status = compute.get_status();
|
||||
Response::new(Body::from(format!("{}", status == ComputeStatus::Running)))
|
||||
}
|
||||
|
||||
// Serialized compute state.
|
||||
(&Method::GET, "/status") => {
|
||||
info!("serving /status GET request");
|
||||
let state = compute.state.read().unwrap();
|
||||
Response::new(Body::from(serde_json::to_string(&*state).unwrap()))
|
||||
}
|
||||
|
||||
// Startup metrics in JSON format. Keep /metrics reserved for a possible
|
||||
// future use for Prometheus metrics format.
|
||||
(&Method::GET, "/metrics.json") => {
|
||||
info!("serving /metrics.json GET request");
|
||||
Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
|
||||
}
|
||||
|
||||
// DEPRECATED, use POST instead
|
||||
(&Method::GET, "/check_writability") => {
|
||||
info!("serving /check_writability GET request");
|
||||
let res = crate::checker::check_writability(&state).await;
|
||||
let res = crate::checker::check_writability(&compute).await;
|
||||
match res {
|
||||
Ok(_) => Response::new(Body::from("true")),
|
||||
Err(e) => Response::new(Body::from(e.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::POST, "/check_writability") => {
|
||||
info!("serving /check_writability POST request");
|
||||
let res = crate::checker::check_writability(&compute).await;
|
||||
match res {
|
||||
Ok(_) => Response::new(Body::from("true")),
|
||||
Err(e) => Response::new(Body::from(e.to_string())),
|
||||
@@ -49,7 +76,7 @@ async fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Respons
|
||||
|
||||
// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
|
||||
#[tokio::main]
|
||||
async fn serve(state: Arc<RwLock<ComputeState>>) {
|
||||
async fn serve(state: Arc<ComputeNode>) {
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
|
||||
|
||||
let make_service = make_service_fn(move |_conn| {
|
||||
@@ -73,7 +100,7 @@ async fn serve(state: Arc<RwLock<ComputeState>>) {
|
||||
}
|
||||
|
||||
/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
|
||||
pub fn launch_http_server(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
|
||||
pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
|
||||
let state = Arc::clone(state);
|
||||
|
||||
Ok(thread::Builder::new()
|
||||
1
compute_tools/src/http/mod.rs
Normal file
1
compute_tools/src/http/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod api;
|
||||
158
compute_tools/src/http/openapi_spec.yaml
Normal file
158
compute_tools/src/http/openapi_spec.yaml
Normal file
@@ -0,0 +1,158 @@
|
||||
openapi: "3.0.2"
|
||||
info:
|
||||
title: Compute node control API
|
||||
version: "1.0"
|
||||
|
||||
servers:
|
||||
- url: "http://localhost:3080"
|
||||
|
||||
paths:
|
||||
/status:
|
||||
get:
|
||||
tags:
|
||||
- "info"
|
||||
summary: Get compute node internal status
|
||||
description: ""
|
||||
operationId: getComputeStatus
|
||||
responses:
|
||||
"200":
|
||||
description: ComputeState
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ComputeState"
|
||||
|
||||
/metrics.json:
|
||||
get:
|
||||
tags:
|
||||
- "info"
|
||||
summary: Get compute node startup metrics in JSON format
|
||||
description: ""
|
||||
operationId: getComputeMetricsJSON
|
||||
responses:
|
||||
"200":
|
||||
description: ComputeMetrics
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ComputeMetrics"
|
||||
|
||||
/ready:
|
||||
get:
|
||||
deprecated: true
|
||||
tags:
|
||||
- "info"
|
||||
summary: Check whether compute startup process finished successfully
|
||||
description: ""
|
||||
operationId: computeIsReady
|
||||
responses:
|
||||
"200":
|
||||
description: Compute is ready ('true') or not ('false')
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
example: "true"
|
||||
|
||||
/last_activity:
|
||||
get:
|
||||
deprecated: true
|
||||
tags:
|
||||
- "info"
|
||||
summary: Get timestamp of the last compute activity
|
||||
description: ""
|
||||
operationId: getLastComputeActivityTS
|
||||
responses:
|
||||
"200":
|
||||
description: Timestamp of the last compute activity
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
example: "2022-10-12T07:20:50.52Z"
|
||||
|
||||
/check_writability:
|
||||
get:
|
||||
deprecated: true
|
||||
tags:
|
||||
- "check"
|
||||
summary: Check that we can write new data on this compute
|
||||
description: ""
|
||||
operationId: checkComputeWritabilityDeprecated
|
||||
responses:
|
||||
"200":
|
||||
description: Check result
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
description: Error text or 'true' if check passed
|
||||
example: "true"
|
||||
|
||||
post:
|
||||
tags:
|
||||
- "check"
|
||||
summary: Check that we can write new data on this compute
|
||||
description: ""
|
||||
operationId: checkComputeWritability
|
||||
responses:
|
||||
"200":
|
||||
description: Check result
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
description: Error text or 'true' if check passed
|
||||
example: "true"
|
||||
|
||||
components:
|
||||
securitySchemes:
|
||||
JWT:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
|
||||
schemas:
|
||||
ComputeMetrics:
|
||||
type: object
|
||||
description: Compute startup metrics
|
||||
required:
|
||||
- sync_safekeepers_ms
|
||||
- basebackup_ms
|
||||
- config_ms
|
||||
- total_startup_ms
|
||||
properties:
|
||||
sync_safekeepers_ms:
|
||||
type: integer
|
||||
basebackup_ms:
|
||||
type: integer
|
||||
config_ms:
|
||||
type: integer
|
||||
total_startup_ms:
|
||||
type: integer
|
||||
|
||||
ComputeState:
|
||||
type: object
|
||||
required:
|
||||
- status
|
||||
- last_active
|
||||
properties:
|
||||
status:
|
||||
$ref: '#/components/schemas/ComputeStatus'
|
||||
last_active:
|
||||
type: string
|
||||
description: The last detected compute activity timestamp in UTC and RFC3339 format
|
||||
example: "2022-10-12T07:20:50.52Z"
|
||||
error:
|
||||
type: string
|
||||
description: Text of the error during compute startup, if any
|
||||
|
||||
ComputeStatus:
|
||||
type: string
|
||||
enum:
|
||||
- init
|
||||
- failed
|
||||
- running
|
||||
|
||||
security:
|
||||
- JWT: []
|
||||
@@ -4,11 +4,11 @@
|
||||
//!
|
||||
pub mod checker;
|
||||
pub mod config;
|
||||
pub mod http_api;
|
||||
pub mod http;
|
||||
#[macro_use]
|
||||
pub mod logger;
|
||||
pub mod compute;
|
||||
pub mod monitor;
|
||||
pub mod params;
|
||||
pub mod pg_helpers;
|
||||
pub mod spec;
|
||||
pub mod zenith;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::sync::Arc;
|
||||
use std::{thread, time};
|
||||
|
||||
use anyhow::Result;
|
||||
@@ -6,18 +6,18 @@ use chrono::{DateTime, Utc};
|
||||
use log::{debug, info};
|
||||
use postgres::{Client, NoTls};
|
||||
|
||||
use crate::zenith::ComputeState;
|
||||
use crate::compute::ComputeNode;
|
||||
|
||||
const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
|
||||
|
||||
// Spin in a loop and figure out the last activity time in the Postgres.
|
||||
// Then update it in the shared state. This function never errors out.
|
||||
// XXX: the only expected panic is at `RwLock` unwrap().
|
||||
fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
|
||||
fn watch_compute_activity(compute: &ComputeNode) {
|
||||
// Suppose that `connstr` doesn't change
|
||||
let connstr = state.read().unwrap().connstr.clone();
|
||||
let connstr = compute.connstr.as_str();
|
||||
// Define `client` outside of the loop to reuse existing connection if it's active.
|
||||
let mut client = Client::connect(&connstr, NoTls);
|
||||
let mut client = Client::connect(connstr, NoTls);
|
||||
let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
|
||||
|
||||
info!("watching Postgres activity at {}", connstr);
|
||||
@@ -32,7 +32,7 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
|
||||
info!("connection to postgres closed, trying to reconnect");
|
||||
|
||||
// Connection is closed, reconnect and try again.
|
||||
client = Client::connect(&connstr, NoTls);
|
||||
client = Client::connect(connstr, NoTls);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -43,10 +43,10 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
|
||||
FROM pg_stat_activity
|
||||
WHERE backend_type = 'client backend'
|
||||
AND pid != pg_backend_pid()
|
||||
AND usename != 'zenith_admin';", // XXX: find a better way to filter other monitors?
|
||||
AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
|
||||
&[],
|
||||
);
|
||||
let mut last_active = state.read().unwrap().last_active;
|
||||
let mut last_active = compute.state.read().unwrap().last_active;
|
||||
|
||||
if let Ok(backs) = backends {
|
||||
let mut idle_backs: Vec<DateTime<Utc>> = vec![];
|
||||
@@ -83,24 +83,24 @@ fn watch_compute_activity(state: &Arc<RwLock<ComputeState>>) {
|
||||
}
|
||||
|
||||
// Update the last activity in the shared state if we got a more recent one.
|
||||
let mut state = state.write().unwrap();
|
||||
let mut state = compute.state.write().unwrap();
|
||||
if last_active > state.last_active {
|
||||
state.last_active = last_active;
|
||||
debug!("set the last compute activity time to: {}", last_active);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
info!("cannot connect to postgres: {}, retrying", e);
|
||||
debug!("cannot connect to postgres: {}, retrying", e);
|
||||
|
||||
// Establish a new connection and try again.
|
||||
client = Client::connect(&connstr, NoTls);
|
||||
client = Client::connect(connstr, NoTls);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Launch a separate compute monitor thread and return its `JoinHandle`.
|
||||
pub fn launch_monitor(state: &Arc<RwLock<ComputeState>>) -> Result<thread::JoinHandle<()>> {
|
||||
pub fn launch_monitor(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
|
||||
let state = Arc::clone(state);
|
||||
|
||||
Ok(thread::Builder::new()
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use std::fmt::Write;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::net::{SocketAddr, TcpStream};
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::process::Child;
|
||||
use std::str::FromStr;
|
||||
use std::{fs, thread, time};
|
||||
|
||||
@@ -136,9 +139,11 @@ impl Role {
|
||||
// Now we also support SCRAM-SHA-256 and to preserve compatibility
|
||||
// we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
|
||||
if pass.starts_with("SCRAM-SHA-256") {
|
||||
params.push_str(&format!(" PASSWORD '{}'", pass));
|
||||
write!(params, " PASSWORD '{pass}'")
|
||||
.expect("String is documented to not to error during write operations");
|
||||
} else {
|
||||
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
|
||||
write!(params, " PASSWORD 'md5{pass}'")
|
||||
.expect("String is documented to not to error during write operations");
|
||||
}
|
||||
} else {
|
||||
params.push_str(" PASSWORD NULL");
|
||||
@@ -156,7 +161,8 @@ impl Database {
|
||||
/// it may require a proper quoting too.
|
||||
pub fn to_pg_options(&self) -> String {
|
||||
let mut params: String = self.options.as_pg_options();
|
||||
params.push_str(&format!(" OWNER {}", &self.owner.quote()));
|
||||
write!(params, " OWNER {}", &self.owner.quote())
|
||||
.expect("String is documented to not to error during write operations");
|
||||
|
||||
params
|
||||
}
|
||||
@@ -220,12 +226,12 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
|
||||
/// Wait for Postgres to become ready to accept connections:
|
||||
/// - state should be `ready` in the `pgdata/postmaster.pid`
|
||||
/// - and we should be able to connect to 127.0.0.1:5432
|
||||
pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
|
||||
pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()> {
|
||||
let pid_path = pgdata.join("postmaster.pid");
|
||||
let mut slept: u64 = 0; // ms
|
||||
let pause = time::Duration::from_millis(100);
|
||||
|
||||
let timeout = time::Duration::from_millis(200);
|
||||
let timeout = time::Duration::from_millis(10);
|
||||
let addr = SocketAddr::from_str(&format!("127.0.0.1:{}", port)).unwrap();
|
||||
|
||||
loop {
|
||||
@@ -236,14 +242,19 @@ pub fn wait_for_postgres(port: &str, pgdata: &Path) -> Result<()> {
|
||||
bail!("timed out while waiting for Postgres to start");
|
||||
}
|
||||
|
||||
if let Ok(Some(status)) = pg.try_wait() {
|
||||
// Postgres exited, that is not what we expected, bail out earlier.
|
||||
let code = status.code().unwrap_or(-1);
|
||||
bail!("Postgres exited unexpectedly with code {}", code);
|
||||
}
|
||||
|
||||
if pid_path.exists() {
|
||||
// XXX: dumb and the simplest way to get the last line in a text file
|
||||
// TODO: better use `.lines().last()` later
|
||||
let stdout = Command::new("tail")
|
||||
.args(&["-n1", pid_path.to_str().unwrap()])
|
||||
.output()?
|
||||
.stdout;
|
||||
let status = String::from_utf8(stdout)?;
|
||||
let file = BufReader::new(File::open(&pid_path)?);
|
||||
let status = file
|
||||
.lines()
|
||||
.last()
|
||||
.unwrap()
|
||||
.unwrap_or_else(|_| "unknown".to_string());
|
||||
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
|
||||
|
||||
// Now Postgres is ready to accept connections
|
||||
|
||||
@@ -2,17 +2,55 @@ use std::path::Path;
|
||||
|
||||
use anyhow::Result;
|
||||
use log::{info, log_enabled, warn, Level};
|
||||
use postgres::Client;
|
||||
use postgres::{Client, NoTls};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::compute::ComputeNode;
|
||||
use crate::config;
|
||||
use crate::params::PG_HBA_ALL_MD5;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::zenith::ClusterSpec;
|
||||
|
||||
/// Cluster spec or configuration represented as an optional number of
|
||||
/// delta operations + final cluster state description.
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct ComputeSpec {
|
||||
pub format_version: f32,
|
||||
pub timestamp: String,
|
||||
pub operation_uuid: Option<String>,
|
||||
/// Expected cluster state at the end of transition process.
|
||||
pub cluster: Cluster,
|
||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||
}
|
||||
|
||||
/// Cluster state seen from the perspective of the external tools
|
||||
/// like Rails web console.
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct Cluster {
|
||||
pub cluster_id: String,
|
||||
pub name: String,
|
||||
pub state: Option<String>,
|
||||
pub roles: Vec<Role>,
|
||||
pub databases: Vec<Database>,
|
||||
pub settings: GenericOptions,
|
||||
}
|
||||
|
||||
/// Single cluster state changing operation that could not be represented as
|
||||
/// a static `Cluster` structure. For example:
|
||||
/// - DROP DATABASE
|
||||
/// - DROP ROLE
|
||||
/// - ALTER ROLE name RENAME TO new_name
|
||||
/// - ALTER DATABASE name RENAME TO new_name
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct DeltaOp {
|
||||
pub action: String,
|
||||
pub name: PgIdent,
|
||||
pub new_name: Option<PgIdent>,
|
||||
}
|
||||
|
||||
/// It takes cluster specification and does the following:
|
||||
/// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
|
||||
/// - Update `pg_hba.conf` to allow external connections.
|
||||
pub fn handle_configuration(spec: &ClusterSpec, pgdata_path: &Path) -> Result<()> {
|
||||
pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
|
||||
// File `postgresql.conf` is no longer included into `basebackup`, so just
|
||||
// always write all config into it creating new file.
|
||||
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
|
||||
@@ -39,7 +77,7 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
|
||||
|
||||
/// Given a cluster spec json and open transaction it handles roles creation,
|
||||
/// deletion and update.
|
||||
pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
let mut xact = client.transaction()?;
|
||||
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
|
||||
|
||||
@@ -60,18 +98,13 @@ pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
|
||||
// Process delta operations first
|
||||
if let Some(ops) = &spec.delta_operations {
|
||||
info!("processing delta operations on roles");
|
||||
info!("processing role renames");
|
||||
for op in ops {
|
||||
match op.action.as_ref() {
|
||||
// We do not check either role exists or not,
|
||||
// Postgres will take care of it for us
|
||||
"delete_role" => {
|
||||
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
|
||||
|
||||
warn!("deleting role '{}'", &op.name);
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
// no-op now, roles will be deleted at the end of configuration
|
||||
}
|
||||
// Renaming role drops its password, since tole name is
|
||||
// Renaming role drops its password, since role name is
|
||||
// used as a salt there. It is important that this role
|
||||
// is recorded with a new `name` in the `roles` list.
|
||||
// Follow up roles update will set the new password.
|
||||
@@ -136,13 +169,20 @@ pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
}
|
||||
} else {
|
||||
info!("role name {}", &name);
|
||||
info!("role name: '{}'", &name);
|
||||
let mut query: String = format!("CREATE ROLE {} ", name.quote());
|
||||
info!("role create query {}", &query);
|
||||
info!("role create query: '{}'", &query);
|
||||
info_print!(" -> create");
|
||||
|
||||
query.push_str(&role.to_pg_options());
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
|
||||
let grant_query = format!(
|
||||
"GRANT pg_read_all_data, pg_write_all_data TO {}",
|
||||
name.quote()
|
||||
);
|
||||
xact.execute(grant_query.as_str(), &[])?;
|
||||
info!("role grant query: '{}'", &grant_query);
|
||||
}
|
||||
|
||||
info_print!("\n");
|
||||
@@ -153,12 +193,76 @@ pub fn handle_roles(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reassign all dependent objects and delete requested roles.
|
||||
pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
||||
let spec = &node.spec;
|
||||
|
||||
// First, reassign all dependent objects to db owners.
|
||||
if let Some(ops) = &spec.delta_operations {
|
||||
info!("reassigning dependent objects of to-be-deleted roles");
|
||||
for op in ops {
|
||||
if op.action == "delete_role" {
|
||||
reassign_owned_objects(node, &op.name)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Second, proceed with role deletions.
|
||||
let mut xact = client.transaction()?;
|
||||
if let Some(ops) = &spec.delta_operations {
|
||||
info!("processing role deletions");
|
||||
for op in ops {
|
||||
// We do not check either role exists or not,
|
||||
// Postgres will take care of it for us
|
||||
if op.action == "delete_role" {
|
||||
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
|
||||
|
||||
warn!("deleting role '{}'", &op.name);
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Reassign all owned objects in all databases to the owner of the database.
|
||||
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
|
||||
for db in &node.spec.cluster.databases {
|
||||
if db.owner != *role_name {
|
||||
let mut connstr = node.connstr.clone();
|
||||
// database name is always the last and the only component of the path
|
||||
connstr.set_path(&db.name);
|
||||
|
||||
let mut client = Client::connect(connstr.as_str(), NoTls)?;
|
||||
|
||||
// This will reassign all dependent objects to the db owner
|
||||
let reassign_query = format!(
|
||||
"REASSIGN OWNED BY {} TO {}",
|
||||
role_name.quote(),
|
||||
db.owner.quote()
|
||||
);
|
||||
info!(
|
||||
"reassigning objects owned by '{}' in db '{}' to '{}'",
|
||||
role_name, &db.name, &db.owner
|
||||
);
|
||||
client.simple_query(&reassign_query)?;
|
||||
|
||||
// This now will only drop privileges of the role
|
||||
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
|
||||
client.simple_query(&drop_query)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// It follows mostly the same logic as `handle_roles()` excepting that we
|
||||
/// does not use an explicit transactions block, since major database operations
|
||||
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
|
||||
/// atomicity should be enough here due to the order of operations and various checks,
|
||||
/// which together provide us idempotency.
|
||||
pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
|
||||
|
||||
// Print a list of existing Postgres databases (only in debug mode)
|
||||
@@ -247,16 +351,29 @@ pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
|
||||
// Grant CREATE ON DATABASE to the database owner
|
||||
// to allow clients create trusted extensions.
|
||||
pub fn handle_grants(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
info!("cluster spec grants:");
|
||||
|
||||
// We now have a separate `web_access` role to connect to the database
|
||||
// via the web interface and proxy link auth. And also we grant a
|
||||
// read / write all data privilege to every role. So also grant
|
||||
// create to everyone.
|
||||
// XXX: later we should stop messing with Postgres ACL in such horrible
|
||||
// ways.
|
||||
let roles = spec
|
||||
.cluster
|
||||
.roles
|
||||
.iter()
|
||||
.map(|r| r.name.quote())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for db in &spec.cluster.databases {
|
||||
let dbname = &db.name;
|
||||
|
||||
let query: String = format!(
|
||||
"GRANT CREATE ON DATABASE {} TO {}",
|
||||
dbname.quote(),
|
||||
db.owner.quote()
|
||||
roles.join(", ")
|
||||
);
|
||||
info!("grant query {}", &query);
|
||||
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
use anyhow::Result;
|
||||
use chrono::{DateTime, Utc};
|
||||
use postgres::{Client, NoTls};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::pg_helpers::*;
|
||||
|
||||
/// Compute node state shared across several `zenith_ctl` threads.
|
||||
/// Should be used under `RwLock` to allow HTTP API server to serve
|
||||
/// status requests, while configuration is in progress.
|
||||
pub struct ComputeState {
|
||||
pub connstr: String,
|
||||
pub pgdata: String,
|
||||
pub pgbin: String,
|
||||
pub spec: ClusterSpec,
|
||||
/// Compute setup process has finished
|
||||
pub ready: bool,
|
||||
/// Timestamp of the last Postgres activity
|
||||
pub last_active: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Cluster spec or configuration represented as an optional number of
|
||||
/// delta operations + final cluster state description.
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct ClusterSpec {
|
||||
pub format_version: f32,
|
||||
pub timestamp: String,
|
||||
pub operation_uuid: Option<String>,
|
||||
/// Expected cluster state at the end of transition process.
|
||||
pub cluster: Cluster,
|
||||
pub delta_operations: Option<Vec<DeltaOp>>,
|
||||
}
|
||||
|
||||
/// Cluster state seen from the perspective of the external tools
|
||||
/// like Rails web console.
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct Cluster {
|
||||
pub cluster_id: String,
|
||||
pub name: String,
|
||||
pub state: Option<String>,
|
||||
pub roles: Vec<Role>,
|
||||
pub databases: Vec<Database>,
|
||||
pub settings: GenericOptions,
|
||||
}
|
||||
|
||||
/// Single cluster state changing operation that could not be represented as
|
||||
/// a static `Cluster` structure. For example:
|
||||
/// - DROP DATABASE
|
||||
/// - DROP ROLE
|
||||
/// - ALTER ROLE name RENAME TO new_name
|
||||
/// - ALTER DATABASE name RENAME TO new_name
|
||||
#[derive(Clone, Deserialize)]
|
||||
pub struct DeltaOp {
|
||||
pub action: String,
|
||||
pub name: PgIdent,
|
||||
pub new_name: Option<PgIdent>,
|
||||
}
|
||||
|
||||
/// Get basebackup from the libpq connection to pageserver using `connstr` and
|
||||
/// unarchive it to `pgdata` directory overriding all its previous content.
|
||||
pub fn get_basebackup(
|
||||
pgdata: &str,
|
||||
connstr: &str,
|
||||
tenant: &str,
|
||||
timeline: &str,
|
||||
lsn: &str,
|
||||
) -> Result<()> {
|
||||
let mut client = Client::connect(connstr, NoTls)?;
|
||||
let basebackup_cmd = match lsn {
|
||||
"0/0" => format!("basebackup {} {}", tenant, timeline), // First start of the compute
|
||||
_ => format!("basebackup {} {} {}", tenant, timeline, lsn),
|
||||
};
|
||||
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
|
||||
let mut ar = tar::Archive::new(copyreader);
|
||||
|
||||
ar.unpack(&pgdata)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run `postgres` in a special mode with `--sync-safekeepers` argument
|
||||
/// and return the reported LSN back to the caller.
|
||||
pub fn sync_safekeepers(pgdata: &str, pgbin: &str) -> Result<String> {
|
||||
let sync_handle = Command::new(&pgbin)
|
||||
.args(&["--sync-safekeepers"])
|
||||
.env("PGDATA", &pgdata) // we cannot use -D in this mode
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
.expect("postgres --sync-safekeepers failed to start");
|
||||
|
||||
// `postgres --sync-safekeepers` will print all log output to stderr and
|
||||
// final LSN to stdout. So we pipe only stdout, while stderr will be automatically
|
||||
// redirected to the caller output.
|
||||
let sync_output = sync_handle
|
||||
.wait_with_output()
|
||||
.expect("postgres --sync-safekeepers failed");
|
||||
if !sync_output.status.success() {
|
||||
anyhow::bail!(
|
||||
"postgres --sync-safekeepers exited with non-zero status: {}",
|
||||
sync_output.status,
|
||||
);
|
||||
}
|
||||
|
||||
let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());
|
||||
|
||||
Ok(lsn)
|
||||
}
|
||||
@@ -85,7 +85,7 @@
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "wal_acceptors",
|
||||
"name": "safekeepers",
|
||||
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
|
||||
"vartype": "string"
|
||||
},
|
||||
@@ -150,7 +150,7 @@
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "zenith.zenith_tenant",
|
||||
"name": "neon.tenant_id",
|
||||
"value": "b0554b632bd4d547a63b86c3630317e8",
|
||||
"vartype": "string"
|
||||
},
|
||||
@@ -160,13 +160,13 @@
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "zenith.zenith_timeline",
|
||||
"name": "neon.timeline_id",
|
||||
"value": "2414a61ffc94e428f14b5758fe308e13",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "shared_preload_libraries",
|
||||
"value": "zenith",
|
||||
"value": "neon",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
@@ -175,7 +175,7 @@
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "zenith.page_server_connstring",
|
||||
"name": "neon.pageserver_connstring",
|
||||
"value": "host=127.0.0.1 port=6400",
|
||||
"vartype": "string"
|
||||
}
|
||||
|
||||
@@ -4,12 +4,12 @@ mod pg_helpers_tests {
|
||||
use std::fs::File;
|
||||
|
||||
use compute_tools::pg_helpers::*;
|
||||
use compute_tools::zenith::ClusterSpec;
|
||||
use compute_tools::spec::ComputeSpec;
|
||||
|
||||
#[test]
|
||||
fn params_serialize() {
|
||||
let file = File::open("tests/cluster_spec.json").unwrap();
|
||||
let spec: ClusterSpec = serde_json::from_reader(file).unwrap();
|
||||
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
spec.cluster.databases.first().unwrap().to_pg_options(),
|
||||
@@ -24,11 +24,11 @@ mod pg_helpers_tests {
|
||||
#[test]
|
||||
fn settings_serialize() {
|
||||
let file = File::open("tests/cluster_spec.json").unwrap();
|
||||
let spec: ClusterSpec = serde_json::from_reader(file).unwrap();
|
||||
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
spec.cluster.settings.as_pg_settings(),
|
||||
"fsync = off\nwal_level = replica\nhot_standby = on\nwal_acceptors = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nzenith.zenith_tenant = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nzenith.zenith_timeline = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'zenith'\nsynchronous_standby_names = 'walproposer'\nzenith.page_server_connstring = 'host=127.0.0.1 port=6400'"
|
||||
"fsync = off\nwal_level = replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
tar = "0.4.33"
|
||||
tar = "0.4.38"
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = "1.12.0"
|
||||
|
||||
@@ -9,3 +9,6 @@ auth_type = 'Trust'
|
||||
id = 1
|
||||
pg_port = 5454
|
||||
http_port = 7676
|
||||
|
||||
[etcd_broker]
|
||||
broker_endpoints = ['http://127.0.0.1:2379']
|
||||
|
||||
@@ -148,9 +148,9 @@ impl PostgresNode {
|
||||
// Read a few options from the config file
|
||||
let context = format!("in config file {}", cfg_path_str);
|
||||
let port: u16 = conf.parse_field("port", &context)?;
|
||||
let timeline_id: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
|
||||
let tenant_id: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
|
||||
let uses_wal_proposer = conf.get("wal_acceptors").is_some();
|
||||
let timeline_id: ZTimelineId = conf.parse_field("neon.timeline_id", &context)?;
|
||||
let tenant_id: ZTenantId = conf.parse_field("neon.tenant_id", &context)?;
|
||||
let uses_wal_proposer = conf.get("safekeepers").is_some();
|
||||
|
||||
// parse recovery_target_lsn, if any
|
||||
let recovery_target_lsn: Option<Lsn> =
|
||||
@@ -231,8 +231,13 @@ impl PostgresNode {
|
||||
.context("page server 'basebackup' command failed")?;
|
||||
|
||||
// Read the archive directly from the `CopyOutReader`
|
||||
tar::Archive::new(copyreader)
|
||||
.unpack(&self.pgdata())
|
||||
//
|
||||
// Set `ignore_zeros` so that unpack() reads all the Copy data and
|
||||
// doesn't stop at the end-of-archive marker. Otherwise, if the server
|
||||
// sends an Error after finishing the tarball, we will not notice it.
|
||||
let mut ar = tar::Archive::new(copyreader);
|
||||
ar.set_ignore_zeros(true);
|
||||
ar.unpack(&self.pgdata())
|
||||
.context("extracting base backup failed")?;
|
||||
|
||||
Ok(())
|
||||
@@ -274,6 +279,8 @@ impl PostgresNode {
|
||||
conf.append("listen_addresses", &self.address.ip().to_string());
|
||||
conf.append("port", &self.address.port().to_string());
|
||||
conf.append("wal_keep_size", "0");
|
||||
// walproposer panics when basebackup is invalid, it is pointless to restart in this case.
|
||||
conf.append("restart_after_crash", "off");
|
||||
|
||||
// Configure the node to fetch pages from pageserver
|
||||
let pageserver_connstr = {
|
||||
@@ -296,11 +303,11 @@ impl PostgresNode {
|
||||
// uses only needed variables namely host, port, user, password.
|
||||
format!("postgresql://no_user:{}@{}:{}", password, host, port)
|
||||
};
|
||||
conf.append("shared_preload_libraries", "zenith");
|
||||
conf.append("shared_preload_libraries", "neon");
|
||||
conf.append_line("");
|
||||
conf.append("zenith.page_server_connstring", &pageserver_connstr);
|
||||
conf.append("zenith.zenith_tenant", &self.tenant_id.to_string());
|
||||
conf.append("zenith.zenith_timeline", &self.timeline_id.to_string());
|
||||
conf.append("neon.pageserver_connstring", &pageserver_connstr);
|
||||
conf.append("neon.tenant_id", &self.tenant_id.to_string());
|
||||
conf.append("neon.timeline_id", &self.timeline_id.to_string());
|
||||
if let Some(lsn) = self.lsn {
|
||||
conf.append("recovery_target_lsn", &lsn.to_string());
|
||||
}
|
||||
@@ -334,7 +341,7 @@ impl PostgresNode {
|
||||
.map(|sk| format!("localhost:{}", sk.pg_port))
|
||||
.collect::<Vec<String>>()
|
||||
.join(",");
|
||||
conf.append("wal_acceptors", &safekeepers);
|
||||
conf.append("safekeepers", &safekeepers);
|
||||
} else {
|
||||
// We only use setup without safekeepers for tests,
|
||||
// and don't care about data durability on pageserver,
|
||||
@@ -345,7 +352,6 @@ impl PostgresNode {
|
||||
// This isn't really a supported configuration, but can be useful for
|
||||
// testing.
|
||||
conf.append("synchronous_standby_names", "pageserver");
|
||||
conf.append("zenith.callmemaybe_connstring", &self.connstr());
|
||||
}
|
||||
|
||||
let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
|
||||
@@ -492,7 +498,7 @@ impl PostgresNode {
|
||||
"host={} port={} user={} dbname={}",
|
||||
self.address.ip(),
|
||||
self.address.port(),
|
||||
"zenith_admin",
|
||||
"cloud_admin",
|
||||
"postgres"
|
||||
)
|
||||
}
|
||||
|
||||
97
control_plane/src/etcd.rs
Normal file
97
control_plane/src/etcd.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
use std::{
|
||||
fs,
|
||||
path::PathBuf,
|
||||
process::{Command, Stdio},
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use nix::{
|
||||
sys::signal::{kill, Signal},
|
||||
unistd::Pid,
|
||||
};
|
||||
|
||||
use crate::{local_env, read_pidfile};
|
||||
|
||||
pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
let etcd_broker = &env.etcd_broker;
|
||||
println!(
|
||||
"Starting etcd broker using {}",
|
||||
etcd_broker.etcd_binary_path.display()
|
||||
);
|
||||
|
||||
let etcd_data_dir = env.base_data_dir.join("etcd");
|
||||
fs::create_dir_all(&etcd_data_dir).with_context(|| {
|
||||
format!(
|
||||
"Failed to create etcd data dir: {}",
|
||||
etcd_data_dir.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let etcd_stdout_file =
|
||||
fs::File::create(etcd_data_dir.join("etcd.stdout.log")).with_context(|| {
|
||||
format!(
|
||||
"Failed to create ectd stout file in directory {}",
|
||||
etcd_data_dir.display()
|
||||
)
|
||||
})?;
|
||||
let etcd_stderr_file =
|
||||
fs::File::create(etcd_data_dir.join("etcd.stderr.log")).with_context(|| {
|
||||
format!(
|
||||
"Failed to create ectd stderr file in directory {}",
|
||||
etcd_data_dir.display()
|
||||
)
|
||||
})?;
|
||||
let client_urls = etcd_broker.comma_separated_endpoints();
|
||||
|
||||
let etcd_process = Command::new(&etcd_broker.etcd_binary_path)
|
||||
.args(&[
|
||||
format!("--data-dir={}", etcd_data_dir.display()),
|
||||
format!("--listen-client-urls={client_urls}"),
|
||||
format!("--advertise-client-urls={client_urls}"),
|
||||
// Set --quota-backend-bytes to keep the etcd virtual memory
|
||||
// size smaller. Our test etcd clusters are very small.
|
||||
// See https://github.com/etcd-io/etcd/issues/7910
|
||||
"--quota-backend-bytes=100000000".to_string(),
|
||||
])
|
||||
.stdout(Stdio::from(etcd_stdout_file))
|
||||
.stderr(Stdio::from(etcd_stderr_file))
|
||||
.spawn()
|
||||
.context("Failed to spawn etcd subprocess")?;
|
||||
let pid = etcd_process.id();
|
||||
|
||||
let etcd_pid_file_path = etcd_pid_file_path(env);
|
||||
fs::write(&etcd_pid_file_path, pid.to_string()).with_context(|| {
|
||||
format!(
|
||||
"Failed to create etcd pid file at {}",
|
||||
etcd_pid_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
let etcd_path = &env.etcd_broker.etcd_binary_path;
|
||||
println!("Stopping etcd broker at {}", etcd_path.display());
|
||||
|
||||
let etcd_pid_file_path = etcd_pid_file_path(env);
|
||||
let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to read etcd pid file at {}",
|
||||
etcd_pid_file_path.display()
|
||||
)
|
||||
})?);
|
||||
|
||||
kill(pid, Signal::SIGTERM).with_context(|| {
|
||||
format!(
|
||||
"Failed to stop etcd with pid {pid} at {}",
|
||||
etcd_pid_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn etcd_pid_file_path(env: &local_env::LocalEnv) -> PathBuf {
|
||||
env.base_data_dir.join("etcd.pid")
|
||||
}
|
||||
@@ -12,6 +12,7 @@ use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
pub mod compute;
|
||||
pub mod etcd;
|
||||
pub mod local_env;
|
||||
pub mod postgresql_conf;
|
||||
pub mod safekeeper;
|
||||
@@ -48,3 +49,12 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
|
||||
cmd
|
||||
}
|
||||
}
|
||||
|
||||
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||
for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
|
||||
if let Ok(value) = std::env::var(env_key) {
|
||||
cmd = cmd.env(env_key, value);
|
||||
}
|
||||
}
|
||||
cmd
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
//! script which will use local paths.
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use std::collections::HashMap;
|
||||
@@ -14,15 +15,15 @@ use std::process::{Command, Stdio};
|
||||
use utils::{
|
||||
auth::{encode_from_key_file, Claims, Scope},
|
||||
postgres_backend::AuthType,
|
||||
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
|
||||
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
//
|
||||
// This data structures represents zenith CLI config
|
||||
// This data structures represents neon_local CLI config
|
||||
//
|
||||
// It is deserialized from the .zenith/config file, or the config file passed
|
||||
// It is deserialized from the .neon/config file, or the config file passed
|
||||
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
|
||||
// an example.
|
||||
//
|
||||
@@ -33,8 +34,8 @@ pub struct LocalEnv {
|
||||
// compute nodes).
|
||||
//
|
||||
// This is not stored in the config file. Rather, this is the path where the
|
||||
// config file itself is. It is read from the ZENITH_REPO_DIR env variable or
|
||||
// '.zenith' if not given.
|
||||
// config file itself is. It is read from the NEON_REPO_DIR env variable or
|
||||
// '.neon' if not given.
|
||||
#[serde(skip)]
|
||||
pub base_data_dir: PathBuf,
|
||||
|
||||
@@ -59,13 +60,7 @@ pub struct LocalEnv {
|
||||
#[serde(default)]
|
||||
pub private_key_path: PathBuf,
|
||||
|
||||
// A comma separated broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'.
|
||||
#[serde(default)]
|
||||
pub broker_endpoints: Option<String>,
|
||||
|
||||
/// A prefix to all to any key when pushing/polling etcd from a node.
|
||||
#[serde(default)]
|
||||
pub broker_etcd_prefix: Option<String>,
|
||||
pub etcd_broker: EtcdBroker,
|
||||
|
||||
pub pageserver: PageServerConf,
|
||||
|
||||
@@ -81,11 +76,75 @@ pub struct LocalEnv {
|
||||
branch_name_mappings: HashMap<String, Vec<(ZTenantId, ZTimelineId)>>,
|
||||
}
|
||||
|
||||
/// Etcd broker config for cluster internal communication.
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
pub struct EtcdBroker {
|
||||
/// A prefix to all to any key when pushing/polling etcd from a node.
|
||||
#[serde(default)]
|
||||
pub broker_etcd_prefix: Option<String>,
|
||||
|
||||
/// Broker (etcd) endpoints for storage nodes coordination, e.g. 'http://127.0.0.1:2379'.
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Vec<DisplayFromStr>")]
|
||||
pub broker_endpoints: Vec<Url>,
|
||||
|
||||
/// Etcd binary path to use.
|
||||
#[serde(default)]
|
||||
pub etcd_binary_path: PathBuf,
|
||||
}
|
||||
|
||||
impl EtcdBroker {
|
||||
pub fn locate_etcd() -> anyhow::Result<PathBuf> {
|
||||
let which_output = Command::new("which")
|
||||
.arg("etcd")
|
||||
.output()
|
||||
.context("Failed to run 'which etcd' command")?;
|
||||
let stdout = String::from_utf8_lossy(&which_output.stdout);
|
||||
ensure!(
|
||||
which_output.status.success(),
|
||||
"'which etcd' invocation failed. Status: {}, stdout: {stdout}, stderr: {}",
|
||||
which_output.status,
|
||||
String::from_utf8_lossy(&which_output.stderr)
|
||||
);
|
||||
|
||||
let etcd_path = PathBuf::from(stdout.trim());
|
||||
ensure!(
|
||||
etcd_path.is_file(),
|
||||
"'which etcd' invocation was successful, but the path it returned is not a file or does not exist: {}",
|
||||
etcd_path.display()
|
||||
);
|
||||
|
||||
Ok(etcd_path)
|
||||
}
|
||||
|
||||
pub fn comma_separated_endpoints(&self) -> String {
|
||||
self.broker_endpoints
|
||||
.iter()
|
||||
.map(|url| {
|
||||
// URL by default adds a '/' path at the end, which is not what etcd CLI wants.
|
||||
let url_string = url.as_str();
|
||||
if url_string.ends_with('/') {
|
||||
&url_string[0..url_string.len() - 1]
|
||||
} else {
|
||||
url_string
|
||||
}
|
||||
})
|
||||
.fold(String::new(), |mut comma_separated_urls, url| {
|
||||
if !comma_separated_urls.is_empty() {
|
||||
comma_separated_urls.push(',');
|
||||
}
|
||||
comma_separated_urls.push_str(url);
|
||||
comma_separated_urls
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct PageServerConf {
|
||||
// node id
|
||||
pub id: ZNodeId,
|
||||
pub id: NodeId,
|
||||
// Pageserver connection settings
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
@@ -100,7 +159,7 @@ pub struct PageServerConf {
|
||||
impl Default for PageServerConf {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
id: ZNodeId(0),
|
||||
id: NodeId(0),
|
||||
listen_pg_addr: String::new(),
|
||||
listen_http_addr: String::new(),
|
||||
auth_type: AuthType::Trust,
|
||||
@@ -112,19 +171,25 @@ impl Default for PageServerConf {
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
pub struct SafekeeperConf {
|
||||
pub id: ZNodeId,
|
||||
pub id: NodeId,
|
||||
pub pg_port: u16,
|
||||
pub http_port: u16,
|
||||
pub sync: bool,
|
||||
pub remote_storage: Option<String>,
|
||||
pub backup_threads: Option<u32>,
|
||||
pub auth_enabled: bool,
|
||||
}
|
||||
|
||||
impl Default for SafekeeperConf {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
id: ZNodeId(0),
|
||||
id: NodeId(0),
|
||||
pg_port: 0,
|
||||
http_port: 0,
|
||||
sync: true,
|
||||
remote_storage: None,
|
||||
backup_threads: None,
|
||||
auth_enabled: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -184,12 +249,7 @@ impl LocalEnv {
|
||||
if old_timeline_id == &timeline_id {
|
||||
Ok(())
|
||||
} else {
|
||||
bail!(
|
||||
"branch '{}' is already mapped to timeline {}, cannot map to another timeline {}",
|
||||
branch_name,
|
||||
old_timeline_id,
|
||||
timeline_id
|
||||
);
|
||||
bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
|
||||
}
|
||||
} else {
|
||||
existing_values.push((tenant_id, timeline_id));
|
||||
@@ -225,7 +285,7 @@ impl LocalEnv {
|
||||
///
|
||||
/// Unlike 'load_config', this function fills in any defaults that are missing
|
||||
/// from the config file.
|
||||
pub fn create_config(toml: &str) -> anyhow::Result<Self> {
|
||||
pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
|
||||
let mut env: LocalEnv = toml::from_str(toml)?;
|
||||
|
||||
// Find postgres binaries.
|
||||
@@ -238,26 +298,11 @@ impl LocalEnv {
|
||||
env.pg_distrib_dir = cwd.join("tmp_install")
|
||||
}
|
||||
}
|
||||
if !env.pg_distrib_dir.join("bin/postgres").exists() {
|
||||
bail!(
|
||||
"Can't find postgres binary at {}",
|
||||
env.pg_distrib_dir.display()
|
||||
);
|
||||
}
|
||||
|
||||
// Find zenith binaries.
|
||||
if env.zenith_distrib_dir == Path::new("") {
|
||||
env.zenith_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
|
||||
}
|
||||
for binary in ["pageserver", "safekeeper"] {
|
||||
if !env.zenith_distrib_dir.join(binary).exists() {
|
||||
bail!(
|
||||
"Can't find binary '{}' in zenith distrib dir '{}'",
|
||||
binary,
|
||||
env.zenith_distrib_dir.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// If no initial tenant ID was given, generate it.
|
||||
if env.default_tenant_id.is_none() {
|
||||
@@ -294,7 +339,7 @@ impl LocalEnv {
|
||||
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
|
||||
// Currently, the user first passes a config file with 'zenith init --config=<path>'
|
||||
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
|
||||
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
|
||||
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
|
||||
// a bit sad.
|
||||
let mut conf_content = r#"# This file describes a locale deployment of the page server
|
||||
# and safekeeeper node. It is read by the 'zenith' command-line
|
||||
@@ -346,11 +391,26 @@ impl LocalEnv {
|
||||
base_path != Path::new(""),
|
||||
"repository base path is missing"
|
||||
);
|
||||
|
||||
ensure!(
|
||||
!base_path.exists(),
|
||||
"directory '{}' already exists. Perhaps already initialized?",
|
||||
base_path.display()
|
||||
);
|
||||
if !self.pg_distrib_dir.join("bin/postgres").exists() {
|
||||
bail!(
|
||||
"Can't find postgres binary at {}",
|
||||
self.pg_distrib_dir.display()
|
||||
);
|
||||
}
|
||||
for binary in ["pageserver", "safekeeper"] {
|
||||
if !self.zenith_distrib_dir.join(binary).exists() {
|
||||
bail!(
|
||||
"Can't find binary '{binary}' in zenith distrib dir '{}'",
|
||||
self.zenith_distrib_dir.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fs::create_dir(&base_path)?;
|
||||
|
||||
@@ -407,8 +467,36 @@ impl LocalEnv {
|
||||
}
|
||||
|
||||
fn base_path() -> PathBuf {
|
||||
match std::env::var_os("ZENITH_REPO_DIR") {
|
||||
Some(val) => PathBuf::from(val.to_str().unwrap()),
|
||||
None => ".zenith".into(),
|
||||
match std::env::var_os("NEON_REPO_DIR") {
|
||||
Some(val) => PathBuf::from(val),
|
||||
None => PathBuf::from(".neon"),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn simple_conf_parsing() {
|
||||
let simple_conf_toml = include_str!("../simple.conf");
|
||||
let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
|
||||
assert!(
|
||||
simple_conf_parse_result.is_ok(),
|
||||
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
|
||||
);
|
||||
|
||||
let string_to_replace = "broker_endpoints = ['http://127.0.0.1:2379']";
|
||||
let spoiled_url_str = "broker_endpoints = ['!@$XOXO%^&']";
|
||||
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
|
||||
assert!(
|
||||
spoiled_url_toml.contains(spoiled_url_str),
|
||||
"Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
|
||||
);
|
||||
let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
|
||||
assert!(
|
||||
spoiled_url_parse_result.is_err(),
|
||||
"expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,12 +18,12 @@ use thiserror::Error;
|
||||
use utils::{
|
||||
connstring::connection_address,
|
||||
http::error::HttpErrorBody,
|
||||
zid::{ZNodeId, ZTenantId, ZTimelineId},
|
||||
zid::{NodeId, ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::local_env::{LocalEnv, SafekeeperConf};
|
||||
use crate::storage::PageServerNode;
|
||||
use crate::{fill_rust_env_vars, read_pidfile};
|
||||
use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SafekeeperHttpError {
|
||||
@@ -52,7 +52,7 @@ impl ResponseErrorMessageExt for Response {
|
||||
Err(SafekeeperHttpError::Response(
|
||||
match self.json::<HttpErrorBody>() {
|
||||
Ok(err_body) => format!("Error: {}", err_body.msg),
|
||||
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
|
||||
Err(_) => format!("Http error ({}) at {url}.", status.as_u16()),
|
||||
},
|
||||
))
|
||||
}
|
||||
@@ -65,7 +65,7 @@ impl ResponseErrorMessageExt for Response {
|
||||
//
|
||||
#[derive(Debug)]
|
||||
pub struct SafekeeperNode {
|
||||
pub id: ZNodeId,
|
||||
pub id: NodeId,
|
||||
|
||||
pub conf: SafekeeperConf,
|
||||
|
||||
@@ -75,17 +75,12 @@ pub struct SafekeeperNode {
|
||||
pub http_base_url: String,
|
||||
|
||||
pub pageserver: Arc<PageServerNode>,
|
||||
|
||||
broker_endpoints: Option<String>,
|
||||
broker_etcd_prefix: Option<String>,
|
||||
}
|
||||
|
||||
impl SafekeeperNode {
|
||||
pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
|
||||
let pageserver = Arc::new(PageServerNode::from_env(env));
|
||||
|
||||
println!("initializing for sk {} for {}", conf.id, conf.http_port);
|
||||
|
||||
SafekeeperNode {
|
||||
id: conf.id,
|
||||
conf: conf.clone(),
|
||||
@@ -94,8 +89,6 @@ impl SafekeeperNode {
|
||||
http_client: Client::new(),
|
||||
http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
|
||||
pageserver,
|
||||
broker_endpoints: env.broker_endpoints.clone(),
|
||||
broker_etcd_prefix: env.broker_etcd_prefix.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,7 +100,7 @@ impl SafekeeperNode {
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: ZNodeId) -> PathBuf {
|
||||
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
|
||||
env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
|
||||
}
|
||||
|
||||
@@ -142,12 +135,27 @@ impl SafekeeperNode {
|
||||
if !self.conf.sync {
|
||||
cmd.arg("--no-sync");
|
||||
}
|
||||
if let Some(ref ep) = self.broker_endpoints {
|
||||
cmd.args(&["--broker-endpoints", ep]);
|
||||
|
||||
let comma_separated_endpoints = self.env.etcd_broker.comma_separated_endpoints();
|
||||
if !comma_separated_endpoints.is_empty() {
|
||||
cmd.args(&["--broker-endpoints", &comma_separated_endpoints]);
|
||||
}
|
||||
if let Some(prefix) = self.broker_etcd_prefix.as_deref() {
|
||||
if let Some(prefix) = self.env.etcd_broker.broker_etcd_prefix.as_deref() {
|
||||
cmd.args(&["--broker-etcd-prefix", prefix]);
|
||||
}
|
||||
if let Some(threads) = self.conf.backup_threads {
|
||||
cmd.args(&["--backup-threads", threads.to_string().as_ref()]);
|
||||
}
|
||||
if let Some(ref remote_storage) = self.conf.remote_storage {
|
||||
cmd.args(&["--remote-storage", remote_storage]);
|
||||
}
|
||||
if self.conf.auth_enabled {
|
||||
cmd.arg("--auth-validation-public-key-path");
|
||||
// PathBuf is better be passed as is, not via `String`.
|
||||
cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
|
||||
}
|
||||
|
||||
fill_aws_secrets_vars(&mut cmd);
|
||||
|
||||
if !cmd.status()?.success() {
|
||||
bail!(
|
||||
@@ -210,12 +218,13 @@ impl SafekeeperNode {
|
||||
let pid = Pid::from_raw(pid);
|
||||
|
||||
let sig = if immediate {
|
||||
println!("Stop safekeeper immediately");
|
||||
print!("Stopping safekeeper {} immediately..", self.id);
|
||||
Signal::SIGQUIT
|
||||
} else {
|
||||
println!("Stop safekeeper gracefully");
|
||||
print!("Stopping safekeeper {} gracefully..", self.id);
|
||||
Signal::SIGTERM
|
||||
};
|
||||
io::stdout().flush().unwrap();
|
||||
match kill(pid, sig) {
|
||||
Ok(_) => (),
|
||||
Err(Errno::ESRCH) => {
|
||||
@@ -237,25 +246,35 @@ impl SafekeeperNode {
|
||||
// TODO Remove this "timeout" and handle it on caller side instead.
|
||||
// Shutting down may take a long time,
|
||||
// if safekeeper flushes a lot of data
|
||||
let mut tcp_stopped = false;
|
||||
for _ in 0..100 {
|
||||
if let Err(_e) = TcpStream::connect(&address) {
|
||||
println!("Safekeeper stopped receiving connections");
|
||||
|
||||
//Now check status
|
||||
match self.check_status() {
|
||||
Ok(_) => {
|
||||
println!("Safekeeper status is OK. Wait a bit.");
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
Err(err) => {
|
||||
println!("Safekeeper status is: {}", err);
|
||||
return Ok(());
|
||||
if !tcp_stopped {
|
||||
if let Err(err) = TcpStream::connect(&address) {
|
||||
tcp_stopped = true;
|
||||
if err.kind() != io::ErrorKind::ConnectionRefused {
|
||||
eprintln!("\nSafekeeper connection failed with error: {err}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("Safekeeper still receives connections");
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
if tcp_stopped {
|
||||
// Also check status on the HTTP port
|
||||
match self.check_status() {
|
||||
Err(SafekeeperHttpError::Transport(err)) if err.is_connect() => {
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("\nSafekeeper status check failed with error: {err}");
|
||||
return Ok(());
|
||||
}
|
||||
Ok(()) => {
|
||||
// keep waiting
|
||||
}
|
||||
}
|
||||
}
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
|
||||
bail!("Failed to stop safekeeper with pid {}", pid);
|
||||
@@ -280,7 +299,7 @@ impl SafekeeperNode {
|
||||
&self,
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
peer_ids: Vec<ZNodeId>,
|
||||
peer_ids: Vec<NodeId>,
|
||||
) -> Result<()> {
|
||||
Ok(self
|
||||
.http_request(
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Write};
|
||||
use std::net::TcpStream;
|
||||
use std::num::NonZeroU64;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
@@ -11,6 +13,7 @@ use nix::errno::Errno;
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
|
||||
use pageserver::tenant_mgr::TenantInfo;
|
||||
use pageserver::timelines::TimelineInfo;
|
||||
use postgres::{Config, NoTls};
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
@@ -25,8 +28,7 @@ use utils::{
|
||||
};
|
||||
|
||||
use crate::local_env::LocalEnv;
|
||||
use crate::{fill_rust_env_vars, read_pidfile};
|
||||
use pageserver::tenant_mgr::TenantInfo;
|
||||
use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum PageserverHttpError {
|
||||
@@ -37,6 +39,12 @@ pub enum PageserverHttpError {
|
||||
Response(String),
|
||||
}
|
||||
|
||||
impl From<anyhow::Error> for PageserverHttpError {
|
||||
fn from(e: anyhow::Error) -> Self {
|
||||
Self::Response(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
type Result<T> = result::Result<T, PageserverHttpError>;
|
||||
|
||||
pub trait ResponseErrorMessageExt: Sized {
|
||||
@@ -121,6 +129,16 @@ impl PageServerNode {
|
||||
);
|
||||
let listen_pg_addr_param =
|
||||
format!("listen_pg_addr='{}'", self.env.pageserver.listen_pg_addr);
|
||||
let broker_endpoints_param = format!(
|
||||
"broker_endpoints=[{}]",
|
||||
self.env
|
||||
.etcd_broker
|
||||
.broker_endpoints
|
||||
.iter()
|
||||
.map(|url| format!("'{url}'"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
let mut args = Vec::with_capacity(20);
|
||||
|
||||
args.push("--init");
|
||||
@@ -129,8 +147,19 @@ impl PageServerNode {
|
||||
args.extend(["-c", &authg_type_param]);
|
||||
args.extend(["-c", &listen_http_addr_param]);
|
||||
args.extend(["-c", &listen_pg_addr_param]);
|
||||
args.extend(["-c", &broker_endpoints_param]);
|
||||
args.extend(["-c", &id]);
|
||||
|
||||
let broker_etcd_prefix_param = self
|
||||
.env
|
||||
.etcd_broker
|
||||
.broker_etcd_prefix
|
||||
.as_ref()
|
||||
.map(|prefix| format!("broker_etcd_prefix='{prefix}'"));
|
||||
if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
|
||||
args.extend(["-c", broker_etcd_prefix_param]);
|
||||
}
|
||||
|
||||
for config_override in config_overrides {
|
||||
args.extend(["-c", config_override]);
|
||||
}
|
||||
@@ -260,12 +289,13 @@ impl PageServerNode {
|
||||
let pid = Pid::from_raw(read_pidfile(&pid_file)?);
|
||||
|
||||
let sig = if immediate {
|
||||
println!("Stop pageserver immediately");
|
||||
print!("Stopping pageserver immediately..");
|
||||
Signal::SIGQUIT
|
||||
} else {
|
||||
println!("Stop pageserver gracefully");
|
||||
print!("Stopping pageserver gracefully..");
|
||||
Signal::SIGTERM
|
||||
};
|
||||
io::stdout().flush().unwrap();
|
||||
match kill(pid, sig) {
|
||||
Ok(_) => (),
|
||||
Err(Errno::ESRCH) => {
|
||||
@@ -287,25 +317,36 @@ impl PageServerNode {
|
||||
// TODO Remove this "timeout" and handle it on caller side instead.
|
||||
// Shutting down may take a long time,
|
||||
// if pageserver checkpoints a lot of data
|
||||
let mut tcp_stopped = false;
|
||||
for _ in 0..100 {
|
||||
if let Err(_e) = TcpStream::connect(&address) {
|
||||
println!("Pageserver stopped receiving connections");
|
||||
|
||||
//Now check status
|
||||
match self.check_status() {
|
||||
Ok(_) => {
|
||||
println!("Pageserver status is OK. Wait a bit.");
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
Err(err) => {
|
||||
println!("Pageserver status is: {}", err);
|
||||
return Ok(());
|
||||
if !tcp_stopped {
|
||||
if let Err(err) = TcpStream::connect(&address) {
|
||||
tcp_stopped = true;
|
||||
if err.kind() != io::ErrorKind::ConnectionRefused {
|
||||
eprintln!("\nPageserver connection failed with error: {err}");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!("Pageserver still receives connections");
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
if tcp_stopped {
|
||||
// Also check status on the HTTP port
|
||||
|
||||
match self.check_status() {
|
||||
Err(PageserverHttpError::Transport(err)) if err.is_connect() => {
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("\nPageserver status check failed with error: {err}");
|
||||
return Ok(());
|
||||
}
|
||||
Ok(()) => {
|
||||
// keep waiting
|
||||
}
|
||||
}
|
||||
}
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
|
||||
bail!("Failed to stop pageserver with pid {}", pid);
|
||||
@@ -377,6 +418,15 @@ impl PageServerNode {
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()?,
|
||||
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
|
||||
walreceiver_connect_timeout: settings
|
||||
.get("walreceiver_connect_timeout")
|
||||
.map(|x| x.to_string()),
|
||||
lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
|
||||
max_lsn_wal_lag: settings
|
||||
.get("max_lsn_wal_lag")
|
||||
.map(|x| x.parse::<NonZeroU64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
@@ -400,22 +450,41 @@ impl PageServerNode {
|
||||
tenant_id,
|
||||
checkpoint_distance: settings
|
||||
.get("checkpoint_distance")
|
||||
.map(|x| x.parse::<u64>().unwrap()),
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'checkpoint_distance' as an integer")?,
|
||||
compaction_target_size: settings
|
||||
.get("compaction_target_size")
|
||||
.map(|x| x.parse::<u64>().unwrap()),
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_target_size' as an integer")?,
|
||||
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
|
||||
compaction_threshold: settings
|
||||
.get("compaction_threshold")
|
||||
.map(|x| x.parse::<usize>().unwrap()),
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'compaction_threshold' as an integer")?,
|
||||
gc_horizon: settings
|
||||
.get("gc_horizon")
|
||||
.map(|x| x.parse::<u64>().unwrap()),
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'gc_horizon' as an integer")?,
|
||||
gc_period: settings.get("gc_period").map(|x| x.to_string()),
|
||||
image_creation_threshold: settings
|
||||
.get("image_creation_threshold")
|
||||
.map(|x| x.parse::<usize>().unwrap()),
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'image_creation_threshold' as non zero integer")?,
|
||||
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
|
||||
walreceiver_connect_timeout: settings
|
||||
.get("walreceiver_connect_timeout")
|
||||
.map(|x| x.to_string()),
|
||||
lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
|
||||
max_lsn_wal_lag: settings
|
||||
.get("max_lsn_wal_lag")
|
||||
.map(|x| x.parse::<NonZeroU64>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?;
|
||||
@@ -459,13 +528,54 @@ impl PageServerNode {
|
||||
|
||||
Ok(timeline_info_response)
|
||||
}
|
||||
}
|
||||
|
||||
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||
for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
|
||||
if let Ok(value) = std::env::var(env_key) {
|
||||
cmd = cmd.env(env_key, value);
|
||||
/// Import a basebackup prepared using either:
|
||||
/// a) `pg_basebackup -F tar`, or
|
||||
/// b) The `fullbackup` pageserver endpoint
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `tenant_id` - tenant to import into. Created if not exists
|
||||
/// * `timeline_id` - id to assign to imported timeline
|
||||
/// * `base` - (start lsn of basebackup, path to `base.tar` file)
|
||||
/// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)
|
||||
pub fn timeline_import(
|
||||
&self,
|
||||
tenant_id: ZTenantId,
|
||||
timeline_id: ZTimelineId,
|
||||
base: (Lsn, PathBuf),
|
||||
pg_wal: Option<(Lsn, PathBuf)>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
|
||||
|
||||
// Init base reader
|
||||
let (start_lsn, base_tarfile_path) = base;
|
||||
let base_tarfile = File::open(base_tarfile_path)?;
|
||||
let mut base_reader = BufReader::new(base_tarfile);
|
||||
|
||||
// Init wal reader if necessary
|
||||
let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
|
||||
let wal_tarfile = File::open(wal_tarfile_path)?;
|
||||
let wal_reader = BufReader::new(wal_tarfile);
|
||||
(end_lsn, Some(wal_reader))
|
||||
} else {
|
||||
(start_lsn, None)
|
||||
};
|
||||
|
||||
// Import base
|
||||
let import_cmd =
|
||||
format!("import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
|
||||
let mut writer = client.copy_in(&import_cmd)?;
|
||||
io::copy(&mut base_reader, &mut writer)?;
|
||||
writer.finish()?;
|
||||
|
||||
// Import wal if necessary
|
||||
if let Some(mut wal_reader) = wal_reader {
|
||||
let import_cmd = format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
|
||||
let mut writer = client.copy_in(&import_cmd)?;
|
||||
io::copy(&mut wal_reader, &mut writer)?;
|
||||
writer.finish()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
cmd
|
||||
}
|
||||
|
||||
@@ -1,13 +1,20 @@
|
||||
#!/bin/sh
|
||||
set -eux
|
||||
|
||||
broker_endpoints_param="${BROKER_ENDPOINT:-absent}"
|
||||
if [ "$broker_endpoints_param" != "absent" ]; then
|
||||
broker_endpoints_param="-c broker_endpoints=['$broker_endpoints_param']"
|
||||
else
|
||||
broker_endpoints_param=''
|
||||
fi
|
||||
|
||||
if [ "$1" = 'pageserver' ]; then
|
||||
if [ ! -d "/data/tenants" ]; then
|
||||
echo "Initializing pageserver data directory"
|
||||
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10"
|
||||
pageserver --init -D /data -c "pg_distrib_dir='/usr/local'" -c "id=10" $broker_endpoints_param
|
||||
fi
|
||||
echo "Staring pageserver at 0.0.0.0:6400"
|
||||
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -D /data
|
||||
pageserver -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" $broker_endpoints_param -D /data
|
||||
else
|
||||
"$@"
|
||||
fi
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
- [docker.md](docker.md) — Docker images and building pipeline.
|
||||
- [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
|
||||
- [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
|
||||
- [sourcetree.md](sourcetree.md) — Overview of the source tree layeout.
|
||||
- [sourcetree.md](sourcetree.md) — Overview of the source tree layout.
|
||||
- [pageserver/README.md](/pageserver/README.md) — pageserver overview.
|
||||
- [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
|
||||
- [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
|
||||
|
||||
@@ -188,7 +188,7 @@ Not currently committed but proposed:
|
||||
3. Prefetching
|
||||
- Why?
|
||||
As far as pages in Zenith are loaded on demand, to reduce node startup time
|
||||
and also sppedup some massive queries we need some mechanism for bulk loading to
|
||||
and also speedup some massive queries we need some mechanism for bulk loading to
|
||||
reduce page request round-trip overhead.
|
||||
|
||||
Currently Postgres is supporting prefetching only for bitmap scan.
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
# Docker images of Zenith
|
||||
# Docker images of Neon
|
||||
|
||||
## Images
|
||||
|
||||
Currently we build two main images:
|
||||
|
||||
- [zenithdb/zenith](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [zenithdb/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [zenithdb/postgres](https://github.com/zenithdb/postgres).
|
||||
- [neondatabase/neon](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [neondatabase/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres).
|
||||
|
||||
And additional intermediate images:
|
||||
And additional intermediate image:
|
||||
|
||||
- [zenithdb/compute-tools](https://hub.docker.com/repository/docker/zenithdb/compute-tools) — compute node configuration management tools.
|
||||
- [neondatabase/compute-tools](https://hub.docker.com/repository/docker/neondatabase/compute-tools) — compute node configuration management tools.
|
||||
|
||||
## Building pipeline
|
||||
|
||||
1. Image `zenithdb/compute-tools` is re-built automatically.
|
||||
We build all images after a successful `release` tests run and push automatically to Docker Hub with two parallel CI jobs
|
||||
|
||||
2. Image `zenithdb/compute-node` is built independently in the [zenithdb/postgres](https://github.com/zenithdb/postgres) repo.
|
||||
1. `neondatabase/compute-tools` and `neondatabase/compute-node`
|
||||
|
||||
3. Image `zenithdb/zenith` is built in this repo after a successful `release` tests run and pushed to Docker Hub automatically.
|
||||
2. `neondatabase/neon`
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
### Authentication
|
||||
|
||||
### Backpresssure
|
||||
### Backpressure
|
||||
|
||||
Backpressure is used to limit the lag between pageserver and compute node or WAL service.
|
||||
|
||||
@@ -21,7 +21,7 @@ NOTE:It has nothing to do with PostgreSQL pg_basebackup.
|
||||
|
||||
### Branch
|
||||
|
||||
We can create branch at certain LSN using `zenith timeline branch` command.
|
||||
We can create branch at certain LSN using `neon_local timeline branch` command.
|
||||
Each Branch lives in a corresponding timeline[] and has an ancestor[].
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ The layer map tracks what layers exist in a timeline.
|
||||
|
||||
### Layered repository
|
||||
|
||||
Zenith repository implementation that keeps data in layers.
|
||||
Neon repository implementation that keeps data in layers.
|
||||
### LSN
|
||||
|
||||
The Log Sequence Number (LSN) is a unique identifier of the WAL record[] in the WAL log.
|
||||
@@ -101,7 +101,7 @@ It is printed as two hexadecimal numbers of up to 8 digits each, separated by a
|
||||
Check also [PostgreSQL doc about pg_lsn type](https://www.postgresql.org/docs/devel/datatype-pg-lsn.html)
|
||||
Values can be compared to calculate the volume of WAL data that separates them, so they are used to measure the progress of replication and recovery.
|
||||
|
||||
In postgres and Zenith lsns are used to describe certain points in WAL handling.
|
||||
In Postgres and Neon LSNs are used to describe certain points in WAL handling.
|
||||
|
||||
PostgreSQL LSNs and functions to monitor them:
|
||||
* `pg_current_wal_insert_lsn()` - Returns the current write-ahead log insert location.
|
||||
@@ -111,13 +111,13 @@ PostgreSQL LSNs and functions to monitor them:
|
||||
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
|
||||
[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):
|
||||
|
||||
Zenith safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
|
||||
Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
|
||||
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
|
||||
* `RestartLSN`: position in WAL confirmed by all safekeepers.
|
||||
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
|
||||
* `VCL`: the largerst LSN for which we can guarantee availablity of all prior records.
|
||||
* `VCL`: the largest LSN for which we can guarantee availability of all prior records.
|
||||
|
||||
Zenith pageserver LSNs:
|
||||
Neon pageserver LSNs:
|
||||
* `last_record_lsn` - the end of last processed WAL record.
|
||||
* `disk_consistent_lsn` - data is known to be fully flushed and fsync'd to local disk on pageserver up to this LSN.
|
||||
* `remote_consistent_lsn` - The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash.
|
||||
@@ -132,7 +132,7 @@ This is the unit of data exchange between compute node and pageserver.
|
||||
|
||||
### Pageserver
|
||||
|
||||
Zenith storage engine: repositories + wal receiver + page service + wal redo.
|
||||
Neon storage engine: repositories + wal receiver + page service + wal redo.
|
||||
|
||||
### Page service
|
||||
|
||||
@@ -184,10 +184,10 @@ relation exceeds that size, it is split into multiple segments.
|
||||
SLRUs include pg_clog, pg_multixact/members, and
|
||||
pg_multixact/offsets. There are other SLRUs in PostgreSQL, but
|
||||
they don't need to be stored permanently (e.g. pg_subtrans),
|
||||
or we do not support them in zenith yet (pg_commit_ts).
|
||||
or we do not support them in neon yet (pg_commit_ts).
|
||||
|
||||
### Tenant (Multitenancy)
|
||||
Tenant represents a single customer, interacting with Zenith.
|
||||
Tenant represents a single customer, interacting with Neon.
|
||||
Wal redo[] activity, timelines[], layers[] are managed for each tenant independently.
|
||||
One pageserver[] can serve multiple tenants at once.
|
||||
One safekeeper
|
||||
|
||||
@@ -6,7 +6,7 @@ Zenith supports multitenancy. One pageserver can serve multiple tenants at once.
|
||||
|
||||
### Tenants in other commands
|
||||
|
||||
By default during `zenith init` new tenant is created on the pageserver. Newly created tenant's id is saved to cli config, so other commands can use it automatically if no direct arugment `--tenantid=<tenantid>` is provided. So generally tenantid more frequently appears in internal pageserver interface. Its commands take tenantid argument to distinguish to which tenant operation should be applied. CLI support creation of new tenants.
|
||||
By default during `zenith init` new tenant is created on the pageserver. Newly created tenant's id is saved to cli config, so other commands can use it automatically if no direct argument `--tenantid=<tenantid>` is provided. So generally tenantid more frequently appears in internal pageserver interface. Its commands take tenantid argument to distinguish to which tenant operation should be applied. CLI support creation of new tenants.
|
||||
|
||||
Examples for cli:
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ Upon storage node restart recent WAL files are applied to appropriate pages and
|
||||
|
||||
### **Checkpointing**
|
||||
|
||||
No such mechanism is needed. Or we may look at the storage node as at kind of continuous chekpointer.
|
||||
No such mechanism is needed. Or we may look at the storage node as at kind of continuous checkpointer.
|
||||
|
||||
### **Full page writes (torn page protection)**
|
||||
|
||||
@@ -111,13 +111,13 @@ Since we are storing page diffs of variable sizes there is no structural depende
|
||||
|
||||
### **Chunk metadata**
|
||||
|
||||
Chunk metadata is a file lies in chunk directory that stores info about current snapshots and PITR regions. Chunck should always consult this data when merging SSTables and applying delete markers.
|
||||
Chunk metadata is a file lies in chunk directory that stores info about current snapshots and PITR regions. Chunk should always consult this data when merging SSTables and applying delete markers.
|
||||
|
||||
### **Chunk splitting**
|
||||
|
||||
*(NB: following paragraph is about how to avoid page splitting)*
|
||||
|
||||
When chunks hits some soft storage limit (let's say 100Gb) it should be split in half and global matadata about chunk boundaries should be updated. Here i assume that chunk split is a local operation happening on single node. Process of chink splitting should look like following:
|
||||
When chunks hits some soft storage limit (let's say 100Gb) it should be split in half and global metadata about chunk boundaries should be updated. Here i assume that chunk split is a local operation happening on single node. Process of chink splitting should look like following:
|
||||
|
||||
1. Find separation key and spawn two new chunks with [lo, mid) [mid, hi) boundaries.
|
||||
|
||||
@@ -166,7 +166,7 @@ Multi-tenant storage makes sense even on a laptop, when you work with different
|
||||
|
||||
Few databases are stored in one chunk, replicated three times
|
||||
|
||||
- When database can't fit into one storage node it can occupy lots of chunks that were split while database was growing. Chunk placement on nodes is controlled by us with some automatization, but we alway may manually move chunks around the cluster.
|
||||
- When database can't fit into one storage node it can occupy lots of chunks that were split while database was growing. Chunk placement on nodes is controlled by us with some automatization, but we always may manually move chunks around the cluster.
|
||||
|
||||
<img width="940" alt="Screenshot_2021-02-22_at_16 49 10" src="https://user-images.githubusercontent.com/284219/108729815-fb071e00-753b-11eb-86e0-be6703e47d82.png">
|
||||
|
||||
|
||||
@@ -123,7 +123,7 @@ Show currently attached storages. For example:
|
||||
> zenith storage list
|
||||
NAME USED TYPE OPTIONS PATH
|
||||
local 5.1G zenith-local /opt/zenith/store/local
|
||||
local.compr 20.4G zenith-local comression=on /opt/zenith/store/local.compr
|
||||
local.compr 20.4G zenith-local compression=on /opt/zenith/store/local.compr
|
||||
zcloud 60G zenith-remote zenith.tech/stas/mystore
|
||||
s3tank 80G S3
|
||||
```
|
||||
@@ -136,9 +136,9 @@ s3tank 80G S3
|
||||
|
||||
## pg
|
||||
|
||||
Manages postgres data directories and can start postgreses with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themself.
|
||||
Manages postgres data directories and can start postgres instances with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themselves.
|
||||
|
||||
Pg is a term for a single postgres running on some data. I'm trying to avoid here separation of datadir management and postgres instance management -- both that concepts bundled here together.
|
||||
Pg is a term for a single postgres running on some data. I'm trying to avoid separation of datadir management and postgres instance management -- both that concepts bundled here together.
|
||||
|
||||
**zenith pg create** [--no-start --snapshot --cow] -s storage-name -n pgdata
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ In addition to the WAL safekeeper nodes, the WAL is archived in
|
||||
S3. WAL that has been archived to S3 can be removed from the
|
||||
safekeepers, so the safekeepers don't need a lot of disk space.
|
||||
|
||||
|
||||
```
|
||||
+----------------+
|
||||
+-----> | WAL safekeeper |
|
||||
| +----------------+
|
||||
@@ -42,23 +42,23 @@ safekeepers, so the safekeepers don't need a lot of disk space.
|
||||
\
|
||||
\
|
||||
\
|
||||
\ +--------+
|
||||
\ | |
|
||||
+--> | S3 |
|
||||
| |
|
||||
+--------+
|
||||
|
||||
\ +--------+
|
||||
\ | |
|
||||
+------> | S3 |
|
||||
| |
|
||||
+--------+
|
||||
|
||||
```
|
||||
Every WAL safekeeper holds a section of WAL, and a VCL value.
|
||||
The WAL can be divided into three portions:
|
||||
|
||||
|
||||
```
|
||||
VCL LSN
|
||||
| |
|
||||
V V
|
||||
.................ccccccccccccccccccccXXXXXXXXXXXXXXXXXXXXXXX
|
||||
Archived WAL Completed WAL In-flight WAL
|
||||
|
||||
```
|
||||
|
||||
Note that all this WAL kept in a safekeeper is a contiguous section.
|
||||
This is different from Aurora: In Aurora, there can be holes in the
|
||||
|
||||
@@ -31,7 +31,7 @@ Ideally, just one binary that incorporates all elements we need.
|
||||
|
||||
#### Components:
|
||||
|
||||
- **zenith-CLI** - interface for end-users. Turns commands to REST requests and handles responces to show them in a user-friendly way.
|
||||
- **zenith-CLI** - interface for end-users. Turns commands to REST requests and handles responses to show them in a user-friendly way.
|
||||
CLI proposal is here https://github.com/libzenith/rfcs/blob/003-laptop-cli.md/003-laptop-cli.md
|
||||
WIP code is here: https://github.com/libzenith/postgres/tree/main/pageserver/src/bin/cli
|
||||
|
||||
|
||||
@@ -25,9 +25,9 @@ To make changes in the catalog you need to run compute nodes
|
||||
zenith start /home/pipedpiper/northwind:main -- starts a compute instance
|
||||
zenith start zenith://zenith.tech/northwind:main -- starts a compute instance in the cloud
|
||||
-- you can start a compute node against any hash or branch
|
||||
zenith start /home/pipedpiper/northwind:experimental --port 8008 -- start anothe compute instance (on different port)
|
||||
zenith start /home/pipedpiper/northwind:experimental --port 8008 -- start another compute instance (on different port)
|
||||
-- you can start a compute node against any hash or branch
|
||||
zenith start /home/pipedpiper/northwind:<hash> --port 8009 -- start anothe compute instance (on different port)
|
||||
zenith start /home/pipedpiper/northwind:<hash> --port 8009 -- start another compute instance (on different port)
|
||||
|
||||
-- After running some DML you can run
|
||||
-- zenith status and see how there are two WAL streams one on top of
|
||||
|
||||
@@ -121,7 +121,7 @@ repository, launch an instance on the same branch in both clones, and
|
||||
later try to push/pull between them? Perhaps create a new timeline
|
||||
every time you start up an instance? Then you would detect that the
|
||||
timelines have diverged. That would match with the "epoch" concept
|
||||
that we have in the WAL safekeepr
|
||||
that we have in the WAL safekeeper
|
||||
|
||||
### zenith checkout/commit
|
||||
|
||||
|
||||
@@ -2,9 +2,9 @@ While working on export/import commands, I understood that they fit really well
|
||||
|
||||
We may think about backups as snapshots in a different format (i.e plain pgdata format, basebackup tar format, WAL-G format (if they want to support it) and so on). They use same storage API, the only difference is the code that packs/unpacks files.
|
||||
|
||||
Even if zenith aims to maintains durability using it's own snapshots, backups will be useful for uploading data from postges to zenith.
|
||||
Even if zenith aims to maintains durability using it's own snapshots, backups will be useful for uploading data from postgres to zenith.
|
||||
|
||||
So here is an attemt to design consistent CLI for diferent usage scenarios:
|
||||
So here is an attempt to design consistent CLI for different usage scenarios:
|
||||
|
||||
#### 1. Start empty pageserver.
|
||||
That is what we have now.
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
GetPage@LSN can be called with older LSNs, and the page server needs
|
||||
to be able to reconstruct older page versions. That's needed for
|
||||
having read-only replicas that lag behind the primary, or that are
|
||||
"anchored" at an older LSN, and internally in the page server whne you
|
||||
"anchored" at an older LSN, and internally in the page server when you
|
||||
branch at an older point in time. How do you do that?
|
||||
|
||||
For now, I'm not considering incremental snapshots at all. I don't
|
||||
@@ -192,7 +192,7 @@ for a particular relation readily available alongside the snapshot
|
||||
files, and you don't need to track what snapshot LSNs exist
|
||||
separately.
|
||||
|
||||
(If we wanted to minize the number of files, you could include the
|
||||
(If we wanted to minimize the number of files, you could include the
|
||||
snapshot @300 and the WAL between 200 and 300 in the same file, but I
|
||||
feel it's probably better to keep them separate)
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ The properties of s3 that we depend on are:
|
||||
list objects
|
||||
streaming read of entire object
|
||||
read byte range from object
|
||||
streaming write new object (may use multipart upload for better relialibity)
|
||||
streaming write new object (may use multipart upload for better reliability)
|
||||
delete object (that should not disrupt an already-started read).
|
||||
|
||||
Uploaded files, restored backups, or s3 buckets controlled by users could contain malicious content. We should always validate that objects contain the content they’re supposed to. Incorrect, Corrupt or malicious-looking contents should cause software (cloud tools, pageserver) to fail gracefully.
|
||||
|
||||
@@ -40,7 +40,7 @@ b) overwrite older pages with the newer pages -- if there is no replica we proba
|
||||
|
||||
I imagine that newly created pages would just be added to the back of PageStore (again in queue-like fashion) and this way there wouldn't be any meaningful ordering inside of that queue. When we are forming a new incremental snapshot we may prohibit any updates to the current set of pages in PageStore (giving up on single page version rule) and cut off that whole set when snapshot creation is complete.
|
||||
|
||||
With option b) we can also treat PageStor as an uncompleted increamental snapshot.
|
||||
With option b) we can also treat PageStor as an uncompleted incremental snapshot.
|
||||
|
||||
### LocalStore
|
||||
|
||||
@@ -123,7 +123,7 @@ As far as I understand Bookfile/Aversion addresses versioning and serialization
|
||||
As for exact data that should go to snapshots I think it is the following for each snapshot:
|
||||
|
||||
* format version number
|
||||
* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end on file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknow key are present. If we add something backward compatible to the file we can keep the version number.
|
||||
* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end on file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknown key are present. If we add something backward compatible to the file we can keep the version number.
|
||||
* array of [BuffTag, corresponding offset in file] for pages -- IIUC that is analogous to ToC in Bookfile
|
||||
* array of [(BuffTag, LSN), corresponding offset in file] for the WAL records
|
||||
* pages, one by one
|
||||
@@ -131,7 +131,7 @@ As for exact data that should go to snapshots I think it is the following for ea
|
||||
|
||||
It is also important to be able to load metadata quickly since it would be one of the main factors impacting the time of page server start. E.g. if would store/cache about 10TB of data per page server, the size of uncompressed page references would be about 30GB (10TB / ( 8192 bytes page size / ( ~18 bytes per ObjectTag + 8 bytes offset in the file))).
|
||||
|
||||
1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when realtion_id is changed and store only delta-encoded offsets for a given relation. That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset delatas would be small).
|
||||
1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when relation_id is changed and store only delta-encoded offsets for a given relation. That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset deltas would be small).
|
||||
2) It makes sense to keep ToC at the beginning of the file to avoid extra seeks to locate it. Doesn't matter too much with the local files but matters on S3 -- if we are accessing a lot of ~1Gb files with the size of metadata ~ 1Mb then the time to transfer this metadata would be comparable with access latency itself (which is about a half of a second). So by slurping metadata with one read of file header instead of N reads we can improve the speed of page server start by this N factor.
|
||||
|
||||
I think both of that optimizations can be done later, but that is something to keep in mind when we are designing our storage serialization routines.
|
||||
|
||||
@@ -7,13 +7,13 @@ and e.g. prevents electing two proposers with the same term -- it is actually
|
||||
called `term` in the code. The second, called `epoch`, reflects progress of log
|
||||
receival and this might lag behind `term`; safekeeper switches to epoch `n` when
|
||||
it has received all committed log records from all `< n` terms. This roughly
|
||||
correspones to proposed in
|
||||
corresponds to proposed in
|
||||
|
||||
https://github.com/zenithdb/rfcs/pull/3/files
|
||||
|
||||
|
||||
This makes our biggest our difference from Raft. In Raft, every log record is
|
||||
stamped with term in which it was generated; while we essentialy store in
|
||||
stamped with term in which it was generated; while we essentially store in
|
||||
`epoch` only the term of the highest record on this safekeeper -- when we know
|
||||
it -- because during recovery generally we don't, and `epoch` is bumped directly
|
||||
to the term of the proposer who performs the recovery when it is finished. It is
|
||||
|
||||
@@ -124,7 +124,7 @@ Each storage node can subscribe to the relevant sets of keys and maintain a loca
|
||||
|
||||
### Safekeeper address discovery
|
||||
|
||||
During the startup safekeeper should publish the address he is listening on as the part of `{"sk_#{sk_id}" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertize something more useful.
|
||||
During the startup safekeeper should publish the address he is listening on as the part of `{"sk_#{sk_id}" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertise something more useful.
|
||||
|
||||
### Safekeeper behavior
|
||||
|
||||
@@ -195,7 +195,7 @@ sequenceDiagram
|
||||
PS1->>SK1: start replication
|
||||
```
|
||||
|
||||
#### Behavour of services during typical operations
|
||||
#### Behaviour of services during typical operations
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
@@ -250,7 +250,7 @@ sequenceDiagram
|
||||
PS2->>M: Register downloaded timeline
|
||||
PS2->>M: Get safekeepers for timeline, subscribe to changes
|
||||
PS2->>SK1: Start replication to catch up
|
||||
note over O: PS2 catched up, time to switch compute
|
||||
note over O: PS2 caught up, time to switch compute
|
||||
O->>C: Restart compute with new pageserver url in config
|
||||
note over C: Wal push is restarted
|
||||
loop request pages
|
||||
|
||||
@@ -49,7 +49,7 @@ topics.
|
||||
|
||||
RFC lifecycle:
|
||||
|
||||
- Should be submitted in a pull request with and full RFC text in a commited markdown file and copy of the Summary and Motivation sections also included in the PR body.
|
||||
- Should be submitted in a pull request with and full RFC text in a committed markdown file and copy of the Summary and Motivation sections also included in the PR body.
|
||||
- RFC should be published for review before most of the actual code is written. This isn’t a strict rule, don’t hesitate to experiment and build a POC in parallel with writing an RFC.
|
||||
- Add labels to the PR in the same manner as you do Issues. Example TBD
|
||||
- Request the review from your peers. Reviewing the RFCs from your peers is a priority, same as reviewing the actual code.
|
||||
|
||||
@@ -22,8 +22,8 @@ so we don't want to give users access to the functionality that we don't think i
|
||||
|
||||
* pageserver - calculate the size consumed by a timeline and add it to the feedback message.
|
||||
* safekeeper - pass feedback message from pageserver to compute.
|
||||
* compute - receive feedback message, enforce size limit based on GUC `zenith.max_cluster_size`.
|
||||
* console - set and update `zenith.max_cluster_size` setting
|
||||
* compute - receive feedback message, enforce size limit based on GUC `neon.max_cluster_size`.
|
||||
* console - set and update `neon.max_cluster_size` setting
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
@@ -36,12 +36,12 @@ This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
|
||||
Alternatively, we could count only relation data. As in pg_database_size().
|
||||
This approach is somewhat more user-friendly because it is the data that is really affected by the user.
|
||||
On the other hand, it puts us in a weaker position than other services, i.e., RDS.
|
||||
We will need to refactor the timeline_size counter or add another counter to implement it.
|
||||
We will need to refactor the timeline_size counter or add another counter to implement it.
|
||||
|
||||
Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
|
||||
Then this size should be reported to compute node.
|
||||
|
||||
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
|
||||
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ReplicationFeedback.`
|
||||
|
||||
(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
|
||||
|
||||
@@ -49,7 +49,7 @@ This message is received by the safekeeper and propagated to compute node as a p
|
||||
|
||||
Finally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.
|
||||
|
||||
And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > zenith.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
|
||||
And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > neon.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
|
||||
(see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))
|
||||
|
||||
TODO:
|
||||
@@ -64,16 +64,16 @@ We should warn users if the limit is soon to be reached.
|
||||
### **Reliability, failure modes and corner cases**
|
||||
|
||||
1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
|
||||
|
||||
|
||||
If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
|
||||
|
||||
|
||||
So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
|
||||
|
||||
|
||||
Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.
|
||||
|
||||
|
||||
### **Security implications**
|
||||
|
||||
We treat compute as an untrusted component. That's why we try to isolate it with secure container runtime or a VM.
|
||||
Malicious users may change the `zenith.max_cluster_size`, so we need an extra size limit check.
|
||||
Malicious users may change the `neon.max_cluster_size`, so we need an extra size limit check.
|
||||
To cover this case, we also monitor the compute node size in the console.
|
||||
|
||||
@@ -23,18 +23,22 @@ gc_horizon = '67108864'
|
||||
max_file_descriptors = '100'
|
||||
|
||||
# initial superuser role name to use when creating a new tenant
|
||||
initial_superuser_name = 'zenith_admin'
|
||||
initial_superuser_name = 'cloud_admin'
|
||||
|
||||
broker_etcd_prefix = 'neon'
|
||||
broker_endpoints = ['some://etcd']
|
||||
|
||||
# [remote_storage]
|
||||
```
|
||||
|
||||
The config above shows default values for all basic pageserver settings.
|
||||
The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user,
|
||||
see the corresponding section below.
|
||||
Pageserver uses default values for all files that are missing in the config, so it's not a hard error to leave the config blank.
|
||||
Yet, it validates the config values it can (e.g. postgres install dir) and errors if the validation fails, refusing to start.
|
||||
|
||||
Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#table) in TOML specification and
|
||||
|
||||
- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`
|
||||
- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'cloud_admin'`
|
||||
|
||||
- or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`
|
||||
|
||||
@@ -46,6 +50,17 @@ Example: `${PAGESERVER_BIN} -c "checkpoint_period = '100 s'" -c "remote_storage=
|
||||
|
||||
Note that TOML distinguishes between strings and integers, the former require single or double quotes around them.
|
||||
|
||||
#### broker_endpoints
|
||||
|
||||
A list of endpoints (etcd currently) to connect and pull the information from.
|
||||
Mandatory, does not have a default, since requires etcd to be started as a separate process,
|
||||
and its connection url should be specified separately.
|
||||
|
||||
#### broker_etcd_prefix
|
||||
|
||||
A prefix to add for every etcd key used, to separate one group of related instances from another, in the same cluster.
|
||||
Default is `neon`.
|
||||
|
||||
#### checkpoint_distance
|
||||
|
||||
`checkpoint_distance` is the amount of incoming WAL that is held in
|
||||
@@ -90,17 +105,31 @@ Interval at which garbage collection is triggered. Default is 100 s.
|
||||
|
||||
#### image_creation_threshold
|
||||
|
||||
L0 delta layer threshold for L1 iamge layer creation. Default is 3.
|
||||
L0 delta layer threshold for L1 image layer creation. Default is 3.
|
||||
|
||||
#### pitr_interval
|
||||
|
||||
WAL retention duration for PITR branching. Default is 30 days.
|
||||
|
||||
#### walreceiver_connect_timeout
|
||||
|
||||
Time to wait to establish the wal receiver connection before failing
|
||||
|
||||
#### lagging_wal_timeout
|
||||
|
||||
Time the pageserver did not get any WAL updates from safekeeper (if any).
|
||||
Avoids lagging pageserver preemptively by forcing to switch it from stalled connections.
|
||||
|
||||
#### max_lsn_wal_lag
|
||||
|
||||
Difference between Lsn values of the latest available WAL on safekeepers: if currently connected safekeeper starts to lag too long and too much,
|
||||
it gets swapped to the different one.
|
||||
|
||||
#### initial_superuser_name
|
||||
|
||||
Name of the initial superuser role, passed to initdb when a new tenant
|
||||
is initialized. It doesn't affect anything after initialization. The
|
||||
default is Note: The default is 'zenith_admin', and the console
|
||||
default is Note: The default is 'cloud_admin', and the console
|
||||
depends on that, so if you change it, bad things will happen.
|
||||
|
||||
#### page_cache_size
|
||||
@@ -125,7 +154,7 @@ The default distrib dir is `./tmp_install/`.
|
||||
#### workdir (-D)
|
||||
|
||||
A directory in the file system, where pageserver will store its files.
|
||||
The default is `./.zenith/`.
|
||||
The default is `./.neon/`.
|
||||
|
||||
This parameter has a special CLI alias (`-D`) and can not be overridden with regular `-c` way.
|
||||
|
||||
@@ -170,7 +199,7 @@ If no IAM bucket access is used during the remote storage usage, use the `AWS_AC
|
||||
|
||||
###### General remote storage configuration
|
||||
|
||||
Pagesever allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
|
||||
Pageserver allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
|
||||
No default values are used for the remote storage configuration parameters.
|
||||
|
||||
Besides, there are parameters common for all types of remote storage that can be configured, those have defaults:
|
||||
|
||||
@@ -10,7 +10,7 @@ Intended to be used in integration tests and in CLI tools for local installation
|
||||
|
||||
`/docs`:
|
||||
|
||||
Documentaion of the Zenith features and concepts.
|
||||
Documentation of the Zenith features and concepts.
|
||||
Now it is mostly dev documentation.
|
||||
|
||||
`/monitoring`:
|
||||
@@ -42,13 +42,13 @@ Integration tests, written in Python using the `pytest` framework.
|
||||
|
||||
`/vendor/postgres`:
|
||||
|
||||
PostgreSQL source tree, with the modifications needed for Zenith.
|
||||
PostgreSQL source tree, with the modifications needed for Neon.
|
||||
|
||||
`/vendor/postgres/contrib/zenith`:
|
||||
`/vendor/postgres/contrib/neon`:
|
||||
|
||||
PostgreSQL extension that implements storage manager API and network communications with remote page server.
|
||||
|
||||
`/vendor/postgres/contrib/zenith_test_utils`:
|
||||
`/vendor/postgres/contrib/neon_test_utils`:
|
||||
|
||||
PostgreSQL extension that contains functions needed for testing and debugging.
|
||||
|
||||
@@ -91,18 +91,22 @@ so manual installation of dependencies is not recommended.
|
||||
A single virtual environment with all dependencies is described in the single `Pipfile`.
|
||||
|
||||
### Prerequisites
|
||||
- Install Python 3.7 (the minimal supported version) or greater.
|
||||
- Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesnt work as expected.
|
||||
- If you have some trouble with other version you can resolve it by installing Python 3.7 separately, via pyenv or via system package manager e.g.:
|
||||
- Install Python 3.9 (the minimal supported version) or greater.
|
||||
- Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
|
||||
- If you have some trouble with other version you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
|
||||
```bash
|
||||
# In Ubuntu
|
||||
sudo add-apt-repository ppa:deadsnakes/ppa
|
||||
sudo apt update
|
||||
sudo apt install python3.7
|
||||
sudo apt install python3.9
|
||||
```
|
||||
- Install `poetry`
|
||||
- Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation)`.
|
||||
- Install dependencies via `./scripts/pysync`. Note that CI uses Python 3.7 so if you have different version some linting tools can yield different result locally vs in the CI.
|
||||
- Install dependencies via `./scripts/pysync`.
|
||||
- Note that CI uses specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile))
|
||||
so if you have different version some linting tools can yield different result locally vs in the CI.
|
||||
- You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.9`.
|
||||
This may also disable the `The currently activated Python version X.Y.Z is not supported by the project` warning.
|
||||
|
||||
Run `poetry shell` to activate the virtual environment.
|
||||
Alternatively, use `poetry run` to run a single command in the venv, e.g. `poetry run pytest`.
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_with = "1.12.0"
|
||||
once_cell = "1.8.0"
|
||||
|
||||
utils = { path = "../utils" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
@@ -1,335 +1,209 @@
|
||||
//! A set of primitives to access a shared data/updates, propagated via etcd broker (not persistent).
|
||||
//! Intended to connect services to each other, not to store their data.
|
||||
use std::{
|
||||
collections::{hash_map, HashMap},
|
||||
fmt::Display,
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use regex::{Captures, Regex};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
/// All broker keys, that are used when dealing with etcd.
|
||||
pub mod subscription_key;
|
||||
/// All broker values, possible to use when dealing with etcd.
|
||||
pub mod subscription_value;
|
||||
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
use subscription_key::SubscriptionKey;
|
||||
use tokio::{sync::mpsc, task::JoinHandle};
|
||||
use tracing::*;
|
||||
|
||||
use crate::subscription_key::SubscriptionFullKey;
|
||||
|
||||
pub use etcd_client::*;
|
||||
|
||||
use tokio::{sync::mpsc, task::JoinHandle};
|
||||
use tracing::*;
|
||||
use utils::{
|
||||
lsn::Lsn,
|
||||
zid::{ZNodeId, ZTenantId, ZTenantTimelineId},
|
||||
};
|
||||
/// Default value to use for prefixing to all etcd keys with.
|
||||
/// This way allows isolating safekeeper/pageserver groups in the same etcd cluster.
|
||||
pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
struct SafekeeperTimeline {
|
||||
safekeeper_id: ZNodeId,
|
||||
info: SkTimelineInfo,
|
||||
/// A way to control the data retrieval from a certain subscription.
|
||||
pub struct BrokerSubscription<V> {
|
||||
/// An unbounded channel to fetch the relevant etcd updates from.
|
||||
pub value_updates: mpsc::UnboundedReceiver<BrokerUpdate<V>>,
|
||||
key: SubscriptionKey,
|
||||
/// A subscription task handle, to allow waiting on it for the task to complete.
|
||||
/// Both the updates channel and the handle require `&mut`, so it's better to keep
|
||||
/// both `pub` to allow using both in the same structures without borrow checker complaining.
|
||||
pub watcher_handle: JoinHandle<Result<(), BrokerError>>,
|
||||
watcher: Watcher,
|
||||
}
|
||||
|
||||
/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
|
||||
#[serde_as]
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct SkTimelineInfo {
|
||||
/// Term of the last entry.
|
||||
pub last_log_term: Option<u64>,
|
||||
/// LSN of the last record.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub flush_lsn: Option<Lsn>,
|
||||
/// Up to which LSN safekeeper regards its WAL as committed.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub commit_lsn: Option<Lsn>,
|
||||
/// LSN up to which safekeeper offloaded WAL to s3.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub s3_wal_lsn: Option<Lsn>,
|
||||
/// LSN of last checkpoint uploaded by pageserver.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub remote_consistent_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub peer_horizon_lsn: Option<Lsn>,
|
||||
#[serde(default)]
|
||||
pub wal_stream_connection_string: Option<String>,
|
||||
impl<V> BrokerSubscription<V> {
|
||||
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
|
||||
pub async fn cancel(mut self) -> Result<(), BrokerError> {
|
||||
self.watcher.cancel().await.map_err(|e| {
|
||||
BrokerError::EtcdClient(
|
||||
e,
|
||||
format!("Failed to cancel broker subscription, kind: {:?}", self.key),
|
||||
)
|
||||
})?;
|
||||
match (&mut self.watcher_handle).await {
|
||||
Ok(res) => res,
|
||||
Err(e) => {
|
||||
if e.is_cancelled() {
|
||||
// don't error on the tasks that are cancelled already
|
||||
Ok(())
|
||||
} else {
|
||||
Err(BrokerError::InternalError(format!(
|
||||
"Panicked during broker subscription task, kind: {:?}, error: {e}",
|
||||
self.key
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<V> Drop for BrokerSubscription<V> {
|
||||
fn drop(&mut self) {
|
||||
// we poll data from etcd into the channel in the same struct, so if the whole struct gets dropped,
|
||||
// no more data is used by the receiver and it's safe to cancel and drop the whole etcd subscription task.
|
||||
self.watcher_handle.abort();
|
||||
}
|
||||
}
|
||||
|
||||
/// An update from the etcd broker.
|
||||
pub struct BrokerUpdate<V> {
|
||||
/// Etcd generation version, the bigger the more actual the data is.
|
||||
pub etcd_version: i64,
|
||||
/// Etcd key for the corresponding value, parsed from the broker KV.
|
||||
pub key: SubscriptionFullKey,
|
||||
/// Current etcd value, parsed from the broker KV.
|
||||
pub value: V,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum BrokerError {
|
||||
#[error("Etcd client error: {0}. Context: {1}")]
|
||||
EtcdClient(etcd_client::Error, String),
|
||||
#[error("Error during parsing etcd data: {0}")]
|
||||
ParsingError(String),
|
||||
#[error("Error during parsing etcd key: {0}")]
|
||||
KeyNotParsed(String),
|
||||
#[error("Internal error: {0}")]
|
||||
InternalError(String),
|
||||
}
|
||||
|
||||
/// A way to control the data retrieval from a certain subscription.
|
||||
pub struct SkTimelineSubscription {
|
||||
safekeeper_timeline_updates:
|
||||
mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>>>,
|
||||
kind: SkTimelineSubscriptionKind,
|
||||
watcher_handle: JoinHandle<Result<(), BrokerError>>,
|
||||
watcher: Watcher,
|
||||
}
|
||||
|
||||
impl SkTimelineSubscription {
|
||||
/// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
|
||||
pub async fn fetch_data(
|
||||
&mut self,
|
||||
) -> Option<HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>>> {
|
||||
self.safekeeper_timeline_updates.recv().await
|
||||
}
|
||||
|
||||
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
|
||||
pub async fn cancel(mut self) -> Result<(), BrokerError> {
|
||||
self.watcher.cancel().await.map_err(|e| {
|
||||
BrokerError::EtcdClient(
|
||||
e,
|
||||
format!(
|
||||
"Failed to cancel timeline subscription, kind: {:?}",
|
||||
self.kind
|
||||
),
|
||||
)
|
||||
})?;
|
||||
self.watcher_handle.await.map_err(|e| {
|
||||
BrokerError::InternalError(format!(
|
||||
"Failed to join the timeline updates task, kind: {:?}, error: {e}",
|
||||
self.kind
|
||||
))
|
||||
})?
|
||||
}
|
||||
}
|
||||
|
||||
/// The subscription kind to the timeline updates from safekeeper.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SkTimelineSubscriptionKind {
|
||||
broker_prefix: String,
|
||||
kind: SubscriptionKind,
|
||||
}
|
||||
|
||||
impl SkTimelineSubscriptionKind {
|
||||
pub fn all(broker_prefix: String) -> Self {
|
||||
Self {
|
||||
broker_prefix,
|
||||
kind: SubscriptionKind::All,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tenant(broker_prefix: String, tenant: ZTenantId) -> Self {
|
||||
Self {
|
||||
broker_prefix,
|
||||
kind: SubscriptionKind::Tenant(tenant),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn timeline(broker_prefix: String, timeline: ZTenantTimelineId) -> Self {
|
||||
Self {
|
||||
broker_prefix,
|
||||
kind: SubscriptionKind::Timeline(timeline),
|
||||
}
|
||||
}
|
||||
|
||||
fn watch_regex(&self) -> Regex {
|
||||
match self.kind {
|
||||
SubscriptionKind::All => Regex::new(&format!(
|
||||
r"^{}/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
|
||||
self.broker_prefix
|
||||
))
|
||||
.expect("wrong regex for 'everything' subscription"),
|
||||
SubscriptionKind::Tenant(tenant_id) => Regex::new(&format!(
|
||||
r"^{}/{tenant_id}/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
|
||||
self.broker_prefix
|
||||
))
|
||||
.expect("wrong regex for 'tenant' subscription"),
|
||||
SubscriptionKind::Timeline(ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
}) => Regex::new(&format!(
|
||||
r"^{}/{tenant_id}/{timeline_id}/safekeeper/([[:digit:]])$",
|
||||
self.broker_prefix
|
||||
))
|
||||
.expect("wrong regex for 'timeline' subscription"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Etcd key to use for watching a certain timeline updates from safekeepers.
|
||||
pub fn watch_key(&self) -> String {
|
||||
match self.kind {
|
||||
SubscriptionKind::All => self.broker_prefix.to_string(),
|
||||
SubscriptionKind::Tenant(tenant_id) => {
|
||||
format!("{}/{tenant_id}/safekeeper", self.broker_prefix)
|
||||
}
|
||||
SubscriptionKind::Timeline(ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
}) => format!(
|
||||
"{}/{tenant_id}/{timeline_id}/safekeeper",
|
||||
self.broker_prefix
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
enum SubscriptionKind {
|
||||
/// Get every timeline update.
|
||||
All,
|
||||
/// Get certain tenant timelines' updates.
|
||||
Tenant(ZTenantId),
|
||||
/// Get certain timeline updates.
|
||||
Timeline(ZTenantTimelineId),
|
||||
}
|
||||
|
||||
/// Creates a background task to poll etcd for timeline updates from safekeepers.
|
||||
/// Stops and returns `Err` on any error during etcd communication.
|
||||
/// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
|
||||
/// exiting normally in such cases.
|
||||
pub async fn subscribe_to_safekeeper_timeline_updates(
|
||||
/// Etcd values are parsed as json fukes into a type, specified in the generic patameter.
|
||||
pub async fn subscribe_for_json_values<V>(
|
||||
client: &mut Client,
|
||||
subscription: SkTimelineSubscriptionKind,
|
||||
) -> Result<SkTimelineSubscription, BrokerError> {
|
||||
info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
|
||||
key: SubscriptionKey,
|
||||
) -> Result<BrokerSubscription<V>, BrokerError>
|
||||
where
|
||||
V: DeserializeOwned + Send + 'static,
|
||||
{
|
||||
subscribe_for_values(client, key, |_, value_str| {
|
||||
match serde_json::from_str::<V>(value_str) {
|
||||
Ok(value) => Some(value),
|
||||
Err(e) => {
|
||||
error!("Failed to parse value str '{value_str}': {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
/// Same as [`subscribe_for_json_values`], but allows to specify a custom parser of a etcd value string.
|
||||
pub async fn subscribe_for_values<P, V>(
|
||||
client: &mut Client,
|
||||
key: SubscriptionKey,
|
||||
value_parser: P,
|
||||
) -> Result<BrokerSubscription<V>, BrokerError>
|
||||
where
|
||||
V: Send + 'static,
|
||||
P: Fn(SubscriptionFullKey, &str) -> Option<V> + Send + 'static,
|
||||
{
|
||||
info!("Subscribing to broker value updates, key: {key:?}");
|
||||
let subscription_key = key.clone();
|
||||
|
||||
let (watcher, mut stream) = client
|
||||
.watch(
|
||||
subscription.watch_key(),
|
||||
Some(WatchOptions::new().with_prefix()),
|
||||
)
|
||||
.watch(key.watch_key(), Some(WatchOptions::new().with_prefix()))
|
||||
.await
|
||||
.map_err(|e| {
|
||||
BrokerError::EtcdClient(
|
||||
e,
|
||||
format!("Failed to init the watch for subscription {subscription:?}"),
|
||||
format!("Failed to init the watch for subscription {key:?}"),
|
||||
)
|
||||
})?;
|
||||
|
||||
let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
|
||||
|
||||
let subscription_kind = subscription.kind;
|
||||
let regex = subscription.watch_regex();
|
||||
let (value_updates_sender, value_updates_receiver) = mpsc::unbounded_channel();
|
||||
let watcher_handle = tokio::spawn(async move {
|
||||
while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
|
||||
"Failed to get messages from the subscription stream, kind: {subscription_kind:?}, error: {e}"
|
||||
"Failed to get messages from the subscription stream, kind: {:?}, error: {e}", key.kind
|
||||
)))? {
|
||||
if resp.canceled() {
|
||||
info!("Watch for timeline updates subscription was canceled, exiting");
|
||||
break;
|
||||
}
|
||||
|
||||
let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>> =
|
||||
HashMap::new();
|
||||
|
||||
let events = resp.events();
|
||||
debug!("Processing {} events", events.len());
|
||||
|
||||
for event in events {
|
||||
if EventType::Put == event.event_type() {
|
||||
if let Some(kv) = event.kv() {
|
||||
match parse_etcd_key_value(subscription_kind, ®ex, kv) {
|
||||
Ok(Some((zttid, timeline))) => {
|
||||
match timeline_updates
|
||||
.entry(zttid)
|
||||
.or_default()
|
||||
.entry(timeline.safekeeper_id)
|
||||
{
|
||||
hash_map::Entry::Occupied(mut o) => {
|
||||
if o.get().flush_lsn < timeline.info.flush_lsn {
|
||||
o.insert(timeline.info);
|
||||
}
|
||||
}
|
||||
hash_map::Entry::Vacant(v) => {
|
||||
v.insert(timeline.info);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(None) => {}
|
||||
Err(e) => error!("Failed to parse timeline update: {e}"),
|
||||
if let Some(new_etcd_kv) = event.kv() {
|
||||
match parse_etcd_kv(new_etcd_kv, &value_parser, &key.cluster_prefix) {
|
||||
Ok(Some((key, value))) => if let Err(e) = value_updates_sender.send(BrokerUpdate {
|
||||
etcd_version: new_etcd_kv.version(),
|
||||
key,
|
||||
value,
|
||||
}) {
|
||||
info!("Broker value updates for key {key:?} sender got dropped, exiting: {e}");
|
||||
break;
|
||||
},
|
||||
Ok(None) => debug!("Ignoring key {key:?} : no value was returned by the parser"),
|
||||
Err(BrokerError::KeyNotParsed(e)) => debug!("Unexpected key {key:?} for timeline update: {e}"),
|
||||
Err(e) => error!("Failed to represent etcd KV {new_etcd_kv:?}: {e}"),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(e) = timeline_updates_sender.send(timeline_updates) {
|
||||
info!("Timeline updates sender got dropped, exiting: {e}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
});
|
||||
}.instrument(info_span!("etcd_broker")));
|
||||
|
||||
Ok(SkTimelineSubscription {
|
||||
kind: subscription,
|
||||
safekeeper_timeline_updates,
|
||||
Ok(BrokerSubscription {
|
||||
key: subscription_key,
|
||||
value_updates: value_updates_receiver,
|
||||
watcher_handle,
|
||||
watcher,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_etcd_key_value(
|
||||
subscription_kind: SubscriptionKind,
|
||||
regex: &Regex,
|
||||
fn parse_etcd_kv<P, V>(
|
||||
kv: &KeyValue,
|
||||
) -> Result<Option<(ZTenantTimelineId, SafekeeperTimeline)>, BrokerError> {
|
||||
let caps = if let Some(caps) = regex.captures(kv.key_str().map_err(|e| {
|
||||
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as key str"))
|
||||
})?) {
|
||||
caps
|
||||
} else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let (zttid, safekeeper_id) = match subscription_kind {
|
||||
SubscriptionKind::All => (
|
||||
ZTenantTimelineId::new(
|
||||
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
|
||||
parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?,
|
||||
),
|
||||
ZNodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
|
||||
),
|
||||
SubscriptionKind::Tenant(tenant_id) => (
|
||||
ZTenantTimelineId::new(
|
||||
tenant_id,
|
||||
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
|
||||
),
|
||||
ZNodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
|
||||
),
|
||||
SubscriptionKind::Timeline(zttid) => (
|
||||
zttid,
|
||||
ZNodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
|
||||
),
|
||||
};
|
||||
|
||||
let info_str = kv.value_str().map_err(|e| {
|
||||
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as value str"))
|
||||
})?;
|
||||
Ok(Some((
|
||||
zttid,
|
||||
SafekeeperTimeline {
|
||||
safekeeper_id,
|
||||
info: serde_json::from_str(info_str).map_err(|e| {
|
||||
BrokerError::ParsingError(format!(
|
||||
"Failed to parse '{info_str}' as safekeeper timeline info: {e}"
|
||||
))
|
||||
})?,
|
||||
},
|
||||
)))
|
||||
}
|
||||
|
||||
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
|
||||
value_parser: &P,
|
||||
cluster_prefix: &str,
|
||||
) -> Result<Option<(SubscriptionFullKey, V)>, BrokerError>
|
||||
where
|
||||
T: FromStr,
|
||||
<T as FromStr>::Err: Display,
|
||||
P: Fn(SubscriptionFullKey, &str) -> Option<V>,
|
||||
{
|
||||
let capture_match = caps
|
||||
.get(index)
|
||||
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
|
||||
.as_str();
|
||||
capture_match.parse().map_err(|e| {
|
||||
format!(
|
||||
"Failed to parse {} from {capture_match}: {e}",
|
||||
std::any::type_name::<T>()
|
||||
)
|
||||
})
|
||||
let key_str = kv.key_str().map_err(|e| {
|
||||
BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
|
||||
})?;
|
||||
let value_str = kv.value_str().map_err(|e| {
|
||||
BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
|
||||
})?;
|
||||
|
||||
if !key_str.starts_with(cluster_prefix) {
|
||||
return Err(BrokerError::KeyNotParsed(format!(
|
||||
"KV has unexpected key '{key_str}' that does not start with cluster prefix {cluster_prefix}"
|
||||
)));
|
||||
}
|
||||
|
||||
let key = SubscriptionFullKey::from_str(&key_str[cluster_prefix.len()..]).map_err(|e| {
|
||||
BrokerError::KeyNotParsed(format!("Failed to parse KV key '{key_str}': {e}"))
|
||||
})?;
|
||||
|
||||
Ok(value_parser(key, value_str).map(|value| (key, value)))
|
||||
}
|
||||
|
||||
310
libs/etcd_broker/src/subscription_key.rs
Normal file
310
libs/etcd_broker/src/subscription_key.rs
Normal file
@@ -0,0 +1,310 @@
|
||||
//! Etcd broker keys, used in the project and shared between instances.
|
||||
//! The keys are split into two categories:
|
||||
//!
|
||||
//! * [`SubscriptionFullKey`] full key format: `<cluster_prefix>/<tenant>/<timeline>/<node_kind>/<operation>/<node_id>`
|
||||
//! Always returned from etcd in this form, always start with the user key provided.
|
||||
//!
|
||||
//! * [`SubscriptionKey`] user input key format: always partial, since it's unknown which `node_id`'s are available.
|
||||
//! Full key always starts with the user input one, due to etcd subscription properties.
|
||||
|
||||
use std::{fmt::Display, str::FromStr};
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::{Captures, Regex};
|
||||
use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};
|
||||
|
||||
/// The subscription kind to the timeline updates from safekeeper.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct SubscriptionKey {
|
||||
/// Generic cluster prefix, allowing to use the same etcd instance by multiple logic groups.
|
||||
pub cluster_prefix: String,
|
||||
/// The subscription kind.
|
||||
pub kind: SubscriptionKind,
|
||||
}
|
||||
|
||||
/// All currently possible key kinds of a etcd broker subscription.
|
||||
/// Etcd works so, that every key that starts with the subbscription key given is considered matching and
|
||||
/// returned as part of the subscrption.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum SubscriptionKind {
|
||||
/// Get every update in etcd.
|
||||
All,
|
||||
/// Get etcd updates for any timeiline of a certain tenant, affected by any operation from any node kind.
|
||||
TenantTimelines(ZTenantId),
|
||||
/// Get etcd updates for a certain timeline of a tenant, affected by any operation from any node kind.
|
||||
Timeline(ZTenantTimelineId),
|
||||
/// Get etcd timeline updates, specific to a certain node kind.
|
||||
Node(ZTenantTimelineId, NodeKind),
|
||||
/// Get etcd timeline updates for a certain operation on specific nodes.
|
||||
Operation(ZTenantTimelineId, NodeKind, OperationKind),
|
||||
}
|
||||
|
||||
/// All kinds of nodes, able to write into etcd.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum NodeKind {
|
||||
Safekeeper,
|
||||
Pageserver,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum OperationKind {
|
||||
Safekeeper(SkOperationKind),
|
||||
}
|
||||
|
||||
/// Current operations, running inside the safekeeper node.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub enum SkOperationKind {
|
||||
TimelineInfo,
|
||||
WalBackup,
|
||||
}
|
||||
|
||||
static SUBSCRIPTION_FULL_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/([^/]+)/([^/]+)/([[:digit:]]+)$")
|
||||
.expect("wrong subscription full etcd key regex")
|
||||
});
|
||||
|
||||
/// Full key, received from etcd during any of the component's work.
|
||||
/// No other etcd keys are considered during system's work.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct SubscriptionFullKey {
|
||||
pub id: ZTenantTimelineId,
|
||||
pub node_kind: NodeKind,
|
||||
pub operation: OperationKind,
|
||||
pub node_id: NodeId,
|
||||
}
|
||||
|
||||
impl SubscriptionKey {
|
||||
/// Subscribes for all etcd updates.
|
||||
pub fn all(cluster_prefix: String) -> Self {
|
||||
SubscriptionKey {
|
||||
cluster_prefix,
|
||||
kind: SubscriptionKind::All,
|
||||
}
|
||||
}
|
||||
|
||||
/// Subscribes to a given timeline info updates from safekeepers.
|
||||
pub fn sk_timeline_info(cluster_prefix: String, timeline: ZTenantTimelineId) -> Self {
|
||||
Self {
|
||||
cluster_prefix,
|
||||
kind: SubscriptionKind::Operation(
|
||||
timeline,
|
||||
NodeKind::Safekeeper,
|
||||
OperationKind::Safekeeper(SkOperationKind::TimelineInfo),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Subscribes to all timeine updates during specific operations, running on the corresponding nodes.
|
||||
pub fn operation(
|
||||
cluster_prefix: String,
|
||||
timeline: ZTenantTimelineId,
|
||||
node_kind: NodeKind,
|
||||
operation: OperationKind,
|
||||
) -> Self {
|
||||
Self {
|
||||
cluster_prefix,
|
||||
kind: SubscriptionKind::Operation(timeline, node_kind, operation),
|
||||
}
|
||||
}
|
||||
|
||||
/// Etcd key to use for watching a certain timeline updates from safekeepers.
|
||||
pub fn watch_key(&self) -> String {
|
||||
let cluster_prefix = &self.cluster_prefix;
|
||||
match self.kind {
|
||||
SubscriptionKind::All => cluster_prefix.to_string(),
|
||||
SubscriptionKind::TenantTimelines(tenant_id) => {
|
||||
format!("{cluster_prefix}/{tenant_id}")
|
||||
}
|
||||
SubscriptionKind::Timeline(id) => {
|
||||
format!("{cluster_prefix}/{id}")
|
||||
}
|
||||
SubscriptionKind::Node(id, node_kind) => {
|
||||
format!("{cluster_prefix}/{id}/{node_kind}")
|
||||
}
|
||||
SubscriptionKind::Operation(id, node_kind, operation_kind) => {
|
||||
format!("{cluster_prefix}/{id}/{node_kind}/{operation_kind}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for OperationKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
OperationKind::Safekeeper(o) => o.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for OperationKind {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(operation_kind_str: &str) -> Result<Self, Self::Err> {
|
||||
match operation_kind_str {
|
||||
"timeline_info" => Ok(OperationKind::Safekeeper(SkOperationKind::TimelineInfo)),
|
||||
"wal_backup" => Ok(OperationKind::Safekeeper(SkOperationKind::WalBackup)),
|
||||
_ => Err(format!("Unknown operation kind: {operation_kind_str}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SubscriptionFullKey {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self {
|
||||
id,
|
||||
node_kind,
|
||||
operation,
|
||||
node_id,
|
||||
} = self;
|
||||
write!(f, "{id}/{node_kind}/{operation}/{node_id}")
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for SubscriptionFullKey {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(subscription_kind_str: &str) -> Result<Self, Self::Err> {
|
||||
let key_captures = match SUBSCRIPTION_FULL_KEY_REGEX.captures(subscription_kind_str) {
|
||||
Some(captures) => captures,
|
||||
None => {
|
||||
return Err(format!(
|
||||
"Subscription kind str does not match a subscription full key regex {}",
|
||||
SUBSCRIPTION_FULL_KEY_REGEX.as_str()
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
id: ZTenantTimelineId::new(
|
||||
parse_capture(&key_captures, 1)?,
|
||||
parse_capture(&key_captures, 2)?,
|
||||
),
|
||||
node_kind: parse_capture(&key_captures, 3)?,
|
||||
operation: parse_capture(&key_captures, 4)?,
|
||||
node_id: NodeId(parse_capture(&key_captures, 5)?),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
|
||||
where
|
||||
T: FromStr,
|
||||
<T as FromStr>::Err: Display,
|
||||
{
|
||||
let capture_match = caps
|
||||
.get(index)
|
||||
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
|
||||
.as_str();
|
||||
capture_match.parse().map_err(|e| {
|
||||
format!(
|
||||
"Failed to parse {} from {capture_match}: {e}",
|
||||
std::any::type_name::<T>()
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
impl Display for NodeKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Safekeeper => write!(f, "safekeeper"),
|
||||
Self::Pageserver => write!(f, "pageserver"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for NodeKind {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(node_kind_str: &str) -> Result<Self, Self::Err> {
|
||||
match node_kind_str {
|
||||
"safekeeper" => Ok(Self::Safekeeper),
|
||||
"pageserver" => Ok(Self::Pageserver),
|
||||
_ => Err(format!("Invalid node kind: {node_kind_str}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SkOperationKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::TimelineInfo => write!(f, "timeline_info"),
|
||||
Self::WalBackup => write!(f, "wal_backup"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for SkOperationKind {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(operation_str: &str) -> Result<Self, Self::Err> {
|
||||
match operation_str {
|
||||
"timeline_info" => Ok(Self::TimelineInfo),
|
||||
"wal_backup" => Ok(Self::WalBackup),
|
||||
_ => Err(format!("Invalid operation: {operation_str}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use utils::zid::ZTimelineId;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn full_cluster_key_parsing() {
|
||||
let prefix = "neon";
|
||||
let node_kind = NodeKind::Safekeeper;
|
||||
let operation_kind = OperationKind::Safekeeper(SkOperationKind::WalBackup);
|
||||
let tenant_id = ZTenantId::generate();
|
||||
let timeline_id = ZTimelineId::generate();
|
||||
let id = ZTenantTimelineId::new(tenant_id, timeline_id);
|
||||
let node_id = NodeId(1);
|
||||
|
||||
let timeline_subscription_keys = [
|
||||
SubscriptionKey {
|
||||
cluster_prefix: prefix.to_string(),
|
||||
kind: SubscriptionKind::All,
|
||||
},
|
||||
SubscriptionKey {
|
||||
cluster_prefix: prefix.to_string(),
|
||||
kind: SubscriptionKind::TenantTimelines(tenant_id),
|
||||
},
|
||||
SubscriptionKey {
|
||||
cluster_prefix: prefix.to_string(),
|
||||
kind: SubscriptionKind::Timeline(id),
|
||||
},
|
||||
SubscriptionKey {
|
||||
cluster_prefix: prefix.to_string(),
|
||||
kind: SubscriptionKind::Node(id, node_kind),
|
||||
},
|
||||
SubscriptionKey {
|
||||
cluster_prefix: prefix.to_string(),
|
||||
kind: SubscriptionKind::Operation(id, node_kind, operation_kind),
|
||||
},
|
||||
];
|
||||
|
||||
let full_key_string = format!(
|
||||
"{}/{node_id}",
|
||||
timeline_subscription_keys.last().unwrap().watch_key()
|
||||
);
|
||||
|
||||
for key in timeline_subscription_keys {
|
||||
assert!(full_key_string.starts_with(&key.watch_key()), "Full key '{full_key_string}' should start with any of the keys, keys, but {key:?} did not match");
|
||||
}
|
||||
|
||||
let full_key = SubscriptionFullKey::from_str(&full_key_string).unwrap_or_else(|e| {
|
||||
panic!("Failed to parse {full_key_string} as a subscription full key: {e}")
|
||||
});
|
||||
|
||||
assert_eq!(
|
||||
full_key,
|
||||
SubscriptionFullKey {
|
||||
id,
|
||||
node_kind,
|
||||
operation: operation_kind,
|
||||
node_id
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
35
libs/etcd_broker/src/subscription_value.rs
Normal file
35
libs/etcd_broker/src/subscription_value.rs
Normal file
@@ -0,0 +1,35 @@
|
||||
//! Module for the values to put into etcd.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
/// Data about safekeeper's timeline. Fields made optional for easy migrations.
|
||||
#[serde_as]
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct SkTimelineInfo {
|
||||
/// Term of the last entry.
|
||||
pub last_log_term: Option<u64>,
|
||||
/// LSN of the last record.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub flush_lsn: Option<Lsn>,
|
||||
/// Up to which LSN safekeeper regards its WAL as committed.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub commit_lsn: Option<Lsn>,
|
||||
/// LSN up to which safekeeper has backed WAL.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub backup_lsn: Option<Lsn>,
|
||||
/// LSN of last checkpoint uploaded by pageserver.
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub remote_consistent_lsn: Option<Lsn>,
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
#[serde(default)]
|
||||
pub peer_horizon_lsn: Option<Lsn>,
|
||||
/// A connection string to use for WAL receiving.
|
||||
#[serde(default)]
|
||||
pub safekeeper_connstr: Option<String>,
|
||||
}
|
||||
@@ -4,7 +4,7 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
prometheus = {version = "0.13", default_features=false} # removes protobuf dependency
|
||||
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
|
||||
libc = "0.2"
|
||||
lazy_static = "1.4"
|
||||
once_cell = "1.8.0"
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
//! Otherwise, we might not see all metrics registered via
|
||||
//! a default registry.
|
||||
use lazy_static::lazy_static;
|
||||
use once_cell::race::OnceBox;
|
||||
pub use prometheus::{core, default_registry, proto};
|
||||
pub use prometheus::{exponential_buckets, linear_buckets};
|
||||
pub use prometheus::{register_gauge, Gauge};
|
||||
pub use prometheus::{register_gauge_vec, GaugeVec};
|
||||
@@ -27,48 +27,15 @@ pub fn gather() -> Vec<prometheus::proto::MetricFamily> {
|
||||
prometheus::gather()
|
||||
}
|
||||
|
||||
static COMMON_METRICS_PREFIX: OnceBox<&str> = OnceBox::new();
|
||||
|
||||
/// Sets a prefix which will be used for all common metrics, typically a service
|
||||
/// name like 'pageserver'. Should be executed exactly once in the beginning of
|
||||
/// any executable which uses common metrics.
|
||||
pub fn set_common_metrics_prefix(prefix: &'static str) {
|
||||
// Not unwrap() because metrics may be initialized after multiple threads have been started.
|
||||
COMMON_METRICS_PREFIX
|
||||
.set(prefix.into())
|
||||
.unwrap_or_else(|_| {
|
||||
eprintln!(
|
||||
"set_common_metrics_prefix() was called second time with '{}', exiting",
|
||||
prefix
|
||||
);
|
||||
std::process::exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
/// Prepends a prefix to a common metric name so they are distinguished between
|
||||
/// different services, see <https://github.com/zenithdb/zenith/pull/681>
|
||||
/// A call to set_common_metrics_prefix() is necessary prior to calling this.
|
||||
pub fn new_common_metric_name(unprefixed_metric_name: &str) -> String {
|
||||
// Not unwrap() because metrics may be initialized after multiple threads have been started.
|
||||
format!(
|
||||
"{}_{}",
|
||||
COMMON_METRICS_PREFIX.get().unwrap_or_else(|| {
|
||||
eprintln!("set_common_metrics_prefix() was not called, but metrics are used, exiting");
|
||||
std::process::exit(1);
|
||||
}),
|
||||
unprefixed_metric_name
|
||||
)
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref DISK_IO_BYTES: IntGaugeVec = register_int_gauge_vec!(
|
||||
new_common_metric_name("disk_io_bytes"),
|
||||
"libmetrics_disk_io_bytes_total",
|
||||
"Bytes written and read from disk, grouped by the operation (read|write)",
|
||||
&["io_operation"]
|
||||
)
|
||||
.expect("Failed to register disk i/o bytes int gauge vec");
|
||||
static ref MAXRSS_KB: IntGauge = register_int_gauge!(
|
||||
new_common_metric_name("maxrss_kb"),
|
||||
"libmetrics_maxrss_kb",
|
||||
"Memory usage (Maximum Resident Set Size)"
|
||||
)
|
||||
.expect("Failed to register maxrss_kb int gauge");
|
||||
|
||||
@@ -20,5 +20,10 @@ serde = { version = "1.0", features = ["derive"] }
|
||||
utils = { path = "../utils" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
env_logger = "0.9"
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
wal_generate = { path = "wal_generate" }
|
||||
|
||||
[build-dependencies]
|
||||
bindgen = "0.59.1"
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#![allow(deref_nullptr)]
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||
|
||||
@@ -37,3 +38,21 @@ pub const fn transaction_id_precedes(id1: TransactionId, id2: TransactionId) ->
|
||||
let diff = id1.wrapping_sub(id2) as i32;
|
||||
diff < 0
|
||||
}
|
||||
|
||||
// Check if page is not yet initialized (port of Postgres PageIsInit() macro)
|
||||
pub fn page_is_new(pg: &[u8]) -> bool {
|
||||
pg[14] == 0 && pg[15] == 0 // pg_upper == 0
|
||||
}
|
||||
|
||||
// ExtractLSN from page header
|
||||
pub fn page_get_lsn(pg: &[u8]) -> Lsn {
|
||||
Lsn(
|
||||
((u32::from_le_bytes(pg[0..4].try_into().unwrap()) as u64) << 32)
|
||||
| u32::from_le_bytes(pg[4..8].try_into().unwrap()) as u64,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn page_set_lsn(pg: &mut [u8], lsn: Lsn) {
|
||||
pg[0..4].copy_from_slice(&((lsn.0 >> 32) as u32).to_le_bytes());
|
||||
pg[4..8].copy_from_slice(&(lsn.0 as u32).to_le_bytes());
|
||||
}
|
||||
|
||||
@@ -73,7 +73,7 @@ impl WalStreamDecoder {
|
||||
/// Returns one of the following:
|
||||
/// Ok((Lsn, Bytes)): a tuple containing the LSN of next record, and the record itself
|
||||
/// Ok(None): there is not enough data in the input buffer. Feed more by calling the `feed_bytes` function
|
||||
/// Err(WalDecodeError): an error occured while decoding, meaning the input was invalid.
|
||||
/// Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
|
||||
///
|
||||
pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
|
||||
let recordbuf;
|
||||
|
||||
@@ -15,7 +15,7 @@ use crate::XLogPageHeaderData;
|
||||
use crate::XLogRecord;
|
||||
use crate::XLOG_PAGE_MAGIC;
|
||||
|
||||
use anyhow::bail;
|
||||
use anyhow::{bail, ensure};
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use bytes::BytesMut;
|
||||
use bytes::{Buf, Bytes};
|
||||
@@ -30,6 +30,7 @@ use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
use utils::bin_ser::DeserializeError;
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::const_assert;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub const XLOG_FNAME_LEN: usize = 24;
|
||||
@@ -149,8 +150,9 @@ fn find_end_of_wal_segment(
|
||||
) -> anyhow::Result<u32> {
|
||||
// step back to the beginning of the page to read it in...
|
||||
let mut offs: usize = start_offset - start_offset % XLOG_BLCKSZ;
|
||||
let mut skipping_first_contrecord: bool = false;
|
||||
let mut contlen: usize = 0;
|
||||
let mut wal_crc: u32 = 0;
|
||||
let mut xl_crc: u32 = 0;
|
||||
let mut crc: u32 = 0;
|
||||
let mut rec_offs: usize = 0;
|
||||
let mut buf = [0u8; XLOG_BLCKSZ];
|
||||
@@ -158,11 +160,15 @@ fn find_end_of_wal_segment(
|
||||
let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
|
||||
let mut file = File::open(data_dir.join(file_name.clone() + ".partial")).unwrap();
|
||||
file.seek(SeekFrom::Start(offs as u64))?;
|
||||
// xl_crc is the last field in XLogRecord, will not be read into rec_hdr
|
||||
const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
|
||||
let mut rec_hdr = [0u8; XLOG_RECORD_CRC_OFFS];
|
||||
|
||||
trace!("find_end_of_wal_segment(data_dir={}, segno={}, tli={}, wal_seg_size={}, start_offset=0x{:x})", data_dir.display(), segno, tli, wal_seg_size, start_offset);
|
||||
while offs < wal_seg_size {
|
||||
// we are at the beginning of the page; read it in
|
||||
if offs % XLOG_BLCKSZ == 0 {
|
||||
trace!("offs=0x{:x}: new page", offs);
|
||||
let bytes_read = file.read(&mut buf)?;
|
||||
if bytes_read != buf.len() {
|
||||
bail!(
|
||||
@@ -176,30 +182,49 @@ fn find_end_of_wal_segment(
|
||||
let xlp_magic = LittleEndian::read_u16(&buf[0..2]);
|
||||
let xlp_info = LittleEndian::read_u16(&buf[2..4]);
|
||||
let xlp_rem_len = LittleEndian::read_u32(&buf[XLP_REM_LEN_OFFS..XLP_REM_LEN_OFFS + 4]);
|
||||
trace!(
|
||||
" xlp_magic=0x{:x}, xlp_info=0x{:x}, xlp_rem_len={}",
|
||||
xlp_magic,
|
||||
xlp_info,
|
||||
xlp_rem_len
|
||||
);
|
||||
// this is expected in current usage when valid WAL starts after page header
|
||||
if xlp_magic != XLOG_PAGE_MAGIC as u16 {
|
||||
trace!(
|
||||
"invalid WAL file {}.partial magic {} at {:?}",
|
||||
" invalid WAL file {}.partial magic {} at {:?}",
|
||||
file_name,
|
||||
xlp_magic,
|
||||
Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
|
||||
);
|
||||
}
|
||||
if offs == 0 {
|
||||
offs = XLOG_SIZE_OF_XLOG_LONG_PHD;
|
||||
offs += XLOG_SIZE_OF_XLOG_LONG_PHD;
|
||||
if (xlp_info & XLP_FIRST_IS_CONTRECORD) != 0 {
|
||||
offs += ((xlp_rem_len + 7) & !7) as usize;
|
||||
trace!(" first record is contrecord");
|
||||
skipping_first_contrecord = true;
|
||||
contlen = xlp_rem_len as usize;
|
||||
if offs < start_offset {
|
||||
// Pre-condition failed: the beginning of the segment is unexpectedly corrupted.
|
||||
ensure!(start_offset - offs >= contlen,
|
||||
"start_offset is in the middle of the first record (which happens to be a contrecord), \
|
||||
expected to be on a record boundary. Is beginning of the segment corrupted?");
|
||||
contlen = 0;
|
||||
// keep skipping_first_contrecord to avoid counting the contrecord as valid, we did not check it.
|
||||
}
|
||||
} else {
|
||||
trace!(" first record is not contrecord");
|
||||
}
|
||||
} else {
|
||||
offs += XLOG_SIZE_OF_XLOG_SHORT_PHD;
|
||||
}
|
||||
// ... and step forward again if asked
|
||||
trace!(" skipped header to 0x{:x}", offs);
|
||||
offs = max(offs, start_offset);
|
||||
|
||||
// beginning of the next record
|
||||
} else if contlen == 0 {
|
||||
let page_offs = offs % XLOG_BLCKSZ;
|
||||
let xl_tot_len = LittleEndian::read_u32(&buf[page_offs..page_offs + 4]) as usize;
|
||||
trace!("offs=0x{:x}: new record, xl_tot_len={}", offs, xl_tot_len);
|
||||
if xl_tot_len == 0 {
|
||||
info!(
|
||||
"find_end_of_wal_segment reached zeros at {:?}, last records ends at {:?}",
|
||||
@@ -212,10 +237,25 @@ fn find_end_of_wal_segment(
|
||||
);
|
||||
break; // zeros, reached the end
|
||||
}
|
||||
last_valid_rec_pos = offs;
|
||||
if skipping_first_contrecord {
|
||||
skipping_first_contrecord = false;
|
||||
trace!(" first contrecord has been just completed");
|
||||
} else {
|
||||
trace!(
|
||||
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
|
||||
last_valid_rec_pos,
|
||||
offs
|
||||
);
|
||||
last_valid_rec_pos = offs;
|
||||
}
|
||||
offs += 4;
|
||||
rec_offs = 4;
|
||||
contlen = xl_tot_len - 4;
|
||||
trace!(
|
||||
" reading rec_hdr[0..4] <-- [0x{:x}; 0x{:x})",
|
||||
page_offs,
|
||||
page_offs + 4
|
||||
);
|
||||
rec_hdr[0..4].copy_from_slice(&buf[page_offs..page_offs + 4]);
|
||||
} else {
|
||||
// we're continuing a record, possibly from previous page.
|
||||
@@ -224,42 +264,118 @@ fn find_end_of_wal_segment(
|
||||
|
||||
// read the rest of the record, or as much as fits on this page.
|
||||
let n = min(contlen, pageleft);
|
||||
// fill rec_hdr (header up to (but not including) xl_crc field)
|
||||
trace!(
|
||||
"offs=0x{:x}, record continuation, pageleft={}, contlen={}",
|
||||
offs,
|
||||
pageleft,
|
||||
contlen
|
||||
);
|
||||
// fill rec_hdr header up to (but not including) xl_crc field
|
||||
trace!(
|
||||
" rec_offs={}, XLOG_RECORD_CRC_OFFS={}, XLOG_SIZE_OF_XLOG_RECORD={}",
|
||||
rec_offs,
|
||||
XLOG_RECORD_CRC_OFFS,
|
||||
XLOG_SIZE_OF_XLOG_RECORD
|
||||
);
|
||||
if rec_offs < XLOG_RECORD_CRC_OFFS {
|
||||
let len = min(XLOG_RECORD_CRC_OFFS - rec_offs, n);
|
||||
trace!(
|
||||
" reading rec_hdr[{}..{}] <-- [0x{:x}; 0x{:x})",
|
||||
rec_offs,
|
||||
rec_offs + len,
|
||||
page_offs,
|
||||
page_offs + len
|
||||
);
|
||||
rec_hdr[rec_offs..rec_offs + len].copy_from_slice(&buf[page_offs..page_offs + len]);
|
||||
}
|
||||
if rec_offs <= XLOG_RECORD_CRC_OFFS && rec_offs + n >= XLOG_SIZE_OF_XLOG_RECORD {
|
||||
let crc_offs = page_offs - rec_offs + XLOG_RECORD_CRC_OFFS;
|
||||
wal_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
|
||||
// All records are aligned on 8-byte boundary, so their 8-byte frames
|
||||
// cannot be split between pages. As xl_crc is the last field,
|
||||
// its content is always on the same page.
|
||||
const_assert!(XLOG_RECORD_CRC_OFFS % 8 == 4);
|
||||
// We should always start reading aligned records even in incorrect WALs so if
|
||||
// the condition is false it is likely a bug. However, it is localized somewhere
|
||||
// in this function, hence we do not crash and just report failure instead.
|
||||
ensure!(crc_offs % 8 == 4, "Record is not aligned properly (bug?)");
|
||||
xl_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
|
||||
trace!(
|
||||
" reading xl_crc: [0x{:x}; 0x{:x}) = 0x{:x}",
|
||||
crc_offs,
|
||||
crc_offs + 4,
|
||||
xl_crc
|
||||
);
|
||||
crc = crc32c_append(0, &buf[crc_offs + 4..page_offs + n]);
|
||||
} else {
|
||||
crc ^= 0xFFFFFFFFu32;
|
||||
trace!(
|
||||
" initializing crc: [0x{:x}; 0x{:x}); crc = 0x{:x}",
|
||||
crc_offs + 4,
|
||||
page_offs + n,
|
||||
crc
|
||||
);
|
||||
} else if rec_offs > XLOG_RECORD_CRC_OFFS {
|
||||
// As all records are 8-byte aligned, the header is already fully read and `crc` is initialized in the branch above.
|
||||
ensure!(rec_offs >= XLOG_SIZE_OF_XLOG_RECORD);
|
||||
let old_crc = crc;
|
||||
crc = crc32c_append(crc, &buf[page_offs..page_offs + n]);
|
||||
trace!(
|
||||
" appending to crc: [0x{:x}; 0x{:x}); 0x{:x} --> 0x{:x}",
|
||||
page_offs,
|
||||
page_offs + n,
|
||||
old_crc,
|
||||
crc
|
||||
);
|
||||
} else {
|
||||
// Correct because of the way conditions are written above.
|
||||
assert!(rec_offs + n < XLOG_SIZE_OF_XLOG_RECORD);
|
||||
// If `skipping_first_contrecord == true`, we may be reading from a middle of a record
|
||||
// which started in the previous segment. Hence there is no point in validating the header.
|
||||
if !skipping_first_contrecord && rec_offs + n > XLOG_RECORD_CRC_OFFS {
|
||||
info!(
|
||||
"Curiously corrupted WAL: a record stops inside the header; \
|
||||
offs=0x{:x}, record continuation, pageleft={}, contlen={}",
|
||||
offs, pageleft, contlen
|
||||
);
|
||||
break;
|
||||
}
|
||||
// Do nothing: we are still reading the header. It's accounted in CRC in the end of the record.
|
||||
}
|
||||
crc = !crc;
|
||||
rec_offs += n;
|
||||
offs += n;
|
||||
contlen -= n;
|
||||
|
||||
if contlen == 0 {
|
||||
crc = !crc;
|
||||
trace!(" record completed at 0x{:x}", offs);
|
||||
crc = crc32c_append(crc, &rec_hdr);
|
||||
offs = (offs + 7) & !7; // pad on 8 bytes boundary */
|
||||
if crc == wal_crc {
|
||||
trace!(
|
||||
" padded offs to 0x{:x}, crc is {:x}, expected crc is {:x}",
|
||||
offs,
|
||||
crc,
|
||||
xl_crc
|
||||
);
|
||||
if skipping_first_contrecord {
|
||||
// do nothing, the flag will go down on next iteration when we're reading new record
|
||||
trace!(" first conrecord has been just completed");
|
||||
} else if crc == xl_crc {
|
||||
// record is valid, advance the result to its end (with
|
||||
// alignment to the next record taken into account)
|
||||
trace!(
|
||||
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
|
||||
last_valid_rec_pos,
|
||||
offs
|
||||
);
|
||||
last_valid_rec_pos = offs;
|
||||
} else {
|
||||
info!(
|
||||
"CRC mismatch {} vs {} at {}",
|
||||
crc, wal_crc, last_valid_rec_pos
|
||||
crc, xl_crc, last_valid_rec_pos
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
trace!("last_valid_rec_pos=0x{:x}", last_valid_rec_pos);
|
||||
Ok(last_valid_rec_pos as u32)
|
||||
}
|
||||
|
||||
@@ -415,7 +531,7 @@ impl CheckPoint {
|
||||
///
|
||||
/// Returns 'true' if the XID was updated.
|
||||
pub fn update_next_xid(&mut self, xid: u32) -> bool {
|
||||
// nextXid should nw greate than any XID in WAL, so increment provided XID and check for wraparround.
|
||||
// nextXid should nw greater than any XID in WAL, so increment provided XID and check for wraparround.
|
||||
let mut new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
|
||||
// To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.
|
||||
// XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
|
||||
@@ -476,78 +592,126 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, Seriali
|
||||
mod tests {
|
||||
use super::*;
|
||||
use regex::Regex;
|
||||
use std::{env, process::Command, str::FromStr};
|
||||
use std::{env, str::FromStr};
|
||||
|
||||
// Run find_end_of_wal against file in test_wal dir
|
||||
// Ensure that it finds last record correctly
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal() {
|
||||
// 1. Run initdb to generate some WAL
|
||||
fn init_logging() {
|
||||
let _ = env_logger::Builder::from_env(
|
||||
env_logger::Env::default()
|
||||
.default_filter_or("wal_generate=info,postgres_ffi::xlog_utils=trace"),
|
||||
)
|
||||
.is_test(true)
|
||||
.try_init();
|
||||
}
|
||||
|
||||
fn test_end_of_wal(
|
||||
test_name: &str,
|
||||
generate_wal: impl Fn(&mut postgres::Client) -> anyhow::Result<postgres::types::PgLsn>,
|
||||
expected_end_of_wal_non_partial: Lsn,
|
||||
last_segment: &str,
|
||||
) {
|
||||
use wal_generate::*;
|
||||
// 1. Generate some WAL
|
||||
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("..")
|
||||
.join("..");
|
||||
let data_dir = top_path.join("test_output/test_find_end_of_wal");
|
||||
let initdb_path = top_path.join("tmp_install/bin/initdb");
|
||||
let lib_path = top_path.join("tmp_install/lib");
|
||||
if data_dir.exists() {
|
||||
fs::remove_dir_all(&data_dir).unwrap();
|
||||
let cfg = Conf {
|
||||
pg_distrib_dir: top_path.join("tmp_install"),
|
||||
datadir: top_path.join(format!("test_output/{}", test_name)),
|
||||
};
|
||||
if cfg.datadir.exists() {
|
||||
fs::remove_dir_all(&cfg.datadir).unwrap();
|
||||
}
|
||||
println!("Using initdb from '{}'", initdb_path.display());
|
||||
println!("Data directory '{}'", data_dir.display());
|
||||
let initdb_output = Command::new(initdb_path)
|
||||
.args(&["-D", data_dir.to_str().unwrap()])
|
||||
.arg("--no-instructions")
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &lib_path)
|
||||
.env("DYLD_LIBRARY_PATH", &lib_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(
|
||||
initdb_output.status.success(),
|
||||
"initdb failed. Status: '{}', stdout: '{}', stderr: '{}'",
|
||||
initdb_output.status,
|
||||
String::from_utf8_lossy(&initdb_output.stdout),
|
||||
String::from_utf8_lossy(&initdb_output.stderr),
|
||||
);
|
||||
cfg.initdb().unwrap();
|
||||
let mut srv = cfg.start_server().unwrap();
|
||||
let expected_wal_end: Lsn =
|
||||
u64::from(generate_wal(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
|
||||
srv.kill();
|
||||
|
||||
// 2. Pick WAL generated by initdb
|
||||
let wal_dir = data_dir.join("pg_wal");
|
||||
let wal_dir = cfg.datadir.join("pg_wal");
|
||||
let wal_seg_size = 16 * 1024 * 1024;
|
||||
|
||||
// 3. Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
|
||||
let (wal_end, tli) = find_end_of_wal(&wal_dir, wal_seg_size, true, Lsn(0)).unwrap();
|
||||
let wal_end = Lsn(wal_end);
|
||||
println!("wal_end={}, tli={}", wal_end, tli);
|
||||
assert_eq!(wal_end, "0/2000000".parse::<Lsn>().unwrap());
|
||||
info!(
|
||||
"find_end_of_wal returned (wal_end={}, tli={})",
|
||||
wal_end, tli
|
||||
);
|
||||
assert_eq!(wal_end, expected_end_of_wal_non_partial);
|
||||
|
||||
// 4. Get the actual end of WAL by pg_waldump
|
||||
let waldump_path = top_path.join("tmp_install/bin/pg_waldump");
|
||||
let waldump_output = Command::new(waldump_path)
|
||||
.arg(wal_dir.join("000000010000000000000001"))
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", &lib_path)
|
||||
.env("DYLD_LIBRARY_PATH", &lib_path)
|
||||
.output()
|
||||
.unwrap();
|
||||
let waldump_output = std::str::from_utf8(&waldump_output.stderr).unwrap();
|
||||
println!("waldump_output = '{}'", &waldump_output);
|
||||
let re = Regex::new(r"invalid record length at (.+):").unwrap();
|
||||
let caps = re.captures(waldump_output).unwrap();
|
||||
let waldump_output = cfg
|
||||
.pg_waldump("000000010000000000000001", last_segment)
|
||||
.unwrap()
|
||||
.stderr;
|
||||
let waldump_output = std::str::from_utf8(&waldump_output).unwrap();
|
||||
let caps = match Regex::new(r"invalid record length at (.+):")
|
||||
.unwrap()
|
||||
.captures(waldump_output)
|
||||
{
|
||||
Some(caps) => caps,
|
||||
None => {
|
||||
error!("Unable to parse pg_waldump's stderr:\n{}", waldump_output);
|
||||
panic!();
|
||||
}
|
||||
};
|
||||
let waldump_wal_end = Lsn::from_str(caps.get(1).unwrap().as_str()).unwrap();
|
||||
info!(
|
||||
"waldump erred on {}, expected wal end at {}",
|
||||
waldump_wal_end, expected_wal_end
|
||||
);
|
||||
assert_eq!(waldump_wal_end, expected_wal_end);
|
||||
|
||||
// 5. Rename file to partial to actually find last valid lsn
|
||||
fs::rename(
|
||||
wal_dir.join("000000010000000000000001"),
|
||||
wal_dir.join("000000010000000000000001.partial"),
|
||||
wal_dir.join(last_segment),
|
||||
wal_dir.join(format!("{}.partial", last_segment)),
|
||||
)
|
||||
.unwrap();
|
||||
let (wal_end, tli) = find_end_of_wal(&wal_dir, wal_seg_size, true, Lsn(0)).unwrap();
|
||||
let wal_end = Lsn(wal_end);
|
||||
println!("wal_end={}, tli={}", wal_end, tli);
|
||||
info!(
|
||||
"find_end_of_wal returned (wal_end={}, tli={})",
|
||||
wal_end, tli
|
||||
);
|
||||
assert_eq!(wal_end, waldump_wal_end);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal_simple() {
|
||||
init_logging();
|
||||
test_end_of_wal(
|
||||
"test_find_end_of_wal_simple",
|
||||
wal_generate::generate_simple,
|
||||
"0/2000000".parse::<Lsn>().unwrap(),
|
||||
"000000010000000000000001",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal_crossing_segment_followed_by_small_one() {
|
||||
init_logging();
|
||||
test_end_of_wal(
|
||||
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
|
||||
wal_generate::generate_wal_record_crossing_segment_followed_by_small_one,
|
||||
"0/3000000".parse::<Lsn>().unwrap(),
|
||||
"000000010000000000000002",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
|
||||
pub fn test_find_end_of_wal_last_crossing_segment() {
|
||||
init_logging();
|
||||
test_end_of_wal(
|
||||
"test_find_end_of_wal_last_crossing_segment",
|
||||
wal_generate::generate_last_wal_record_crossing_segment,
|
||||
"0/3000000".parse::<Lsn>().unwrap(),
|
||||
"000000010000000000000002",
|
||||
);
|
||||
}
|
||||
|
||||
/// Check the math in update_next_xid
|
||||
///
|
||||
/// NOTE: These checks are sensitive to the value of XID_CHECKPOINT_INTERVAL,
|
||||
|
||||
14
libs/postgres_ffi/wal_generate/Cargo.toml
Normal file
14
libs/postgres_ffi/wal_generate/Cargo.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
[package]
|
||||
name = "wal_generate"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
clap = "3.0"
|
||||
env_logger = "0.9"
|
||||
log = "0.4"
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
tempfile = "3.2"
|
||||
58
libs/postgres_ffi/wal_generate/src/bin/wal_generate.rs
Normal file
58
libs/postgres_ffi/wal_generate/src/bin/wal_generate.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
use anyhow::*;
|
||||
use clap::{App, Arg};
|
||||
use wal_generate::*;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
env_logger::Builder::from_env(
|
||||
env_logger::Env::default().default_filter_or("wal_generate=info"),
|
||||
)
|
||||
.init();
|
||||
let arg_matches = App::new("Postgres WAL generator")
|
||||
.about("Generates Postgres databases with specific WAL properties")
|
||||
.arg(
|
||||
Arg::new("datadir")
|
||||
.short('D')
|
||||
.long("datadir")
|
||||
.takes_value(true)
|
||||
.help("Data directory for the Postgres server")
|
||||
.required(true)
|
||||
)
|
||||
.arg(
|
||||
Arg::new("pg-distrib-dir")
|
||||
.long("pg-distrib-dir")
|
||||
.takes_value(true)
|
||||
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
|
||||
.default_value("/usr/local")
|
||||
)
|
||||
.arg(
|
||||
Arg::new("type")
|
||||
.long("type")
|
||||
.takes_value(true)
|
||||
.help("Type of WAL to generate")
|
||||
.possible_values(["simple", "last_wal_record_crossing_segment", "wal_record_crossing_segment_followed_by_small_one"])
|
||||
.required(true)
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let cfg = Conf {
|
||||
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
|
||||
datadir: arg_matches.value_of("datadir").unwrap().into(),
|
||||
};
|
||||
cfg.initdb()?;
|
||||
let mut srv = cfg.start_server()?;
|
||||
let lsn = match arg_matches.value_of("type").unwrap() {
|
||||
"simple" => generate_simple(&mut srv.connect_with_timeout()?)?,
|
||||
"last_wal_record_crossing_segment" => {
|
||||
generate_last_wal_record_crossing_segment(&mut srv.connect_with_timeout()?)?
|
||||
}
|
||||
"wal_record_crossing_segment_followed_by_small_one" => {
|
||||
generate_wal_record_crossing_segment_followed_by_small_one(
|
||||
&mut srv.connect_with_timeout()?,
|
||||
)?
|
||||
}
|
||||
a => panic!("Unknown --type argument: {}", a),
|
||||
};
|
||||
println!("end_of_wal = {}", lsn);
|
||||
srv.kill();
|
||||
Ok(())
|
||||
}
|
||||
285
libs/postgres_ffi/wal_generate/src/lib.rs
Normal file
285
libs/postgres_ffi/wal_generate/src/lib.rs
Normal file
@@ -0,0 +1,285 @@
|
||||
use anyhow::*;
|
||||
use core::time::Duration;
|
||||
use log::*;
|
||||
use postgres::types::PgLsn;
|
||||
use postgres::Client;
|
||||
use std::cmp::Ordering;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Instant;
|
||||
use tempfile::{tempdir, TempDir};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Conf {
|
||||
pub pg_distrib_dir: PathBuf,
|
||||
pub datadir: PathBuf,
|
||||
}
|
||||
|
||||
pub struct PostgresServer {
|
||||
process: std::process::Child,
|
||||
_unix_socket_dir: TempDir,
|
||||
client_config: postgres::Config,
|
||||
}
|
||||
|
||||
impl Conf {
|
||||
fn pg_bin_dir(&self) -> PathBuf {
|
||||
self.pg_distrib_dir.join("bin")
|
||||
}
|
||||
|
||||
fn pg_lib_dir(&self) -> PathBuf {
|
||||
self.pg_distrib_dir.join("lib")
|
||||
}
|
||||
|
||||
fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
|
||||
let path = self.pg_bin_dir().join(command);
|
||||
ensure!(path.exists(), "Command {:?} does not exist", path);
|
||||
let mut cmd = Command::new(path);
|
||||
cmd.env_clear()
|
||||
.env("LD_LIBRARY_PATH", self.pg_lib_dir())
|
||||
.env("DYLD_LIBRARY_PATH", self.pg_lib_dir());
|
||||
Ok(cmd)
|
||||
}
|
||||
|
||||
pub fn initdb(&self) -> Result<()> {
|
||||
if let Some(parent) = self.datadir.parent() {
|
||||
info!("Pre-creating parent directory {:?}", parent);
|
||||
// Tests may be run concurrently and there may be a race to create `test_output/`.
|
||||
// std::fs::create_dir_all is guaranteed to have no races with another thread creating directories.
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
info!(
|
||||
"Running initdb in {:?} with user \"postgres\"",
|
||||
self.datadir
|
||||
);
|
||||
let output = self
|
||||
.new_pg_command("initdb")?
|
||||
.arg("-D")
|
||||
.arg(self.datadir.as_os_str())
|
||||
.args(&["-U", "postgres", "--no-instructions", "--no-sync"])
|
||||
.output()?;
|
||||
debug!("initdb output: {:?}", output);
|
||||
ensure!(
|
||||
output.status.success(),
|
||||
"initdb failed, stdout and stderr follow:\n{}{}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn start_server(&self) -> Result<PostgresServer> {
|
||||
info!("Starting Postgres server in {:?}", self.datadir);
|
||||
let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| {
|
||||
format!(
|
||||
"Failed to create pg.log file in directory {}",
|
||||
self.datadir.display()
|
||||
)
|
||||
})?;
|
||||
let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
|
||||
let unix_socket_dir_path = unix_socket_dir.path().to_owned();
|
||||
let server_process = self
|
||||
.new_pg_command("postgres")?
|
||||
.args(&["-c", "listen_addresses="])
|
||||
.arg("-k")
|
||||
.arg(unix_socket_dir_path.as_os_str())
|
||||
.arg("-D")
|
||||
.arg(self.datadir.as_os_str())
|
||||
.args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
|
||||
.args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
|
||||
.args(&["-c", "shared_preload_libraries=neon"]) // can only be loaded at startup
|
||||
// Disable background processes as much as possible
|
||||
.args(&["-c", "wal_writer_delay=10s"])
|
||||
.args(&["-c", "autovacuum=off"])
|
||||
.stderr(Stdio::from(log_file))
|
||||
.spawn()?;
|
||||
let server = PostgresServer {
|
||||
process: server_process,
|
||||
_unix_socket_dir: unix_socket_dir,
|
||||
client_config: {
|
||||
let mut c = postgres::Config::new();
|
||||
c.host_path(&unix_socket_dir_path);
|
||||
c.user("postgres");
|
||||
c.connect_timeout(Duration::from_millis(1000));
|
||||
c
|
||||
},
|
||||
};
|
||||
Ok(server)
|
||||
}
|
||||
|
||||
pub fn pg_waldump(
|
||||
&self,
|
||||
first_segment_name: &str,
|
||||
last_segment_name: &str,
|
||||
) -> Result<std::process::Output> {
|
||||
let first_segment_file = self.datadir.join(first_segment_name);
|
||||
let last_segment_file = self.datadir.join(last_segment_name);
|
||||
info!(
|
||||
"Running pg_waldump for {} .. {}",
|
||||
first_segment_file.display(),
|
||||
last_segment_file.display()
|
||||
);
|
||||
let output = self
|
||||
.new_pg_command("pg_waldump")?
|
||||
.args(&[
|
||||
&first_segment_file.as_os_str(),
|
||||
&last_segment_file.as_os_str(),
|
||||
])
|
||||
.output()?;
|
||||
debug!("waldump output: {:?}", output);
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
impl PostgresServer {
|
||||
pub fn connect_with_timeout(&self) -> Result<Client> {
|
||||
let retry_until = Instant::now() + *self.client_config.get_connect_timeout().unwrap();
|
||||
while Instant::now() < retry_until {
|
||||
use std::result::Result::Ok;
|
||||
if let Ok(client) = self.client_config.connect(postgres::NoTls) {
|
||||
return Ok(client);
|
||||
}
|
||||
std::thread::sleep(Duration::from_millis(100));
|
||||
}
|
||||
bail!("Connection timed out");
|
||||
}
|
||||
|
||||
pub fn kill(&mut self) {
|
||||
self.process.kill().unwrap();
|
||||
self.process.wait().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for PostgresServer {
|
||||
fn drop(&mut self) {
|
||||
use std::result::Result::Ok;
|
||||
match self.process.try_wait() {
|
||||
Ok(Some(_)) => return,
|
||||
Ok(None) => {
|
||||
warn!("Server was not terminated, will be killed");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Unable to get status of the server: {}, will be killed", e);
|
||||
}
|
||||
}
|
||||
let _ = self.process.kill();
|
||||
}
|
||||
}
|
||||
|
||||
pub trait PostgresClientExt: postgres::GenericClient {
|
||||
fn pg_current_wal_insert_lsn(&mut self) -> Result<PgLsn> {
|
||||
Ok(self
|
||||
.query_one("SELECT pg_current_wal_insert_lsn()", &[])?
|
||||
.get(0))
|
||||
}
|
||||
fn pg_current_wal_flush_lsn(&mut self) -> Result<PgLsn> {
|
||||
Ok(self
|
||||
.query_one("SELECT pg_current_wal_flush_lsn()", &[])?
|
||||
.get(0))
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: postgres::GenericClient> PostgresClientExt for C {}
|
||||
|
||||
fn generate_internal<C: postgres::GenericClient>(
|
||||
client: &mut C,
|
||||
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
|
||||
) -> Result<PgLsn> {
|
||||
client.execute("create extension if not exists neon_test_utils", &[])?;
|
||||
|
||||
let wal_segment_size = client.query_one(
|
||||
"select cast(setting as bigint) as setting, unit \
|
||||
from pg_settings where name = 'wal_segment_size'",
|
||||
&[],
|
||||
)?;
|
||||
ensure!(
|
||||
wal_segment_size.get::<_, String>("unit") == "B",
|
||||
"Unexpected wal_segment_size unit"
|
||||
);
|
||||
ensure!(
|
||||
wal_segment_size.get::<_, i64>("setting") == 16 * 1024 * 1024,
|
||||
"Unexpected wal_segment_size in bytes"
|
||||
);
|
||||
|
||||
let initial_lsn = client.pg_current_wal_insert_lsn()?;
|
||||
info!("LSN initial = {}", initial_lsn);
|
||||
|
||||
let last_lsn = match f(client, initial_lsn)? {
|
||||
None => client.pg_current_wal_insert_lsn()?,
|
||||
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
|
||||
Ordering::Less => bail!("Some records were inserted after the generated WAL"),
|
||||
Ordering::Equal => last_lsn,
|
||||
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
|
||||
},
|
||||
};
|
||||
|
||||
// Some records may be not flushed, e.g. non-transactional logical messages.
|
||||
client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
|
||||
match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
|
||||
Ordering::Less => bail!("Some records were flushed after the generated WAL"),
|
||||
Ordering::Equal => {}
|
||||
Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
|
||||
}
|
||||
Ok(last_lsn)
|
||||
}
|
||||
|
||||
pub fn generate_simple(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
|
||||
generate_internal(client, |client, _| {
|
||||
client.execute("CREATE table t(x int)", &[])?;
|
||||
Ok(None)
|
||||
})
|
||||
}
|
||||
|
||||
fn generate_single_logical_message(
|
||||
client: &mut impl postgres::GenericClient,
|
||||
transactional: bool,
|
||||
) -> Result<PgLsn> {
|
||||
generate_internal(client, |client, initial_lsn| {
|
||||
ensure!(
|
||||
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
|
||||
"Initial LSN is too far in the future"
|
||||
);
|
||||
|
||||
let message_lsn: PgLsn = client
|
||||
.query_one(
|
||||
"select pg_logical_emit_message($1, 'big-16mb-msg', \
|
||||
concat(repeat('abcd', 16 * 256 * 1024), 'end')) as message_lsn",
|
||||
&[&transactional],
|
||||
)?
|
||||
.get("message_lsn");
|
||||
ensure!(
|
||||
message_lsn > PgLsn::from(0x0200_0000 + 4 * 8192),
|
||||
"Logical message did not cross the segment boundary"
|
||||
);
|
||||
ensure!(
|
||||
message_lsn < PgLsn::from(0x0400_0000),
|
||||
"Logical message crossed two segments"
|
||||
);
|
||||
|
||||
if transactional {
|
||||
// Transactional logical messages are part of a transaction, so the one above is
|
||||
// followed by a small COMMIT record.
|
||||
|
||||
let after_message_lsn = client.pg_current_wal_insert_lsn()?;
|
||||
ensure!(
|
||||
message_lsn < after_message_lsn,
|
||||
"No record found after the emitted message"
|
||||
);
|
||||
Ok(Some(after_message_lsn))
|
||||
} else {
|
||||
Ok(Some(message_lsn))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn generate_wal_record_crossing_segment_followed_by_small_one(
|
||||
client: &mut impl postgres::GenericClient,
|
||||
) -> Result<PgLsn> {
|
||||
generate_single_logical_message(client, true)
|
||||
}
|
||||
|
||||
pub fn generate_last_wal_record_crossing_segment<C: postgres::GenericClient>(
|
||||
client: &mut C,
|
||||
) -> Result<PgLsn> {
|
||||
generate_single_logical_message(client, false)
|
||||
}
|
||||
@@ -5,14 +5,17 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
|
||||
tokio-util = { version = "0.7", features = ["io"] }
|
||||
tracing = "0.1.27"
|
||||
async-trait = "0.1"
|
||||
metrics = { version = "0.1", path = "../metrics" }
|
||||
once_cell = "1.8.0"
|
||||
rusoto_core = "0.48"
|
||||
rusoto_s3 = "0.48"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
async-trait = "0.1"
|
||||
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
|
||||
tokio-util = { version = "0.7", features = ["io"] }
|
||||
toml_edit = { version = "0.13", features = ["easy"] }
|
||||
tracing = "0.1.27"
|
||||
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
|
||||
@@ -16,8 +16,10 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::{bail, Context};
|
||||
|
||||
use tokio::io;
|
||||
use toml_edit::Item;
|
||||
use tracing::info;
|
||||
|
||||
pub use self::{
|
||||
@@ -87,7 +89,8 @@ pub trait RemoteStorage: Send + Sync {
|
||||
async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()>;
|
||||
}
|
||||
|
||||
/// TODO kb
|
||||
/// Every storage, currently supported.
|
||||
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
|
||||
pub enum GenericRemoteStorage {
|
||||
Local(LocalFs),
|
||||
S3(S3Bucket),
|
||||
@@ -202,6 +205,90 @@ pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str)
|
||||
.with_extension(new_extension.as_ref())
|
||||
}
|
||||
|
||||
impl RemoteStorageConfig {
|
||||
pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
|
||||
let local_path = toml.get("local_path");
|
||||
let bucket_name = toml.get("bucket_name");
|
||||
let bucket_region = toml.get("bucket_region");
|
||||
|
||||
let max_concurrent_syncs = NonZeroUsize::new(
|
||||
parse_optional_integer("max_concurrent_syncs", toml)?
|
||||
.unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS),
|
||||
)
|
||||
.context("Failed to parse 'max_concurrent_syncs' as a positive integer")?;
|
||||
|
||||
let max_sync_errors = NonZeroU32::new(
|
||||
parse_optional_integer("max_sync_errors", toml)?
|
||||
.unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
|
||||
)
|
||||
.context("Failed to parse 'max_sync_errors' as a positive integer")?;
|
||||
|
||||
let concurrency_limit = NonZeroUsize::new(
|
||||
parse_optional_integer("concurrency_limit", toml)?
|
||||
.unwrap_or(DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT),
|
||||
)
|
||||
.context("Failed to parse 'concurrency_limit' as a positive integer")?;
|
||||
|
||||
let storage = match (local_path, bucket_name, bucket_region) {
|
||||
(None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
|
||||
(_, Some(_), None) => {
|
||||
bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
|
||||
}
|
||||
(_, None, Some(_)) => {
|
||||
bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
|
||||
}
|
||||
(None, Some(bucket_name), Some(bucket_region)) => RemoteStorageKind::AwsS3(S3Config {
|
||||
bucket_name: parse_toml_string("bucket_name", bucket_name)?,
|
||||
bucket_region: parse_toml_string("bucket_region", bucket_region)?,
|
||||
prefix_in_bucket: toml
|
||||
.get("prefix_in_bucket")
|
||||
.map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
|
||||
.transpose()?,
|
||||
endpoint: toml
|
||||
.get("endpoint")
|
||||
.map(|endpoint| parse_toml_string("endpoint", endpoint))
|
||||
.transpose()?,
|
||||
concurrency_limit,
|
||||
}),
|
||||
(Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
|
||||
parse_toml_string("local_path", local_path)?,
|
||||
)),
|
||||
(Some(_), Some(_), _) => bail!("local_path and bucket_name are mutually exclusive"),
|
||||
};
|
||||
|
||||
Ok(RemoteStorageConfig {
|
||||
max_concurrent_syncs,
|
||||
max_sync_errors,
|
||||
storage,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions to parse a toml Item
|
||||
fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
|
||||
where
|
||||
I: TryFrom<i64, Error = E>,
|
||||
E: std::error::Error + Send + Sync + 'static,
|
||||
{
|
||||
let toml_integer = match item.get(name) {
|
||||
Some(item) => item
|
||||
.as_integer()
|
||||
.with_context(|| format!("configure option {name} is not an integer"))?,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
I::try_from(toml_integer)
|
||||
.map(Some)
|
||||
.with_context(|| format!("configure option {name} is too large"))
|
||||
}
|
||||
|
||||
fn parse_toml_string(name: &str, item: &Item) -> anyhow::Result<String> {
|
||||
let s = item
|
||||
.as_str()
|
||||
.with_context(|| format!("configure option {name} is not a string"))?;
|
||||
Ok(s.to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -23,6 +23,71 @@ use crate::{strip_path_prefix, RemoteStorage, S3Config};
|
||||
|
||||
use super::StorageMetadata;
|
||||
|
||||
pub(super) mod metrics {
|
||||
use metrics::{register_int_counter_vec, IntCounterVec};
|
||||
use once_cell::sync::Lazy;
|
||||
|
||||
static S3_REQUESTS_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"remote_storage_s3_requests_count",
|
||||
"Number of s3 requests of particular type",
|
||||
&["request_type"],
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static S3_REQUESTS_FAIL_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"remote_storage_s3_failures_count",
|
||||
"Number of failed s3 requests of particular type",
|
||||
&["request_type"],
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub fn inc_get_object() {
|
||||
S3_REQUESTS_COUNT.with_label_values(&["get_object"]).inc();
|
||||
}
|
||||
|
||||
pub fn inc_get_object_fail() {
|
||||
S3_REQUESTS_FAIL_COUNT
|
||||
.with_label_values(&["get_object"])
|
||||
.inc();
|
||||
}
|
||||
|
||||
pub fn inc_put_object() {
|
||||
S3_REQUESTS_COUNT.with_label_values(&["put_object"]).inc();
|
||||
}
|
||||
|
||||
pub fn inc_put_object_fail() {
|
||||
S3_REQUESTS_FAIL_COUNT
|
||||
.with_label_values(&["put_object"])
|
||||
.inc();
|
||||
}
|
||||
|
||||
pub fn inc_delete_object() {
|
||||
S3_REQUESTS_COUNT
|
||||
.with_label_values(&["delete_object"])
|
||||
.inc();
|
||||
}
|
||||
|
||||
pub fn inc_delete_object_fail() {
|
||||
S3_REQUESTS_FAIL_COUNT
|
||||
.with_label_values(&["delete_object"])
|
||||
.inc();
|
||||
}
|
||||
|
||||
pub fn inc_list_objects() {
|
||||
S3_REQUESTS_COUNT.with_label_values(&["list_objects"]).inc();
|
||||
}
|
||||
|
||||
pub fn inc_list_objects_fail() {
|
||||
S3_REQUESTS_FAIL_COUNT
|
||||
.with_label_values(&["list_objects"])
|
||||
.inc();
|
||||
}
|
||||
}
|
||||
|
||||
const S3_PREFIX_SEPARATOR: char = '/';
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
||||
@@ -152,6 +217,9 @@ impl RemoteStorage for S3Bucket {
|
||||
.acquire()
|
||||
.await
|
||||
.context("Concurrency limiter semaphore got closed during S3 list")?;
|
||||
|
||||
metrics::inc_list_objects();
|
||||
|
||||
let fetch_response = self
|
||||
.client
|
||||
.list_objects_v2(ListObjectsV2Request {
|
||||
@@ -160,7 +228,11 @@ impl RemoteStorage for S3Bucket {
|
||||
continuation_token,
|
||||
..ListObjectsV2Request::default()
|
||||
})
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_list_objects_fail();
|
||||
e
|
||||
})?;
|
||||
document_keys.extend(
|
||||
fetch_response
|
||||
.contents
|
||||
@@ -190,6 +262,8 @@ impl RemoteStorage for S3Bucket {
|
||||
.acquire()
|
||||
.await
|
||||
.context("Concurrency limiter semaphore got closed during S3 upload")?;
|
||||
|
||||
metrics::inc_put_object();
|
||||
self.client
|
||||
.put_object(PutObjectRequest {
|
||||
body: Some(StreamingBody::new_with_size(
|
||||
@@ -201,7 +275,11 @@ impl RemoteStorage for S3Bucket {
|
||||
metadata: metadata.map(|m| m.0),
|
||||
..PutObjectRequest::default()
|
||||
})
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_put_object_fail();
|
||||
e
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -215,6 +293,9 @@ impl RemoteStorage for S3Bucket {
|
||||
.acquire()
|
||||
.await
|
||||
.context("Concurrency limiter semaphore got closed during S3 download")?;
|
||||
|
||||
metrics::inc_get_object();
|
||||
|
||||
let object_output = self
|
||||
.client
|
||||
.get_object(GetObjectRequest {
|
||||
@@ -222,7 +303,11 @@ impl RemoteStorage for S3Bucket {
|
||||
key: from.key().to_owned(),
|
||||
..GetObjectRequest::default()
|
||||
})
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_get_object_fail();
|
||||
e
|
||||
})?;
|
||||
|
||||
if let Some(body) = object_output.body {
|
||||
let mut from = io::BufReader::new(body.into_async_read());
|
||||
@@ -251,6 +336,9 @@ impl RemoteStorage for S3Bucket {
|
||||
.acquire()
|
||||
.await
|
||||
.context("Concurrency limiter semaphore got closed during S3 range download")?;
|
||||
|
||||
metrics::inc_get_object();
|
||||
|
||||
let object_output = self
|
||||
.client
|
||||
.get_object(GetObjectRequest {
|
||||
@@ -259,7 +347,11 @@ impl RemoteStorage for S3Bucket {
|
||||
range,
|
||||
..GetObjectRequest::default()
|
||||
})
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_get_object_fail();
|
||||
e
|
||||
})?;
|
||||
|
||||
if let Some(body) = object_output.body {
|
||||
let mut from = io::BufReader::new(body.into_async_read());
|
||||
@@ -275,13 +367,20 @@ impl RemoteStorage for S3Bucket {
|
||||
.acquire()
|
||||
.await
|
||||
.context("Concurrency limiter semaphore got closed during S3 delete")?;
|
||||
|
||||
metrics::inc_delete_object();
|
||||
|
||||
self.client
|
||||
.delete_object(DeleteObjectRequest {
|
||||
bucket: self.bucket_name.clone(),
|
||||
key: path.key().to_owned(),
|
||||
..DeleteObjectRequest::default()
|
||||
})
|
||||
.await?;
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_delete_object_fail();
|
||||
e
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-env-changed=GIT_VERSION");
|
||||
}
|
||||
@@ -5,7 +5,7 @@ DATA_DIR=$3
|
||||
PORT=$4
|
||||
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
|
||||
rm -fr $DATA_DIR
|
||||
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
|
||||
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
|
||||
echo port=$PORT >> $DATA_DIR/postgresql.conf
|
||||
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
|
||||
declare -i WAL_SIZE=$REDO_POS+114
|
||||
|
||||
@@ -5,7 +5,7 @@ PORT=$4
|
||||
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
|
||||
rm -fr $DATA_DIR /tmp/pg_wals
|
||||
mkdir /tmp/pg_wals
|
||||
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
|
||||
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
|
||||
echo port=$PORT >> $DATA_DIR/postgresql.conf
|
||||
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
|
||||
declare -i WAL_SIZE=$REDO_POS+114
|
||||
|
||||
@@ -71,7 +71,7 @@ impl From<bincode::Error> for SerializeError {
|
||||
/// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
|
||||
///
|
||||
/// Does not allow trailing bytes in deserialization. If this is desired, you
|
||||
/// may set [`Options::allow_trailing_bytes`] to explicitly accomodate this.
|
||||
/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.
|
||||
pub fn be_coder() -> impl Options {
|
||||
bincode::DefaultOptions::new()
|
||||
.with_big_endian()
|
||||
@@ -85,7 +85,7 @@ pub fn be_coder() -> impl Options {
|
||||
/// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
|
||||
///
|
||||
/// Does not allow trailing bytes in deserialization. If this is desired, you
|
||||
/// may set [`Options::allow_trailing_bytes`] to explicitly accomodate this.
|
||||
/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.
|
||||
pub fn le_coder() -> impl Options {
|
||||
bincode::DefaultOptions::new()
|
||||
.with_little_endian()
|
||||
|
||||
@@ -5,7 +5,7 @@ use anyhow::anyhow;
|
||||
use hyper::header::AUTHORIZATION;
|
||||
use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
|
||||
use lazy_static::lazy_static;
|
||||
use metrics::{new_common_metric_name, register_int_counter, Encoder, IntCounter, TextEncoder};
|
||||
use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
|
||||
use routerify::ext::RequestExt;
|
||||
use routerify::RequestInfo;
|
||||
use routerify::{Middleware, Router, RouterBuilder, RouterService};
|
||||
@@ -18,7 +18,7 @@ use super::error::ApiError;
|
||||
|
||||
lazy_static! {
|
||||
static ref SERVE_METRICS_COUNT: IntCounter = register_int_counter!(
|
||||
new_common_metric_name("serve_metrics_count"),
|
||||
"libmetrics_metric_handler_requests_total",
|
||||
"Number of metric requests made"
|
||||
)
|
||||
.expect("failed to define a metric");
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user