mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-18 13:40:37 +00:00
Compare commits
114 Commits
sk-migrate
...
bojan-repl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d67fb28a59 | ||
|
|
faa15e32ee | ||
|
|
8dbd6313e5 | ||
|
|
2f83f793bc | ||
|
|
2f9b17b9e5 | ||
|
|
e7cba0b607 | ||
|
|
ff7e9a86c6 | ||
|
|
9ede38b6c4 | ||
|
|
62449d6068 | ||
|
|
baa59512b8 | ||
|
|
87a6c4d051 | ||
|
|
801b749e1d | ||
|
|
5cb501c2b3 | ||
|
|
ad25736f3a | ||
|
|
9a396e1feb | ||
|
|
0323bb5870 | ||
|
|
af0195b604 | ||
|
|
9df8915b03 | ||
|
|
4b1bd32e4a | ||
|
|
68ba6a58a0 | ||
|
|
8f479a712f | ||
|
|
2477d2f9e2 | ||
|
|
992874c916 | ||
|
|
3128e8c75c | ||
|
|
f3f12db2cb | ||
|
|
038ea4c128 | ||
|
|
7e1db8c8a1 | ||
|
|
aa933d3961 | ||
|
|
67b4e38092 | ||
|
|
05f8e6a050 | ||
|
|
76388abeb6 | ||
|
|
2911eb084a | ||
|
|
6cca57f95a | ||
|
|
4a46b01caf | ||
|
|
5c5c3c64f3 | ||
|
|
29539b0561 | ||
|
|
695b5f9d88 | ||
|
|
66694e736a | ||
|
|
091cefaa92 | ||
|
|
aeb4f81c3b | ||
|
|
6391862d8a | ||
|
|
b2e35fffa6 | ||
|
|
8b9d523f3c | ||
|
|
3fd234da07 | ||
|
|
778744d35c | ||
|
|
eabf6f89e4 | ||
|
|
fec050ce97 | ||
|
|
d060a97c54 | ||
|
|
78a6cb247f | ||
|
|
8f6a161271 | ||
|
|
56f6269a8e | ||
|
|
1fb3d08185 | ||
|
|
867aede715 | ||
|
|
d3f356e7a8 | ||
|
|
5f83c9290b | ||
|
|
a4700c9bbe | ||
|
|
dafdf9b952 | ||
|
|
263d60f12d | ||
|
|
abcd7a4b1f | ||
|
|
81cad6277a | ||
|
|
629688fd6c | ||
|
|
9d3779c124 | ||
|
|
334a1d6b5d | ||
|
|
e41ad3be0f | ||
|
|
e113c6fa8d | ||
|
|
cbdfd8c719 | ||
|
|
86bf4301b7 | ||
|
|
9eaa21317c | ||
|
|
e660e12f79 | ||
|
|
ac52f4f2d6 | ||
|
|
5e95338ee9 | ||
|
|
170badd626 | ||
|
|
91fb21225a | ||
|
|
3e6087a12f | ||
|
|
44bfc529f6 | ||
|
|
ef72eb84cf | ||
|
|
a1e34772e5 | ||
|
|
389bd1faeb | ||
|
|
c15aa04714 | ||
|
|
52e0816fa5 | ||
|
|
81417788c8 | ||
|
|
81879f8137 | ||
|
|
5b29774532 | ||
|
|
0ca2bd929b | ||
|
|
9b7dcc2bae | ||
|
|
3136a0754a | ||
|
|
787f0d33f0 | ||
|
|
ed5f9acca9 | ||
|
|
4bc338babc | ||
|
|
3ab090b43a | ||
|
|
7126979950 | ||
|
|
9946cd1125 | ||
|
|
ab20f2c491 | ||
|
|
c9d897f9b6 | ||
|
|
e97f94cc30 | ||
|
|
2cb39a1624 | ||
|
|
93e0ac2b7a | ||
|
|
d5ae9db997 | ||
|
|
9e4de6bed0 | ||
|
|
4a8c663452 | ||
|
|
a009fe912a | ||
|
|
19954dfd8a | ||
|
|
570db6f168 | ||
|
|
cdf04b6a9f | ||
|
|
a0781f229c | ||
|
|
1d36c5a39e | ||
|
|
49da76237b | ||
|
|
1fd08107ca | ||
|
|
58d5136a61 | ||
|
|
87020f8126 | ||
|
|
20414c4b16 | ||
|
|
9b7a8e67a4 | ||
|
|
4af87f3d60 | ||
|
|
0fbe657b2f |
2
.circleci/ansible/.gitignore
vendored
2
.circleci/ansible/.gitignore
vendored
@@ -1,2 +1,4 @@
|
||||
zenith_install.tar.gz
|
||||
.zenith_current_version
|
||||
neon_install.tar.gz
|
||||
.neon_current_version
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
- name: Upload Zenith binaries
|
||||
- name: Upload Neon binaries
|
||||
hosts: storage
|
||||
gather_facts: False
|
||||
remote_user: admin
|
||||
|
||||
tasks:
|
||||
|
||||
- name: get latest version of Zenith binaries
|
||||
- name: get latest version of Neon binaries
|
||||
register: current_version_file
|
||||
set_fact:
|
||||
current_version: "{{ lookup('file', '.zenith_current_version') | trim }}"
|
||||
current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
|
||||
tags:
|
||||
- pageserver
|
||||
- safekeeper
|
||||
@@ -19,11 +19,11 @@
|
||||
- pageserver
|
||||
- safekeeper
|
||||
|
||||
- name: upload and extract Zenith binaries to /usr/local
|
||||
- name: upload and extract Neon binaries to /usr/local
|
||||
ansible.builtin.unarchive:
|
||||
owner: root
|
||||
group: root
|
||||
src: zenith_install.tar.gz
|
||||
src: neon_install.tar.gz
|
||||
dest: /usr/local
|
||||
become: true
|
||||
tags:
|
||||
@@ -63,21 +63,18 @@
|
||||
tags:
|
||||
- pageserver
|
||||
|
||||
# It seems that currently S3 integration does not play well
|
||||
# even with fresh pageserver without a burden of old data.
|
||||
# TODO: turn this back on once the issue is solved.
|
||||
# - name: update remote storage (s3) config
|
||||
# lineinfile:
|
||||
# path: /storage/pageserver/data/pageserver.toml
|
||||
# line: "{{ item }}"
|
||||
# loop:
|
||||
# - "[remote_storage]"
|
||||
# - "bucket_name = '{{ bucket_name }}'"
|
||||
# - "bucket_region = '{{ bucket_region }}'"
|
||||
# - "prefix_in_bucket = '{{ inventory_hostname }}'"
|
||||
# become: true
|
||||
# tags:
|
||||
# - pageserver
|
||||
- name: update remote storage (s3) config
|
||||
lineinfile:
|
||||
path: /storage/pageserver/data/pageserver.toml
|
||||
line: "{{ item }}"
|
||||
loop:
|
||||
- "[remote_storage]"
|
||||
- "bucket_name = '{{ bucket_name }}'"
|
||||
- "bucket_region = '{{ bucket_region }}'"
|
||||
- "prefix_in_bucket = '{{ inventory_hostname }}'"
|
||||
become: true
|
||||
tags:
|
||||
- pageserver
|
||||
|
||||
- name: upload systemd service definition
|
||||
ansible.builtin.template:
|
||||
|
||||
@@ -4,10 +4,10 @@ set -e
|
||||
|
||||
RELEASE=${RELEASE:-false}
|
||||
|
||||
# look at docker hub for latest tag fo zenith docker image
|
||||
# look at docker hub for latest tag for neon docker image
|
||||
if [ "${RELEASE}" = "true" ]; then
|
||||
echo "search latest relase tag"
|
||||
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | tail -1)
|
||||
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | tail -1)
|
||||
if [ -z "${VERSION}" ]; then
|
||||
echo "no any docker tags found, exiting..."
|
||||
exit 1
|
||||
@@ -16,7 +16,7 @@ if [ "${RELEASE}" = "true" ]; then
|
||||
fi
|
||||
else
|
||||
echo "search latest dev tag"
|
||||
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/zenithdb/zenith/tags |jq -r -S '.[].name' | grep -v release | tail -1)
|
||||
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep -v release | tail -1)
|
||||
if [ -z "${VERSION}" ]; then
|
||||
echo "no any docker tags found, exiting..."
|
||||
exit 1
|
||||
@@ -28,25 +28,25 @@ fi
|
||||
echo "found ${VERSION}"
|
||||
|
||||
# do initial cleanup
|
||||
rm -rf zenith_install postgres_install.tar.gz zenith_install.tar.gz .zenith_current_version
|
||||
mkdir zenith_install
|
||||
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
|
||||
mkdir neon_install
|
||||
|
||||
# retrive binaries from docker image
|
||||
echo "getting binaries from docker image"
|
||||
docker pull --quiet zenithdb/zenith:${TAG}
|
||||
ID=$(docker create zenithdb/zenith:${TAG})
|
||||
docker pull --quiet neondatabase/neon:${TAG}
|
||||
ID=$(docker create neondatabase/neon:${TAG})
|
||||
docker cp ${ID}:/data/postgres_install.tar.gz .
|
||||
tar -xzf postgres_install.tar.gz -C zenith_install
|
||||
docker cp ${ID}:/usr/local/bin/pageserver zenith_install/bin/
|
||||
docker cp ${ID}:/usr/local/bin/safekeeper zenith_install/bin/
|
||||
docker cp ${ID}:/usr/local/bin/proxy zenith_install/bin/
|
||||
docker cp ${ID}:/usr/local/bin/postgres zenith_install/bin/
|
||||
tar -xzf postgres_install.tar.gz -C neon_install
|
||||
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
|
||||
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
|
||||
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
|
||||
docker cp ${ID}:/usr/local/bin/postgres neon_install/bin/
|
||||
docker rm -vf ${ID}
|
||||
|
||||
# store version to file (for ansible playbooks) and create binaries tarball
|
||||
echo ${VERSION} > zenith_install/.zenith_current_version
|
||||
echo ${VERSION} > .zenith_current_version
|
||||
tar -czf zenith_install.tar.gz -C zenith_install .
|
||||
echo ${VERSION} > neon_install/.neon_current_version
|
||||
echo ${VERSION} > .neon_current_version
|
||||
tar -czf neon_install.tar.gz -C neon_install .
|
||||
|
||||
# do final cleaup
|
||||
rm -rf zenith_install postgres_install.tar.gz
|
||||
rm -rf neon_install postgres_install.tar.gz
|
||||
|
||||
@@ -14,3 +14,4 @@ safekeepers
|
||||
console_mgmt_base_url = http://console-release.local
|
||||
bucket_name = zenith-storage-oregon
|
||||
bucket_region = us-west-2
|
||||
etcd_endpoints = etcd-release.local:2379
|
||||
|
||||
@@ -5,7 +5,6 @@ zenith-us-stage-ps-2 console_region_id=27
|
||||
[safekeepers]
|
||||
zenith-us-stage-sk-1 console_region_id=27
|
||||
zenith-us-stage-sk-2 console_region_id=27
|
||||
zenith-us-stage-sk-3 console_region_id=27
|
||||
zenith-us-stage-sk-4 console_region_id=27
|
||||
|
||||
[storage:children]
|
||||
@@ -16,3 +15,4 @@ safekeepers
|
||||
console_mgmt_base_url = http://console-staging.local
|
||||
bucket_name = zenith-staging-storage-us-east-1
|
||||
bucket_region = us-east-1
|
||||
etcd_endpoints = etcd-staging.local:2379
|
||||
|
||||
@@ -6,7 +6,7 @@ After=network.target auditd.service
|
||||
Type=simple
|
||||
User=safekeeper
|
||||
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
|
||||
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data
|
||||
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }}
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=mixed
|
||||
KillSignal=SIGINT
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
version: 2.1
|
||||
|
||||
executors:
|
||||
zenith-xlarge-executor:
|
||||
neon-xlarge-executor:
|
||||
resource_class: xlarge
|
||||
docker:
|
||||
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
|
||||
- image: zimg/rust:1.56
|
||||
zenith-executor:
|
||||
- image: zimg/rust:1.58
|
||||
neon-executor:
|
||||
docker:
|
||||
- image: zimg/rust:1.56
|
||||
- image: zimg/rust:1.58
|
||||
|
||||
jobs:
|
||||
check-codestyle-rust:
|
||||
executor: zenith-xlarge-executor
|
||||
executor: neon-xlarge-executor
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
@@ -22,7 +22,7 @@ jobs:
|
||||
|
||||
# A job to build postgres
|
||||
build-postgres:
|
||||
executor: zenith-xlarge-executor
|
||||
executor: neon-xlarge-executor
|
||||
parameters:
|
||||
build_type:
|
||||
type: enum
|
||||
@@ -67,9 +67,9 @@ jobs:
|
||||
paths:
|
||||
- tmp_install
|
||||
|
||||
# A job to build zenith rust code
|
||||
build-zenith:
|
||||
executor: zenith-xlarge-executor
|
||||
# A job to build Neon rust code
|
||||
build-neon:
|
||||
executor: neon-xlarge-executor
|
||||
parameters:
|
||||
build_type:
|
||||
type: enum
|
||||
@@ -113,7 +113,7 @@ jobs:
|
||||
CARGO_FLAGS=
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
CARGO_FLAGS=--release
|
||||
CARGO_FLAGS="--release --features profiling"
|
||||
fi
|
||||
|
||||
export CARGO_INCREMENTAL=0
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
export RUSTC_WRAPPER=cachepot
|
||||
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
|
||||
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
|
||||
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --bins --tests
|
||||
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
||||
cachepot -s
|
||||
|
||||
- save_cache:
|
||||
@@ -132,20 +132,6 @@ jobs:
|
||||
- ~/.cargo/git
|
||||
- target
|
||||
|
||||
# Run style checks
|
||||
# has to run separately from cargo fmt section
|
||||
# since needs to run with dependencies
|
||||
- run:
|
||||
name: cargo clippy
|
||||
command: |
|
||||
if [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=()
|
||||
fi
|
||||
|
||||
"${cov_prefix[@]}" ./run_clippy.sh
|
||||
|
||||
# Run rust unit tests
|
||||
- run:
|
||||
name: cargo test
|
||||
@@ -223,7 +209,7 @@ jobs:
|
||||
- "*"
|
||||
|
||||
check-codestyle-python:
|
||||
executor: zenith-executor
|
||||
executor: neon-executor
|
||||
steps:
|
||||
- checkout
|
||||
- restore_cache:
|
||||
@@ -246,7 +232,7 @@ jobs:
|
||||
command: poetry run mypy .
|
||||
|
||||
run-pytest:
|
||||
executor: zenith-executor
|
||||
executor: neon-executor
|
||||
parameters:
|
||||
# pytest args to specify the tests to run.
|
||||
#
|
||||
@@ -369,7 +355,7 @@ jobs:
|
||||
when: always
|
||||
command: |
|
||||
du -sh /tmp/test_output/*
|
||||
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" -delete
|
||||
find /tmp/test_output -type f ! -name "pg.log" ! -name "pageserver.log" ! -name "safekeeper.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" -delete
|
||||
du -sh /tmp/test_output/*
|
||||
- store_artifacts:
|
||||
path: /tmp/test_output
|
||||
@@ -390,7 +376,7 @@ jobs:
|
||||
- "*"
|
||||
|
||||
coverage-report:
|
||||
executor: zenith-xlarge-executor
|
||||
executor: neon-xlarge-executor
|
||||
steps:
|
||||
- attach_workspace:
|
||||
at: /tmp/zenith
|
||||
@@ -405,7 +391,7 @@ jobs:
|
||||
- run:
|
||||
name: Build coverage report
|
||||
command: |
|
||||
COMMIT_URL=https://github.com/zenithdb/zenith/commit/$CIRCLE_SHA1
|
||||
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
|
||||
|
||||
scripts/coverage \
|
||||
--dir=/tmp/zenith/coverage report \
|
||||
@@ -416,11 +402,11 @@ jobs:
|
||||
name: Upload coverage report
|
||||
command: |
|
||||
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
|
||||
REPORT_URL=https://zenithdb.github.io/zenith-coverage-data/$CIRCLE_SHA1
|
||||
COMMIT_URL=https://github.com/zenithdb/zenith/commit/$CIRCLE_SHA1
|
||||
REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
|
||||
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
|
||||
|
||||
scripts/git-upload \
|
||||
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/zenithdb/zenith-coverage-data.git \
|
||||
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
|
||||
--message="Add code coverage for $COMMIT_URL" \
|
||||
copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
|
||||
|
||||
@@ -437,7 +423,7 @@ jobs:
|
||||
\"target_url\": \"$REPORT_URL\"
|
||||
}"
|
||||
|
||||
# Build zenithdb/zenith:latest image and push it to Docker hub
|
||||
# Build neondatabase/neon:latest image and push it to Docker hub
|
||||
docker-image:
|
||||
docker:
|
||||
- image: cimg/base:2021.04
|
||||
@@ -451,18 +437,18 @@ jobs:
|
||||
- run:
|
||||
name: Build and push Docker image
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||
docker build \
|
||||
--pull \
|
||||
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
|
||||
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||
--tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:latest .
|
||||
docker push zenithdb/zenith:${DOCKER_TAG}
|
||||
docker push zenithdb/zenith:latest
|
||||
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:latest .
|
||||
docker push neondatabase/neon:${DOCKER_TAG}
|
||||
docker push neondatabase/neon:latest
|
||||
|
||||
# Build zenithdb/compute-node:latest image and push it to Docker hub
|
||||
# Build neondatabase/compute-node:latest image and push it to Docker hub
|
||||
docker-image-compute:
|
||||
docker:
|
||||
- image: cimg/base:2021.04
|
||||
@@ -470,31 +456,31 @@ jobs:
|
||||
- checkout
|
||||
- setup_remote_docker:
|
||||
docker_layer_caching: true
|
||||
# Build zenithdb/compute-tools:latest image and push it to Docker hub
|
||||
# Build neondatabase/compute-tools:latest image and push it to Docker hub
|
||||
# TODO: this should probably also use versioned tag, not just :latest.
|
||||
# XXX: but should it? We build and use it only locally now.
|
||||
- run:
|
||||
name: Build and push compute-tools Docker image
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||
docker build \
|
||||
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||
--tag zenithdb/compute-tools:latest -f Dockerfile.compute-tools .
|
||||
docker push zenithdb/compute-tools:latest
|
||||
--tag neondatabase/compute-tools:latest -f Dockerfile.compute-tools .
|
||||
docker push neondatabase/compute-tools:latest
|
||||
- run:
|
||||
name: Init postgres submodule
|
||||
command: git submodule update --init --depth 1
|
||||
- run:
|
||||
name: Build and push compute-node Docker image
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||
docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:latest vendor/postgres
|
||||
docker push zenithdb/compute-node:${DOCKER_TAG}
|
||||
docker push zenithdb/compute-node:latest
|
||||
docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:latest vendor/postgres
|
||||
docker push neondatabase/compute-node:${DOCKER_TAG}
|
||||
docker push neondatabase/compute-node:latest
|
||||
|
||||
# Build production zenithdb/zenith:release image and push it to Docker hub
|
||||
# Build production neondatabase/neon:release image and push it to Docker hub
|
||||
docker-image-release:
|
||||
docker:
|
||||
- image: cimg/base:2021.04
|
||||
@@ -508,18 +494,18 @@ jobs:
|
||||
- run:
|
||||
name: Build and push Docker image
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||
docker build \
|
||||
--pull \
|
||||
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
|
||||
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||
--tag zenithdb/zenith:${DOCKER_TAG} --tag zenithdb/zenith:release .
|
||||
docker push zenithdb/zenith:${DOCKER_TAG}
|
||||
docker push zenithdb/zenith:release
|
||||
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:release .
|
||||
docker push neondatabase/neon:${DOCKER_TAG}
|
||||
docker push neondatabase/neon:release
|
||||
|
||||
# Build production zenithdb/compute-node:release image and push it to Docker hub
|
||||
# Build production neondatabase/compute-node:release image and push it to Docker hub
|
||||
docker-image-compute-release:
|
||||
docker:
|
||||
- image: cimg/base:2021.04
|
||||
@@ -527,29 +513,29 @@ jobs:
|
||||
- checkout
|
||||
- setup_remote_docker:
|
||||
docker_layer_caching: true
|
||||
# Build zenithdb/compute-tools:release image and push it to Docker hub
|
||||
# Build neondatabase/compute-tools:release image and push it to Docker hub
|
||||
# TODO: this should probably also use versioned tag, not just :latest.
|
||||
# XXX: but should it? We build and use it only locally now.
|
||||
- run:
|
||||
name: Build and push compute-tools Docker image
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||
docker build \
|
||||
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||
--tag zenithdb/compute-tools:release -f Dockerfile.compute-tools .
|
||||
docker push zenithdb/compute-tools:release
|
||||
--tag neondatabase/compute-tools:release -f Dockerfile.compute-tools .
|
||||
docker push neondatabase/compute-tools:release
|
||||
- run:
|
||||
name: Init postgres submodule
|
||||
command: git submodule update --init --depth 1
|
||||
- run:
|
||||
name: Build and push compute-node Docker image
|
||||
command: |
|
||||
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
|
||||
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||
docker build --tag zenithdb/compute-node:${DOCKER_TAG} --tag zenithdb/compute-node:release vendor/postgres
|
||||
docker push zenithdb/compute-node:${DOCKER_TAG}
|
||||
docker push zenithdb/compute-node:release
|
||||
docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:release vendor/postgres
|
||||
docker push neondatabase/compute-node:${DOCKER_TAG}
|
||||
docker push neondatabase/compute-node:release
|
||||
|
||||
deploy-staging:
|
||||
docker:
|
||||
@@ -575,7 +561,7 @@ jobs:
|
||||
rm -f ssh-key ssh-key-cert.pub
|
||||
|
||||
ansible-playbook deploy.yaml -i staging.hosts
|
||||
rm -f zenith_install.tar.gz .zenith_current_version
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-staging-proxy:
|
||||
docker:
|
||||
@@ -593,13 +579,13 @@ jobs:
|
||||
name: Setup helm v3
|
||||
command: |
|
||||
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||
helm repo add zenithdb https://zenithdb.github.io/helm-charts
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
- run:
|
||||
name: Re-deploy proxy
|
||||
command: |
|
||||
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||
helm upgrade zenith-proxy zenithdb/zenith-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
|
||||
helm upgrade zenith-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||
|
||||
deploy-release:
|
||||
docker:
|
||||
@@ -625,7 +611,7 @@ jobs:
|
||||
rm -f ssh-key ssh-key-cert.pub
|
||||
|
||||
ansible-playbook deploy.yaml -i production.hosts
|
||||
rm -f zenith_install.tar.gz .zenith_current_version
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-release-proxy:
|
||||
docker:
|
||||
@@ -643,7 +629,7 @@ jobs:
|
||||
name: Setup helm v3
|
||||
command: |
|
||||
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||
helm repo add zenithdb https://zenithdb.github.io/helm-charts
|
||||
helm repo add zenithdb https://neondatabase.github.io/helm-charts
|
||||
- run:
|
||||
name: Re-deploy proxy
|
||||
command: |
|
||||
@@ -672,7 +658,7 @@ jobs:
|
||||
--data \
|
||||
"{
|
||||
\"state\": \"pending\",
|
||||
\"context\": \"zenith-remote-ci\",
|
||||
\"context\": \"neon-cloud-e2e\",
|
||||
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
|
||||
}"
|
||||
- run:
|
||||
@@ -688,7 +674,7 @@ jobs:
|
||||
"{
|
||||
\"ref\": \"main\",
|
||||
\"inputs\": {
|
||||
\"ci_job_name\": \"zenith-remote-ci\",
|
||||
\"ci_job_name\": \"neon-cloud-e2e\",
|
||||
\"commit_hash\": \"$CIRCLE_SHA1\",
|
||||
\"remote_repo\": \"$LOCAL_REPO\"
|
||||
}
|
||||
@@ -704,8 +690,8 @@ workflows:
|
||||
matrix:
|
||||
parameters:
|
||||
build_type: ["debug", "release"]
|
||||
- build-zenith:
|
||||
name: build-zenith-<< matrix.build_type >>
|
||||
- build-neon:
|
||||
name: build-neon-<< matrix.build_type >>
|
||||
matrix:
|
||||
parameters:
|
||||
build_type: ["debug", "release"]
|
||||
@@ -720,7 +706,7 @@ workflows:
|
||||
test_selection: batch_pg_regress
|
||||
needs_postgres_source: true
|
||||
requires:
|
||||
- build-zenith-<< matrix.build_type >>
|
||||
- build-neon-<< matrix.build_type >>
|
||||
- run-pytest:
|
||||
name: other-tests-<< matrix.build_type >>
|
||||
matrix:
|
||||
@@ -728,7 +714,7 @@ workflows:
|
||||
build_type: ["debug", "release"]
|
||||
test_selection: batch_others
|
||||
requires:
|
||||
- build-zenith-<< matrix.build_type >>
|
||||
- build-neon-<< matrix.build_type >>
|
||||
- run-pytest:
|
||||
name: benchmarks
|
||||
context: PERF_TEST_RESULT_CONNSTR
|
||||
@@ -737,7 +723,7 @@ workflows:
|
||||
run_in_parallel: false
|
||||
save_perf_report: true
|
||||
requires:
|
||||
- build-zenith-release
|
||||
- build-neon-release
|
||||
- coverage-report:
|
||||
# Context passes credentials for gh api
|
||||
context: CI_ACCESS_TOKEN
|
||||
@@ -828,11 +814,11 @@ workflows:
|
||||
- remote-ci-trigger:
|
||||
# Context passes credentials for gh api
|
||||
context: CI_ACCESS_TOKEN
|
||||
remote_repo: "zenithdb/console"
|
||||
remote_repo: "neondatabase/cloud"
|
||||
requires:
|
||||
# XXX: Successful build doesn't mean everything is OK, but
|
||||
# the job to be triggered takes so much time to complete (~22 min)
|
||||
# that it's better not to wait for the commented-out steps
|
||||
- build-zenith-debug
|
||||
- build-neon-release
|
||||
# - pg_regress-tests-release
|
||||
# - other-tests-release
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
# Helm chart values for zenith-proxy.
|
||||
# This is a YAML-formatted file.
|
||||
|
||||
image:
|
||||
repository: neondatabase/neon
|
||||
|
||||
settings:
|
||||
authEndpoint: "https://console.zenith.tech/authenticate_proxy_request/"
|
||||
uri: "https://console.zenith.tech/psql_session/"
|
||||
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
|
||||
uri: "https://console.neon.tech/psql_session/"
|
||||
|
||||
# -- Additional labels for zenith-proxy pods
|
||||
podLabels:
|
||||
@@ -25,7 +28,7 @@ exposedService:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: start.zenith.tech
|
||||
external-dns.alpha.kubernetes.io/hostname: start.zenith.tech,connect.neon.tech,pg.neon.tech
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
|
||||
30
.circleci/helm-values/staging.proxy-scram.yaml
Normal file
30
.circleci/helm-values/staging.proxy-scram.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
# Helm chart values for zenith-proxy.
|
||||
# This is a YAML-formatted file.
|
||||
|
||||
image:
|
||||
repository: neondatabase/neon
|
||||
|
||||
settings:
|
||||
authBackend: "console"
|
||||
authEndpoint: "http://console-staging.local:9095/management/api/v2"
|
||||
|
||||
# -- Additional labels for zenith-proxy pods
|
||||
podLabels:
|
||||
zenith_service: proxy-scram
|
||||
zenith_env: staging
|
||||
zenith_region: us-east-1
|
||||
zenith_region_slug: virginia
|
||||
|
||||
exposedService:
|
||||
annotations:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: *.cloud.stage.neon.tech
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
@@ -1,9 +1,12 @@
|
||||
# Helm chart values for zenith-proxy.
|
||||
# This is a YAML-formatted file.
|
||||
|
||||
image:
|
||||
repository: neondatabase/neon
|
||||
|
||||
settings:
|
||||
authEndpoint: "https://console.stage.zenith.tech/authenticate_proxy_request/"
|
||||
uri: "https://console.stage.zenith.tech/psql_session/"
|
||||
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
|
||||
uri: "https://console.stage.neon.tech/psql_session/"
|
||||
|
||||
# -- Additional labels for zenith-proxy pods
|
||||
podLabels:
|
||||
@@ -17,7 +20,7 @@ exposedService:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: start.stage.zenith.tech
|
||||
external-dns.alpha.kubernetes.io/hostname: connect.stage.neon.tech
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
|
||||
@@ -10,6 +10,8 @@ dep-format-version = "2"
|
||||
# Hakari works much better with the new feature resolver.
|
||||
# For more about the new feature resolver, see:
|
||||
# https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver
|
||||
# Have to keep the resolver still here since hakari requires this field,
|
||||
# despite it's now the default for 2021 edition & cargo.
|
||||
resolver = "2"
|
||||
|
||||
# Add triples corresponding to platforms commonly used by developers here.
|
||||
|
||||
13
.github/workflows/benchmarking.yml
vendored
13
.github/workflows/benchmarking.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
runs-on: [self-hosted, zenith-benchmarker]
|
||||
|
||||
env:
|
||||
PG_BIN: "/usr/pgsql-13/bin"
|
||||
POSTGRES_DISTRIB_DIR: "/usr/pgsql-13"
|
||||
|
||||
steps:
|
||||
- name: Checkout zenith repo
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
echo Poetry
|
||||
poetry --version
|
||||
echo Pgbench
|
||||
$PG_BIN/pgbench --version
|
||||
$POSTGRES_DISTRIB_DIR/bin/pgbench --version
|
||||
|
||||
# FIXME cluster setup is skipped due to various changes in console API
|
||||
# for now pre created cluster is used. When API gain some stability
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
|
||||
echo "Starting cluster"
|
||||
# wake up the cluster
|
||||
$PG_BIN/psql $BENCHMARK_CONNSTR -c "SELECT 1"
|
||||
$POSTGRES_DISTRIB_DIR/bin/psql $BENCHMARK_CONNSTR -c "SELECT 1"
|
||||
|
||||
- name: Run benchmark
|
||||
# pgbench is installed system wide from official repo
|
||||
@@ -83,8 +83,11 @@ jobs:
|
||||
# sudo yum install postgresql13-contrib
|
||||
# actual binaries are located in /usr/pgsql-13/bin/
|
||||
env:
|
||||
TEST_PG_BENCH_TRANSACTIONS_MATRIX: "5000,10000,20000"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "10,15"
|
||||
# The pgbench test runs two tests of given duration against each scale.
|
||||
# So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
|
||||
# Plus time needed to initialize the test databases.
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
|
||||
PLATFORM: "zenith-staging"
|
||||
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
|
||||
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
|
||||
|
||||
17
.github/workflows/testing.yml
vendored
17
.github/workflows/testing.yml
vendored
@@ -36,8 +36,7 @@ jobs:
|
||||
|
||||
- name: Install macOs postgres dependencies
|
||||
if: matrix.os == 'macos-latest'
|
||||
run: |
|
||||
brew install flex bison
|
||||
run: brew install flex bison
|
||||
|
||||
- name: Set pg revision for caching
|
||||
id: pg_ver
|
||||
@@ -53,8 +52,7 @@ jobs:
|
||||
|
||||
- name: Build postgres
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
make postgres
|
||||
run: make postgres
|
||||
|
||||
- name: Cache cargo deps
|
||||
id: cache_cargo
|
||||
@@ -64,13 +62,10 @@ jobs:
|
||||
~/.cargo/registry
|
||||
~/.cargo/git
|
||||
target
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
|
||||
|
||||
# Use `env CARGO_INCREMENTAL=0` to mitigate https://github.com/rust-lang/rust/issues/91696 for rustc 1.57.0
|
||||
- name: Run cargo build
|
||||
run: |
|
||||
env CARGO_INCREMENTAL=0 cargo build --workspace --bins --examples --tests
|
||||
- name: Run cargo clippy
|
||||
run: ./run_clippy.sh
|
||||
|
||||
- name: Run cargo test
|
||||
run: |
|
||||
env CARGO_INCREMENTAL=0 cargo test -- --nocapture --test-threads=1
|
||||
run: cargo test --all --all-targets
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -11,3 +11,6 @@ test_output/
|
||||
# Coverage
|
||||
*.profraw
|
||||
*.profdata
|
||||
|
||||
*.key
|
||||
*.crt
|
||||
|
||||
912
Cargo.lock
generated
912
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
13
Cargo.toml
13
Cargo.toml
@@ -3,22 +3,19 @@ members = [
|
||||
"compute_tools",
|
||||
"control_plane",
|
||||
"pageserver",
|
||||
"postgres_ffi",
|
||||
"proxy",
|
||||
"walkeeper",
|
||||
"safekeeper",
|
||||
"workspace_hack",
|
||||
"zenith",
|
||||
"zenith_metrics",
|
||||
"zenith_utils",
|
||||
"libs/*",
|
||||
]
|
||||
resolver = "2"
|
||||
|
||||
[profile.release]
|
||||
# This is useful for profiling and, to some extent, debug.
|
||||
# Besides, debug info should not affect the performance.
|
||||
debug = true
|
||||
|
||||
# This is only needed for proxy's tests
|
||||
# TODO: we should probably fork tokio-postgres-rustls instead
|
||||
# This is only needed for proxy's tests.
|
||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||
[patch.crates-io]
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
|
||||
15
Dockerfile
15
Dockerfile
@@ -1,7 +1,5 @@
|
||||
# Build Postgres
|
||||
#
|
||||
#FROM zimg/rust:1.56 AS pg-build
|
||||
FROM zenithdb/build:buster-20220309 AS pg-build
|
||||
FROM zimg/rust:1.58 AS pg-build
|
||||
WORKDIR /pg
|
||||
|
||||
USER root
|
||||
@@ -11,27 +9,26 @@ COPY Makefile Makefile
|
||||
|
||||
ENV BUILD_TYPE release
|
||||
RUN set -e \
|
||||
&& make -j $(nproc) -s postgres \
|
||||
&& mold -run make -j $(nproc) -s postgres \
|
||||
&& rm -rf tmp_install/build \
|
||||
&& tar -C tmp_install -czf /postgres_install.tar.gz .
|
||||
|
||||
# Build zenith binaries
|
||||
#
|
||||
#FROM zimg/rust:1.56 AS build
|
||||
FROM zenithdb/build:buster-20220309 AS build
|
||||
FROM zimg/rust:1.58 AS build
|
||||
ARG GIT_VERSION=local
|
||||
|
||||
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||
ARG AWS_ACCESS_KEY_ID
|
||||
ARG AWS_SECRET_ACCESS_KEY
|
||||
ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot
|
||||
|
||||
COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
|
||||
COPY . .
|
||||
|
||||
# Show build caching stats to check if it was used in the end.
|
||||
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, loosing the compilation stats.
|
||||
RUN cargo build --release && /usr/local/cargo/bin/cachepot -s
|
||||
RUN set -e \
|
||||
&& sudo -E "PATH=$PATH" mold -run cargo build --release \
|
||||
&& cachepot -s
|
||||
|
||||
# Build final image
|
||||
#
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
FROM rust:1.56.1-slim-buster
|
||||
WORKDIR /home/circleci/project
|
||||
|
||||
RUN set -e \
|
||||
&& apt-get update \
|
||||
&& apt-get -yq install \
|
||||
automake \
|
||||
libtool \
|
||||
build-essential \
|
||||
bison \
|
||||
flex \
|
||||
libreadline-dev \
|
||||
zlib1g-dev \
|
||||
libxml2-dev \
|
||||
libseccomp-dev \
|
||||
pkg-config \
|
||||
libssl-dev \
|
||||
clang
|
||||
|
||||
RUN set -e \
|
||||
&& rustup component add clippy \
|
||||
&& cargo install cargo-audit \
|
||||
&& cargo install --git https://github.com/paritytech/cachepot
|
||||
@@ -1,19 +1,18 @@
|
||||
# First transient image to build compute_tools binaries
|
||||
# NB: keep in sync with rust image version in .circle/config.yml
|
||||
FROM zenithdb/build:buster-20220309 AS rust-build
|
||||
|
||||
WORKDIR /zenith
|
||||
FROM zimg/rust:1.58 AS rust-build
|
||||
|
||||
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||
ARG AWS_ACCESS_KEY_ID
|
||||
ARG AWS_SECRET_ACCESS_KEY
|
||||
ENV RUSTC_WRAPPER /usr/local/cargo/bin/cachepot
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN cargo build -p compute_tools --release && /usr/local/cargo/bin/cachepot -s
|
||||
RUN set -e \
|
||||
&& sudo -E "PATH=$PATH" mold -run cargo build -p compute_tools --release \
|
||||
&& cachepot -s
|
||||
|
||||
# Final image that only has one binary
|
||||
FROM debian:buster-slim
|
||||
|
||||
COPY --from=rust-build /zenith/target/release/zenith_ctl /usr/local/bin/zenith_ctl
|
||||
COPY --from=rust-build /home/circleci/project/target/release/zenith_ctl /usr/local/bin/zenith_ctl
|
||||
|
||||
31
README.md
31
README.md
@@ -1,19 +1,22 @@
|
||||
# Zenith
|
||||
# Neon
|
||||
|
||||
Zenith is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.
|
||||
Neon is a serverless open source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes PostgreSQL storage layer by redistributing data across a cluster of nodes.
|
||||
|
||||
The project used to be called "Zenith". Many of the commands and code comments
|
||||
still refer to "zenith", but we are in the process of renaming things.
|
||||
|
||||
## Architecture overview
|
||||
|
||||
A Zenith installation consists of compute nodes and Zenith storage engine.
|
||||
A Neon installation consists of compute nodes and Neon storage engine.
|
||||
|
||||
Compute nodes are stateless PostgreSQL nodes, backed by Zenith storage engine.
|
||||
Compute nodes are stateless PostgreSQL nodes, backed by Neon storage engine.
|
||||
|
||||
Zenith storage engine consists of two major components:
|
||||
Neon storage engine consists of two major components:
|
||||
- Pageserver. Scalable storage backend for compute nodes.
|
||||
- WAL service. The service that receives WAL from compute node and ensures that it is stored durably.
|
||||
|
||||
Pageserver consists of:
|
||||
- Repository - Zenith storage implementation.
|
||||
- Repository - Neon storage implementation.
|
||||
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
|
||||
- Page service - service that communicates with compute nodes and responds with pages from the repository.
|
||||
- WAL redo - service that builds pages from base images and WAL records on Page service request.
|
||||
@@ -28,17 +31,17 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec
|
||||
libssl-dev clang pkg-config libpq-dev
|
||||
```
|
||||
|
||||
[Rust] 1.56.1 or later is also required.
|
||||
[Rust] 1.58 or later is also required.
|
||||
|
||||
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively.
|
||||
|
||||
To run the integration tests or Python scripts (not required to use the code), install
|
||||
Python (3.7 or higher), and install python3 packages using `./scripts/pysync` (requires poetry) in the project directory.
|
||||
|
||||
2. Build zenith and patched postgres
|
||||
2. Build neon and patched postgres
|
||||
```sh
|
||||
git clone --recursive https://github.com/zenithdb/zenith.git
|
||||
cd zenith
|
||||
git clone --recursive https://github.com/neondatabase/neon.git
|
||||
cd neon
|
||||
make -j5
|
||||
```
|
||||
|
||||
@@ -126,7 +129,7 @@ INSERT 0 1
|
||||
## Running tests
|
||||
|
||||
```sh
|
||||
git clone --recursive https://github.com/zenithdb/zenith.git
|
||||
git clone --recursive https://github.com/neondatabase/neon.git
|
||||
make # builds also postgres and installs it to ./tmp_install
|
||||
./scripts/pytest
|
||||
```
|
||||
@@ -141,14 +144,14 @@ To view your `rustdoc` documentation in a browser, try running `cargo doc --no-d
|
||||
|
||||
### Postgres-specific terms
|
||||
|
||||
Due to Zenith's very close relation with PostgreSQL internals, there are numerous specific terms used.
|
||||
Due to Neon's very close relation with PostgreSQL internals, there are numerous specific terms used.
|
||||
Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, while MiB would be technically more correct, it's inconsistent with what PostgreSQL code and its documentation use.
|
||||
|
||||
To get more familiar with this aspect, refer to:
|
||||
|
||||
- [Zenith glossary](/docs/glossary.md)
|
||||
- [Neon glossary](/docs/glossary.md)
|
||||
- [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
|
||||
- Other PostgreSQL documentation and sources (Zenith fork sources can be found [here](https://github.com/zenithdb/postgres))
|
||||
- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))
|
||||
|
||||
## Join the development
|
||||
|
||||
|
||||
@@ -11,10 +11,11 @@ clap = "3.0"
|
||||
env_logger = "0.9"
|
||||
hyper = { version = "0.14", features = ["full"] }
|
||||
log = { version = "0.4", features = ["std", "serde"] }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="9eb0dbfbeb6a6c1b79099b9f7ae4a8c021877858" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
regex = "1"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
tar = "0.4"
|
||||
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
|
||||
@@ -38,6 +38,7 @@ use clap::Arg;
|
||||
use log::info;
|
||||
use postgres::{Client, NoTls};
|
||||
|
||||
use compute_tools::checker::create_writablity_check_data;
|
||||
use compute_tools::config;
|
||||
use compute_tools::http_api::launch_http_server;
|
||||
use compute_tools::logger::*;
|
||||
@@ -128,6 +129,8 @@ fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
|
||||
|
||||
handle_roles(&read_state.spec, &mut client)?;
|
||||
handle_databases(&read_state.spec, &mut client)?;
|
||||
handle_grants(&read_state.spec, &mut client)?;
|
||||
create_writablity_check_data(&mut client)?;
|
||||
|
||||
// 'Close' connection
|
||||
drop(client);
|
||||
@@ -155,7 +158,7 @@ fn run_compute(state: &Arc<RwLock<ComputeState>>) -> Result<ExitStatus> {
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
// TODO: re-use `zenith_utils::logging` later
|
||||
// TODO: re-use `utils::logging` later
|
||||
init_logger(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
// Env variable is set by `cargo`
|
||||
|
||||
46
compute_tools/src/checker.rs
Normal file
46
compute_tools/src/checker.rs
Normal file
@@ -0,0 +1,46 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use log::error;
|
||||
use postgres::Client;
|
||||
use tokio_postgres::NoTls;
|
||||
|
||||
use crate::zenith::ComputeState;
|
||||
|
||||
pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
|
||||
let query = "
|
||||
CREATE TABLE IF NOT EXISTS health_check (
|
||||
id serial primary key,
|
||||
updated_at timestamptz default now()
|
||||
);
|
||||
INSERT INTO health_check VALUES (1, now())
|
||||
ON CONFLICT (id) DO UPDATE
|
||||
SET updated_at = now();";
|
||||
let result = client.simple_query(query)?;
|
||||
if result.len() < 2 {
|
||||
return Err(anyhow::format_err!("executed {} queries", result.len()));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn check_writability(state: &Arc<RwLock<ComputeState>>) -> Result<()> {
|
||||
let connstr = state.read().unwrap().connstr.clone();
|
||||
let (client, connection) = tokio_postgres::connect(&connstr, NoTls).await?;
|
||||
if client.is_closed() {
|
||||
return Err(anyhow!("connection to postgres closed"));
|
||||
}
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
error!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
let result = client
|
||||
.simple_query("UPDATE health_check SET updated_at = now() WHERE id = 1;")
|
||||
.await?;
|
||||
|
||||
if result.len() != 1 {
|
||||
return Err(anyhow!("statement can't be executed"));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -11,7 +11,7 @@ use log::{error, info};
|
||||
use crate::zenith::*;
|
||||
|
||||
// Service function to handle all available routes.
|
||||
fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
|
||||
async fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body> {
|
||||
match (req.method(), req.uri().path()) {
|
||||
// Timestamp of the last Postgres activity in the plain text.
|
||||
(&Method::GET, "/last_activity") => {
|
||||
@@ -29,6 +29,15 @@ fn routes(req: Request<Body>, state: Arc<RwLock<ComputeState>>) -> Response<Body
|
||||
Response::new(Body::from(format!("{}", state.ready)))
|
||||
}
|
||||
|
||||
(&Method::GET, "/check_writability") => {
|
||||
info!("serving /check_writability GET request");
|
||||
let res = crate::checker::check_writability(&state).await;
|
||||
match res {
|
||||
Ok(_) => Response::new(Body::from("true")),
|
||||
Err(e) => Response::new(Body::from(e.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
// Return the `404 Not Found` for any other routes.
|
||||
_ => {
|
||||
let mut not_found = Response::new(Body::from("404 Not Found"));
|
||||
@@ -48,7 +57,7 @@ async fn serve(state: Arc<RwLock<ComputeState>>) {
|
||||
async move {
|
||||
Ok::<_, Infallible>(service_fn(move |req: Request<Body>| {
|
||||
let state = state.clone();
|
||||
async move { Ok::<_, Infallible>(routes(req, state)) }
|
||||
async move { Ok::<_, Infallible>(routes(req, state).await) }
|
||||
}))
|
||||
}
|
||||
});
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
//! Various tools and helpers to handle cluster / compute node (Postgres)
|
||||
//! configuration.
|
||||
//!
|
||||
pub mod checker;
|
||||
pub mod config;
|
||||
pub mod http_api;
|
||||
#[macro_use]
|
||||
|
||||
@@ -132,7 +132,14 @@ impl Role {
|
||||
let mut params: String = "LOGIN".to_string();
|
||||
|
||||
if let Some(pass) = &self.encrypted_password {
|
||||
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
|
||||
// Some time ago we supported only md5 and treated all encrypted_password as md5.
|
||||
// Now we also support SCRAM-SHA-256 and to preserve compatibility
|
||||
// we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
|
||||
if pass.starts_with("SCRAM-SHA-256") {
|
||||
params.push_str(&format!(" PASSWORD '{}'", pass));
|
||||
} else {
|
||||
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
|
||||
}
|
||||
} else {
|
||||
params.push_str(" PASSWORD NULL");
|
||||
}
|
||||
|
||||
@@ -244,3 +244,24 @@ pub fn handle_databases(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Grant CREATE ON DATABASE to the database owner
|
||||
// to allow clients create trusted extensions.
|
||||
pub fn handle_grants(spec: &ClusterSpec, client: &mut Client) -> Result<()> {
|
||||
info!("cluster spec grants:");
|
||||
|
||||
for db in &spec.cluster.databases {
|
||||
let dbname = &db.name;
|
||||
|
||||
let query: String = format!(
|
||||
"GRANT CREATE ON DATABASE {} TO {}",
|
||||
dbname.quote(),
|
||||
db.owner.quote()
|
||||
);
|
||||
info!("grant query {}", &query);
|
||||
|
||||
client.execute(query.as_str(), &[])?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
tar = "0.4.33"
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_with = "1.12.0"
|
||||
toml = "0.5"
|
||||
@@ -18,6 +18,6 @@ url = "2.2.2"
|
||||
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
|
||||
pageserver = { path = "../pageserver" }
|
||||
walkeeper = { path = "../walkeeper" }
|
||||
zenith_utils = { path = "../zenith_utils" }
|
||||
safekeeper = { path = "../safekeeper" }
|
||||
utils = { path = "../libs/utils" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
|
||||
@@ -11,11 +11,12 @@ use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use zenith_utils::connstring::connection_host_port;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::ZTenantId;
|
||||
use zenith_utils::zid::ZTimelineId;
|
||||
use utils::{
|
||||
connstring::connection_host_port,
|
||||
lsn::Lsn,
|
||||
postgres_backend::AuthType,
|
||||
zid::{ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::local_env::LocalEnv;
|
||||
use crate::postgresql_conf::PostgresConf;
|
||||
@@ -272,12 +273,7 @@ impl PostgresNode {
|
||||
conf.append("wal_sender_timeout", "5s");
|
||||
conf.append("listen_addresses", &self.address.ip().to_string());
|
||||
conf.append("port", &self.address.port().to_string());
|
||||
|
||||
// Never clean up old WAL. TODO: We should use a replication
|
||||
// slot or something proper, to prevent the compute node
|
||||
// from removing WAL that hasn't been streamed to the safekeeper or
|
||||
// page server yet. (gh issue #349)
|
||||
conf.append("wal_keep_size", "10TB");
|
||||
conf.append("wal_keep_size", "0");
|
||||
|
||||
// Configure the node to fetch pages from pageserver
|
||||
let pageserver_connstr = {
|
||||
@@ -331,14 +327,14 @@ impl PostgresNode {
|
||||
// Configure the node to connect to the safekeepers
|
||||
conf.append("synchronous_standby_names", "walproposer");
|
||||
|
||||
let wal_acceptors = self
|
||||
let safekeepers = self
|
||||
.env
|
||||
.safekeepers
|
||||
.iter()
|
||||
.map(|sk| format!("localhost:{}", sk.pg_port))
|
||||
.collect::<Vec<String>>()
|
||||
.join(",");
|
||||
conf.append("wal_acceptors", &wal_acceptors);
|
||||
conf.append("wal_acceptors", &safekeepers);
|
||||
} else {
|
||||
// We only use setup without safekeepers for tests,
|
||||
// and don't care about data durability on pageserver,
|
||||
@@ -420,10 +416,15 @@ impl PostgresNode {
|
||||
if let Some(token) = auth_token {
|
||||
cmd.env("ZENITH_AUTH_TOKEN", token);
|
||||
}
|
||||
let pg_ctl = cmd.status().context("pg_ctl failed")?;
|
||||
|
||||
if !pg_ctl.success() {
|
||||
anyhow::bail!("pg_ctl failed");
|
||||
let pg_ctl = cmd.output().context("pg_ctl failed")?;
|
||||
if !pg_ctl.status.success() {
|
||||
anyhow::bail!(
|
||||
"pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
|
||||
pg_ctl.status,
|
||||
String::from_utf8_lossy(&pg_ctl.stdout),
|
||||
String::from_utf8_lossy(&pg_ctl.stderr),
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -11,9 +11,11 @@ use std::env;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use zenith_utils::auth::{encode_from_key_file, Claims, Scope};
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId};
|
||||
use utils::{
|
||||
auth::{encode_from_key_file, Claims, Scope},
|
||||
postgres_backend::AuthType,
|
||||
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
|
||||
@@ -13,15 +13,17 @@ use nix::unistd::Pid;
|
||||
use postgres::Config;
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
use reqwest::{IntoUrl, Method};
|
||||
use safekeeper::http::models::TimelineCreateRequest;
|
||||
use thiserror::Error;
|
||||
use walkeeper::http::models::TimelineCreateRequest;
|
||||
use zenith_utils::http::error::HttpErrorBody;
|
||||
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
|
||||
use utils::{
|
||||
connstring::connection_address,
|
||||
http::error::HttpErrorBody,
|
||||
zid::{ZNodeId, ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::local_env::{LocalEnv, SafekeeperConf};
|
||||
use crate::storage::PageServerNode;
|
||||
use crate::{fill_rust_env_vars, read_pidfile};
|
||||
use zenith_utils::connstring::connection_address;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SafekeeperHttpError {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use std::net::TcpStream;
|
||||
use std::path::PathBuf;
|
||||
@@ -9,21 +10,23 @@ use anyhow::{bail, Context};
|
||||
use nix::errno::Errno;
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use pageserver::http::models::{TenantCreateRequest, TimelineCreateRequest};
|
||||
use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
|
||||
use pageserver::timelines::TimelineInfo;
|
||||
use postgres::{Config, NoTls};
|
||||
use reqwest::blocking::{Client, RequestBuilder, Response};
|
||||
use reqwest::{IntoUrl, Method};
|
||||
use thiserror::Error;
|
||||
use zenith_utils::http::error::HttpErrorBody;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{ZTenantId, ZTimelineId};
|
||||
use utils::{
|
||||
connstring::connection_address,
|
||||
http::error::HttpErrorBody,
|
||||
lsn::Lsn,
|
||||
postgres_backend::AuthType,
|
||||
zid::{ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::local_env::LocalEnv;
|
||||
use crate::{fill_rust_env_vars, read_pidfile};
|
||||
use pageserver::tenant_mgr::TenantInfo;
|
||||
use zenith_utils::connstring::connection_address;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum PageserverHttpError {
|
||||
@@ -342,10 +345,36 @@ impl PageServerNode {
|
||||
pub fn tenant_create(
|
||||
&self,
|
||||
new_tenant_id: Option<ZTenantId>,
|
||||
settings: HashMap<&str, &str>,
|
||||
) -> anyhow::Result<Option<ZTenantId>> {
|
||||
let tenant_id_string = self
|
||||
.http_request(Method::POST, format!("{}/tenant", self.http_base_url))
|
||||
.json(&TenantCreateRequest { new_tenant_id })
|
||||
.json(&TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
checkpoint_distance: settings
|
||||
.get("checkpoint_distance")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()?,
|
||||
compaction_target_size: settings
|
||||
.get("compaction_target_size")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()?,
|
||||
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
|
||||
compaction_threshold: settings
|
||||
.get("compaction_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()?,
|
||||
gc_horizon: settings
|
||||
.get("gc_horizon")
|
||||
.map(|x| x.parse::<u64>())
|
||||
.transpose()?,
|
||||
gc_period: settings.get("gc_period").map(|x| x.to_string()),
|
||||
image_creation_threshold: settings
|
||||
.get("image_creation_threshold")
|
||||
.map(|x| x.parse::<usize>())
|
||||
.transpose()?,
|
||||
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?
|
||||
.json::<Option<String>>()?;
|
||||
@@ -362,6 +391,35 @@ impl PageServerNode {
|
||||
.transpose()
|
||||
}
|
||||
|
||||
pub fn tenant_config(&self, tenant_id: ZTenantId, settings: HashMap<&str, &str>) -> Result<()> {
|
||||
self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))
|
||||
.json(&TenantConfigRequest {
|
||||
tenant_id,
|
||||
checkpoint_distance: settings
|
||||
.get("checkpoint_distance")
|
||||
.map(|x| x.parse::<u64>().unwrap()),
|
||||
compaction_target_size: settings
|
||||
.get("compaction_target_size")
|
||||
.map(|x| x.parse::<u64>().unwrap()),
|
||||
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
|
||||
compaction_threshold: settings
|
||||
.get("compaction_threshold")
|
||||
.map(|x| x.parse::<usize>().unwrap()),
|
||||
gc_horizon: settings
|
||||
.get("gc_horizon")
|
||||
.map(|x| x.parse::<u64>().unwrap()),
|
||||
gc_period: settings.get("gc_period").map(|x| x.to_string()),
|
||||
image_creation_threshold: settings
|
||||
.get("image_creation_threshold")
|
||||
.map(|x| x.parse::<usize>().unwrap()),
|
||||
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
|
||||
})
|
||||
.send()?
|
||||
.error_from_body()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn timeline_list(&self, tenant_id: &ZTenantId) -> anyhow::Result<Vec<TimelineInfo>> {
|
||||
let timeline_infos: Vec<TimelineInfo> = self
|
||||
.http_request(
|
||||
|
||||
@@ -7,8 +7,8 @@
|
||||
- [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
|
||||
- [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
|
||||
- [sourcetree.md](sourcetree.md) — Overview of the source tree layeout.
|
||||
- [pageserver/README](/pageserver/README) — pageserver overview.
|
||||
- [postgres_ffi/README](/postgres_ffi/README) — Postgres FFI overview.
|
||||
- [pageserver/README.md](/pageserver/README.md) — pageserver overview.
|
||||
- [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
|
||||
- [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.
|
||||
- [walkeeper/README](/walkeeper/README) — WAL service overview.
|
||||
- [safekeeper/README.md](/safekeeper/README.md) — WAL service overview.
|
||||
- [core_changes.md](core_changes.md) - Description of Zenith changes in Postgres core
|
||||
|
||||
@@ -27,4 +27,4 @@ management_token = jwt.encode({"scope": "pageserverapi"}, auth_keys.priv, algori
|
||||
tenant_token = jwt.encode({"scope": "tenant", "tenant_id": ps.initial_tenant}, auth_keys.priv, algorithm="RS256")
|
||||
```
|
||||
|
||||
Utility functions to work with jwts in rust are located in zenith_utils/src/auth.rs
|
||||
Utility functions to work with jwts in rust are located in libs/utils/src/auth.rs
|
||||
|
||||
@@ -29,7 +29,7 @@ Each Branch lives in a corresponding timeline[] and has an ancestor[].
|
||||
|
||||
NOTE: This is an overloaded term.
|
||||
|
||||
A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint;
|
||||
A checkpoint record in the WAL marks a point in the WAL sequence at which it is guaranteed that all data files have been updated with all information from shared memory modified before that checkpoint;
|
||||
|
||||
### Checkpoint (Layered repository)
|
||||
|
||||
@@ -108,10 +108,10 @@ PostgreSQL LSNs and functions to monitor them:
|
||||
* `pg_current_wal_lsn()` - Returns the current write-ahead log write location.
|
||||
* `pg_current_wal_flush_lsn()` - Returns the current write-ahead log flush location.
|
||||
* `pg_last_wal_receive_lsn()` - Returns the last write-ahead log location that has been received and synced to disk by streaming replication. While streaming replication is in progress this will increase monotonically.
|
||||
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
|
||||
* `pg_last_wal_replay_lsn ()` - Returns the last write-ahead log location that has been replayed during recovery. If recovery is still in progress this will increase monotonically.
|
||||
[source PostgreSQL documentation](https://www.postgresql.org/docs/devel/functions-admin.html):
|
||||
|
||||
Zenith safekeeper LSNs. For more check [walkeeper/README_PROTO.md](/walkeeper/README_PROTO.md)
|
||||
Zenith safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/README_PROTO.md)
|
||||
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
|
||||
* `RestartLSN`: position in WAL confirmed by all safekeepers.
|
||||
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
|
||||
@@ -190,7 +190,7 @@ or we do not support them in zenith yet (pg_commit_ts).
|
||||
Tenant represents a single customer, interacting with Zenith.
|
||||
Wal redo[] activity, timelines[], layers[] are managed for each tenant independently.
|
||||
One pageserver[] can serve multiple tenants at once.
|
||||
One safekeeper
|
||||
One safekeeper
|
||||
|
||||
See `docs/multitenancy.md` for more.
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ Init empty pageserver using `initdb` in temporary directory.
|
||||
|
||||
`--storage_dest=FILE_PREFIX | S3_PREFIX |...` option defines object storage type, all other parameters are passed via env variables. Inspired by WAL-G style naming : https://wal-g.readthedocs.io/STORAGES/.
|
||||
|
||||
Save`storage_dest` and other parameters in config.
|
||||
Save`storage_dest` and other parameters in config.
|
||||
Push snapshots to `storage_dest` in background.
|
||||
|
||||
```
|
||||
@@ -21,7 +21,7 @@ zenith start
|
||||
```
|
||||
|
||||
#### 2. Restart pageserver (manually or crash-recovery).
|
||||
Take `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`.
|
||||
Take `storage_dest` from pageserver config, start pageserver from latest snapshot in `storage_dest`.
|
||||
Push snapshots to `storage_dest` in background.
|
||||
|
||||
```
|
||||
@@ -32,7 +32,7 @@ zenith start
|
||||
Start pageserver from existing snapshot.
|
||||
Path to snapshot provided via `--snapshot_path=FILE_PREFIX | S3_PREFIX | ...`
|
||||
Do not save `snapshot_path` and `snapshot_format` in config, as it is a one-time operation.
|
||||
Save`storage_dest` parameters in config.
|
||||
Save`storage_dest` parameters in config.
|
||||
Push snapshots to `storage_dest` in background.
|
||||
```
|
||||
//I.e. we want to start zenith on top of existing $PGDATA and use s3 as a persistent storage.
|
||||
@@ -42,15 +42,15 @@ zenith start
|
||||
How to pass credentials needed for `snapshot_path`?
|
||||
|
||||
#### 4. Export.
|
||||
Manually push snapshot to `snapshot_path` which differs from `storage_dest`
|
||||
Manually push snapshot to `snapshot_path` which differs from `storage_dest`
|
||||
Optionally set `snapshot_format`, which can be plain pgdata format or zenith format.
|
||||
```
|
||||
zenith export --snapshot_path=FILE_PREFIX --snapshot_format=pgdata
|
||||
```
|
||||
|
||||
#### Notes and questions
|
||||
- walkeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?
|
||||
- safekeeper s3_offload should use same (similar) syntax for storage. How to set it in UI?
|
||||
- Why do we need `zenith init` as a separate command? Can't we init everything at first start?
|
||||
- We can think of better names for all options.
|
||||
- Export to plain postgres format will be useless, if we are not 100% compatible on page level.
|
||||
I can recall at least one such difference - PD_WAL_LOGGED flag in pages.
|
||||
I can recall at least one such difference - PD_WAL_LOGGED flag in pages.
|
||||
|
||||
151
docs/rfcs/016-connection-routing.md
Normal file
151
docs/rfcs/016-connection-routing.md
Normal file
@@ -0,0 +1,151 @@
|
||||
# Dispatching a connection
|
||||
|
||||
For each client connection, Neon service needs to authenticate the
|
||||
connection, and route it to the right PostgreSQL instance.
|
||||
|
||||
## Authentication
|
||||
|
||||
There are three different ways to authenticate:
|
||||
|
||||
- anonymous; no authentication needed
|
||||
- PostgreSQL authentication
|
||||
- github single sign-on using browser
|
||||
|
||||
In anonymous access, the user doesn't need to perform any
|
||||
authentication at all. This can be used e.g. in interactive PostgreSQL
|
||||
documentation, allowing you to run the examples very quickly. Similar
|
||||
to sqlfiddle.com.
|
||||
|
||||
PostgreSQL authentication works the same as always. All the different
|
||||
PostgreSQL authentication options like SCRAM, kerberos, etc. are
|
||||
available. [1]
|
||||
|
||||
The third option is to authenticate with github single sign-on. When
|
||||
you open the connection in psql, you get a link that you open with
|
||||
your browser. Opening the link redirects you to github authentication,
|
||||
and lets the connection to proceed. This is also known as "Link auth" [2].
|
||||
|
||||
|
||||
## Routing the connection
|
||||
|
||||
When a client starts a connection, it needs to be routed to the
|
||||
correct PostgreSQL instance. Routing can be done by the proxy, acting
|
||||
as a man-in-the-middle, or the connection can be routed at the network
|
||||
level based on the hostname or IP address.
|
||||
|
||||
Either way, Neon needs to identify which PostgreSQL instance the
|
||||
connection should be routed to. If the instance is not already
|
||||
running, it needs to be started. Some connections always require a new
|
||||
PostgreSQL instance to be created, e.g. if you want to run a one-off
|
||||
query against a particular point-in-time.
|
||||
|
||||
The PostgreSQL instance is identified by:
|
||||
- Neon account (possibly anonymous)
|
||||
- cluster (known as tenant in the storage?)
|
||||
- branch or snapshot name
|
||||
- timestamp (PITR)
|
||||
- primary or read-replica
|
||||
- one-off read replica
|
||||
- one-off writeable branch
|
||||
|
||||
When you are using regular PostgreSQL authentication or anonymous
|
||||
access, the connection URL needs to contain all the information needed
|
||||
for the routing. With github single sign-on, the browser is involved
|
||||
and some details - the Neon account in particular - can be deduced
|
||||
from the authentication exchange.
|
||||
|
||||
There are three methods for identifying the PostgreSQL instance:
|
||||
|
||||
- Browser interaction (link auth)
|
||||
- Options in the connection URL and the domain name
|
||||
- A pre-defined endpoint, identified by domain name or IP address
|
||||
|
||||
### Link Auth
|
||||
|
||||
postgres://<username>@start.neon.tech/<dbname>
|
||||
|
||||
This gives you a link that you open in browser. Clicking the link
|
||||
performs github authentication, and the Neon account name is
|
||||
provided to the proxy behind the scenes. The proxy routes the
|
||||
connection to the primary PostgreSQL instance in cluster called
|
||||
"main", branch "main".
|
||||
|
||||
Further ideas:
|
||||
- You could pre-define a different target for link auth
|
||||
connections in the UI.
|
||||
- You could have a drop-down in the browser, allowing you to connect
|
||||
to any cluster you want. Link Auth can be like Teleport.
|
||||
|
||||
### Connection URL
|
||||
|
||||
The connection URL looks like this:
|
||||
|
||||
postgres://<username>@<cluster-id>.db.neon.tech/<dbname>
|
||||
|
||||
By default, this connects you to the primary PostgreSQL instance
|
||||
running on the "main" branch in the named cluster [3]. However, you can
|
||||
change that by specifying options in the connection URL. The following
|
||||
options are supported:
|
||||
|
||||
| option name | Description | Examples |
|
||||
| --- | --- | --- |
|
||||
| cluster | Cluster name | cluster:myproject |
|
||||
| branch | Branch name | branch:main |
|
||||
| timestamp | Connect to an instance at given point-in-time. | timestamp:2022-04-08 timestamp:2022-04-08T11:42:16Z |
|
||||
| lsn | Connect to an instance at given LSN | lsn:0/12FF0420 |
|
||||
| read-replica | Connect to a read-replica. If the parameter is 'new', a new instance is created for this session. | read-replica read-replica:new |
|
||||
|
||||
For example, to read branch 'testing' as it was on Mar 31, 2022, you could
|
||||
specify a timestamp in the connection URL [4]:
|
||||
|
||||
postgres://alice@cluster-1234.db.neon.tech/postgres?options=branch:testing,timestamp:2022-03-31
|
||||
|
||||
Connecting with cluster name and options can be disabled in the UI. If
|
||||
disabled, you can only connect using a pre-defined endpoint.
|
||||
|
||||
### Pre-defined Endpoint
|
||||
|
||||
Instead of providing the cluster name, branch, and all those options
|
||||
in the connection URL, you can define a named endpoint with the same
|
||||
options.
|
||||
|
||||
In the UI, click "create endpoint". Fill in the details:
|
||||
|
||||
- Cluster name
|
||||
- Branch
|
||||
- timestamp or LSN
|
||||
- is this for the primary or for a read replica
|
||||
- etc.
|
||||
|
||||
When you click Finish, a named endpoint is created. You can now use the endpoint ID to connect:
|
||||
|
||||
postgres://<username>@<endpoint-id>.endpoint.neon.tech/<dbname>
|
||||
|
||||
|
||||
An endpoint can be assigned a static or dynamic IP address, so that
|
||||
you can connect to it with clients that don't support TLS SNI. Maybe
|
||||
bypass the proxy altogether, but that ought to be invisible to the
|
||||
user.
|
||||
|
||||
You can limit the range of source IP addresses that are allowed to
|
||||
connect to an endpoint. An endpoint can also be exposed in an Amazon
|
||||
VPC, allowing direct connections from applications.
|
||||
|
||||
|
||||
# Footnotes
|
||||
|
||||
[1] I'm not sure how feasible it is to set up configure like Kerberos
|
||||
or LDAP in a cloud environment. But in principle I think we should
|
||||
allow customers to have the full power of PostgreSQL, including all
|
||||
authentication options. However, it's up to the customer to configure
|
||||
it correctly.
|
||||
|
||||
[2] Link is a way to both authenticate and to route the connection
|
||||
|
||||
[3] This assumes that cluster-ids are globally unique, across all
|
||||
Neon accounts.
|
||||
|
||||
[4] The syntax accepted in the connection URL is limited by libpq. The
|
||||
only way to pass arbitrary options to the server (or our proxy) is
|
||||
with the "options" keyword, and the options must be percent-encoded. I
|
||||
think the above would work but i haven't tested it
|
||||
79
docs/rfcs/cluster-size-limits.md
Normal file
79
docs/rfcs/cluster-size-limits.md
Normal file
@@ -0,0 +1,79 @@
|
||||
Cluster size limits
|
||||
==================
|
||||
|
||||
## Summary
|
||||
|
||||
One of the resource consumption limits for free-tier users is a cluster size limit.
|
||||
|
||||
To enforce it, we need to calculate the timeline size and check if the limit is reached before relation create/extend operations.
|
||||
If the limit is reached, the query must fail with some meaningful error/warning.
|
||||
We may want to exempt some operations from the quota to allow users free space to fit back into the limit.
|
||||
|
||||
The stateless compute node that performs validation is separate from the storage that calculates the usage, so we need to exchange cluster size information between those components.
|
||||
|
||||
## Motivation
|
||||
|
||||
Limit the maximum size of a PostgreSQL instance to limit free tier users (and other tiers in the future).
|
||||
First of all, this is needed to control our free tier production costs.
|
||||
Another reason to limit resources is risk management — we haven't (fully) tested and optimized zenith for big clusters,
|
||||
so we don't want to give users access to the functionality that we don't think is ready.
|
||||
|
||||
## Components
|
||||
|
||||
* pageserver - calculate the size consumed by a timeline and add it to the feedback message.
|
||||
* safekeeper - pass feedback message from pageserver to compute.
|
||||
* compute - receive feedback message, enforce size limit based on GUC `zenith.max_cluster_size`.
|
||||
* console - set and update `zenith.max_cluster_size` setting
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
First of all, it's necessary to define timeline size.
|
||||
|
||||
The current approach is to count all data, including SLRUs. (not including WAL)
|
||||
Here we think of it as a physical disk underneath the Postgres cluster.
|
||||
This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
|
||||
|
||||
Alternatively, we could count only relation data. As in pg_database_size().
|
||||
This approach is somewhat more user-friendly because it is the data that is really affected by the user.
|
||||
On the other hand, it puts us in a weaker position than other services, i.e., RDS.
|
||||
We will need to refactor the timeline_size counter or add another counter to implement it.
|
||||
|
||||
Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
|
||||
Then this size should be reported to compute node.
|
||||
|
||||
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
|
||||
|
||||
(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
|
||||
|
||||
This message is received by the safekeeper and propagated to compute node as a part of `AppendResponse`.
|
||||
|
||||
Finally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.
|
||||
|
||||
And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > zenith.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
|
||||
(see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))
|
||||
|
||||
TODO:
|
||||
We can allow autovacuum processes to bypass this check, simply checking `IsAutoVacuumWorkerProcess()`.
|
||||
It would be nice to allow manual VACUUM and VACUUM FULL to bypass the check, but it's uneasy to distinguish these operations at the low level.
|
||||
See issues https://github.com/neondatabase/neon/issues/1245
|
||||
https://github.com/zenithdb/zenith/issues/1445
|
||||
|
||||
TODO:
|
||||
We should warn users if the limit is soon to be reached.
|
||||
|
||||
### **Reliability, failure modes and corner cases**
|
||||
|
||||
1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
|
||||
|
||||
If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
|
||||
|
||||
So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
|
||||
|
||||
Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.
|
||||
|
||||
|
||||
### **Security implications**
|
||||
|
||||
We treat compute as an untrusted component. That's why we try to isolate it with secure container runtime or a VM.
|
||||
Malicious users may change the `zenith.max_cluster_size`, so we need an extra size limit check.
|
||||
To cover this case, we also monitor the compute node size in the console.
|
||||
@@ -74,6 +74,10 @@ Every `compaction_period` seconds, the page server checks if
|
||||
maintenance operations, like compaction, are needed on the layer
|
||||
files. Default is 1 s, which should be fine.
|
||||
|
||||
#### compaction_target_size
|
||||
|
||||
File sizes for L0 delta and L1 image layers. Default is 128MB.
|
||||
|
||||
#### gc_horizon
|
||||
|
||||
`gz_horizon` determines how much history is retained, to allow
|
||||
@@ -85,6 +89,14 @@ away.
|
||||
|
||||
Interval at which garbage collection is triggered. Default is 100 s.
|
||||
|
||||
#### image_creation_threshold
|
||||
|
||||
L0 delta layer threshold for L1 iamge layer creation. Default is 3.
|
||||
|
||||
#### pitr_interval
|
||||
|
||||
WAL retention duration for PITR branching. Default is 30 days.
|
||||
|
||||
#### initial_superuser_name
|
||||
|
||||
Name of the initial superuser role, passed to initdb when a new tenant
|
||||
@@ -156,6 +168,9 @@ access_key_id = 'SOMEKEYAAAAASADSAH*#'
|
||||
|
||||
# Secret access key to connect to the bucket ("password" part of the credentials)
|
||||
secret_access_key = 'SOMEsEcReTsd292v'
|
||||
|
||||
# S3 API query limit to avoid getting errors/throttling from AWS.
|
||||
concurrency_limit = 100
|
||||
```
|
||||
|
||||
###### General remote storage configuration
|
||||
@@ -167,8 +182,8 @@ Besides, there are parameters common for all types of remote storage that can be
|
||||
|
||||
```toml
|
||||
[remote_storage]
|
||||
# Max number of concurrent connections to open for uploading to or downloading from the remote storage.
|
||||
max_concurrent_sync = 100
|
||||
# Max number of concurrent timeline synchronized (layers uploaded or downloaded) with the remote storage at the same time.
|
||||
max_concurrent_timelines_sync = 50
|
||||
|
||||
# Max number of errors a single task can have before it's considered failed and not attempted to run anymore.
|
||||
max_sync_errors = 10
|
||||
|
||||
@@ -28,12 +28,7 @@ The pageserver has a few different duties:
|
||||
- Receive WAL from the WAL service and decode it.
|
||||
- Replay WAL that's applicable to the chunks that the Page Server maintains
|
||||
|
||||
For more detailed info, see `/pageserver/README`
|
||||
|
||||
`/postgres_ffi`:
|
||||
|
||||
Utility functions for interacting with PostgreSQL file formats.
|
||||
Misc constants, copied from PostgreSQL headers.
|
||||
For more detailed info, see [/pageserver/README](/pageserver/README.md)
|
||||
|
||||
`/proxy`:
|
||||
|
||||
@@ -57,12 +52,12 @@ PostgreSQL extension that implements storage manager API and network communicati
|
||||
|
||||
PostgreSQL extension that contains functions needed for testing and debugging.
|
||||
|
||||
`/walkeeper`:
|
||||
`/safekeeper`:
|
||||
|
||||
The zenith WAL service that receives WAL from a primary compute nodes and streams it to the pageserver.
|
||||
It acts as a holding area and redistribution center for recently generated WAL.
|
||||
|
||||
For more detailed info, see `/walkeeper/README`
|
||||
For more detailed info, see [/safekeeper/README](/safekeeper/README.md)
|
||||
|
||||
`/workspace_hack`:
|
||||
The workspace_hack crate exists only to pin down some dependencies.
|
||||
@@ -74,14 +69,21 @@ We use [cargo-hakari](https://crates.io/crates/cargo-hakari) for automation.
|
||||
Main entry point for the 'zenith' CLI utility.
|
||||
TODO: Doesn't it belong to control_plane?
|
||||
|
||||
`/zenith_metrics`:
|
||||
`/libs`:
|
||||
Unites granular neon helper crates under the hood.
|
||||
|
||||
`/libs/postgres_ffi`:
|
||||
|
||||
Utility functions for interacting with PostgreSQL file formats.
|
||||
Misc constants, copied from PostgreSQL headers.
|
||||
|
||||
`/libs/utils`:
|
||||
Generic helpers that are shared between other crates in this repository.
|
||||
A subject for future modularization.
|
||||
|
||||
`/libs/metrics`:
|
||||
Helpers for exposing Prometheus metrics from the server.
|
||||
|
||||
`/zenith_utils`:
|
||||
|
||||
Helpers that are shared between other crates in this repository.
|
||||
|
||||
## Using Python
|
||||
Note that Debian/Ubuntu Python packages are stale, as it commonly happens,
|
||||
so manual installation of dependencies is not recommended.
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "zenith_metrics"
|
||||
name = "metrics"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
@@ -8,4 +8,4 @@ prometheus = {version = "0.13", default_features=false} # removes protobuf depen
|
||||
libc = "0.2"
|
||||
lazy_static = "1.4"
|
||||
once_cell = "1.8.0"
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
@@ -8,8 +8,8 @@ use std::io::{Read, Result, Write};
|
||||
///
|
||||
/// ```
|
||||
/// # use std::io::{Result, Read};
|
||||
/// # use zenith_metrics::{register_int_counter, IntCounter};
|
||||
/// # use zenith_metrics::CountedReader;
|
||||
/// # use metrics::{register_int_counter, IntCounter};
|
||||
/// # use metrics::CountedReader;
|
||||
/// #
|
||||
/// # lazy_static::lazy_static! {
|
||||
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
|
||||
@@ -83,8 +83,8 @@ impl<T: Read> Read for CountedReader<'_, T> {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::io::{Result, Write};
|
||||
/// # use zenith_metrics::{register_int_counter, IntCounter};
|
||||
/// # use zenith_metrics::CountedWriter;
|
||||
/// # use metrics::{register_int_counter, IntCounter};
|
||||
/// # use metrics::CountedWriter;
|
||||
/// #
|
||||
/// # lazy_static::lazy_static! {
|
||||
/// # static ref INT_COUNTER: IntCounter = register_int_counter!(
|
||||
@@ -17,8 +17,8 @@ log = "0.4.14"
|
||||
memoffset = "0.6.2"
|
||||
thiserror = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
zenith_utils = { path = "../zenith_utils" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
utils = { path = "../utils" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
[build-dependencies]
|
||||
bindgen = "0.59.1"
|
||||
@@ -88,8 +88,8 @@ fn main() {
|
||||
// 'pg_config --includedir-server' would perhaps be the more proper way to find it,
|
||||
// but this will do for now.
|
||||
//
|
||||
.clang_arg("-I../tmp_install/include/server")
|
||||
.clang_arg("-I../tmp_install/include/postgresql/server")
|
||||
.clang_arg("-I../../tmp_install/include/server")
|
||||
.clang_arg("-I../../tmp_install/include/postgresql/server")
|
||||
//
|
||||
// Finish the builder and generate the bindings.
|
||||
//
|
||||
@@ -43,7 +43,7 @@ impl ControlFileData {
|
||||
/// Interpret a slice of bytes as a Postgres control file.
|
||||
///
|
||||
pub fn decode(buf: &[u8]) -> Result<ControlFileData> {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
|
||||
// Check that the slice has the expected size. The control file is
|
||||
// padded with zeros up to a 512 byte sector size, so accept a
|
||||
@@ -77,7 +77,7 @@ impl ControlFileData {
|
||||
///
|
||||
/// The CRC is recomputed to match the contents of the fields.
|
||||
pub fn encode(&self) -> Bytes {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
|
||||
// Serialize into a new buffer.
|
||||
let b = self.ser().unwrap();
|
||||
@@ -4,7 +4,7 @@
|
||||
//! This understands the WAL page and record format, enough to figure out where the WAL record
|
||||
//! boundaries are, and to reassemble WAL records that cross page boundaries.
|
||||
//!
|
||||
//! This functionality is needed by both the pageserver and the walkeepers. The pageserver needs
|
||||
//! This functionality is needed by both the pageserver and the safekeepers. The pageserver needs
|
||||
//! to look deeper into the WAL records to also understand which blocks they modify, the code
|
||||
//! for that is in pageserver/src/walrecord.rs
|
||||
//!
|
||||
@@ -18,7 +18,7 @@ use crc32c::*;
|
||||
use log::*;
|
||||
use std::cmp::min;
|
||||
use thiserror::Error;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub struct WalStreamDecoder {
|
||||
lsn: Lsn,
|
||||
@@ -28,7 +28,7 @@ use std::io::prelude::*;
|
||||
use std::io::SeekFrom;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub const XLOG_FNAME_LEN: usize = 24;
|
||||
pub const XLOG_BLCKSZ: usize = 8192;
|
||||
@@ -118,11 +118,15 @@ pub fn normalize_lsn(lsn: Lsn, seg_sz: usize) -> Lsn {
|
||||
}
|
||||
|
||||
pub fn get_current_timestamp() -> TimestampTz {
|
||||
to_pg_timestamp(SystemTime::now())
|
||||
}
|
||||
|
||||
pub fn to_pg_timestamp(time: SystemTime) -> TimestampTz {
|
||||
const UNIX_EPOCH_JDATE: u64 = 2440588; /* == date2j(1970, 1, 1) */
|
||||
const POSTGRES_EPOCH_JDATE: u64 = 2451545; /* == date2j(2000, 1, 1) */
|
||||
const SECS_PER_DAY: u64 = 86400;
|
||||
const USECS_PER_SEC: u64 = 1000000;
|
||||
match SystemTime::now().duration_since(SystemTime::UNIX_EPOCH) {
|
||||
match time.duration_since(SystemTime::UNIX_EPOCH) {
|
||||
Ok(n) => {
|
||||
((n.as_secs() - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY))
|
||||
* USECS_PER_SEC
|
||||
@@ -351,17 +355,17 @@ pub fn main() {
|
||||
|
||||
impl XLogRecord {
|
||||
pub fn from_slice(buf: &[u8]) -> XLogRecord {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
XLogRecord::des(buf).unwrap()
|
||||
}
|
||||
|
||||
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogRecord {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
XLogRecord::des_from(&mut buf.reader()).unwrap()
|
||||
}
|
||||
|
||||
pub fn encode(&self) -> Bytes {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
self.ser().unwrap().into()
|
||||
}
|
||||
|
||||
@@ -373,19 +377,19 @@ impl XLogRecord {
|
||||
|
||||
impl XLogPageHeaderData {
|
||||
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogPageHeaderData {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
XLogPageHeaderData::des_from(&mut buf.reader()).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl XLogLongPageHeaderData {
|
||||
pub fn from_bytes<B: Buf>(buf: &mut B) -> XLogLongPageHeaderData {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
XLogLongPageHeaderData::des_from(&mut buf.reader()).unwrap()
|
||||
}
|
||||
|
||||
pub fn encode(&self) -> Bytes {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
self.ser().unwrap().into()
|
||||
}
|
||||
}
|
||||
@@ -394,12 +398,12 @@ pub const SIZEOF_CHECKPOINT: usize = std::mem::size_of::<CheckPoint>();
|
||||
|
||||
impl CheckPoint {
|
||||
pub fn encode(&self) -> Bytes {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
self.ser().unwrap().into()
|
||||
}
|
||||
|
||||
pub fn decode(buf: &[u8]) -> Result<CheckPoint, anyhow::Error> {
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
Ok(CheckPoint::des(buf)?)
|
||||
}
|
||||
|
||||
@@ -477,7 +481,9 @@ mod tests {
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal() {
|
||||
// 1. Run initdb to generate some WAL
|
||||
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("..");
|
||||
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("..")
|
||||
.join("..");
|
||||
let data_dir = top_path.join("test_output/test_find_end_of_wal");
|
||||
let initdb_path = top_path.join("tmp_install/bin/initdb");
|
||||
let lib_path = top_path.join("tmp_install/lib");
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "zenith_utils"
|
||||
name = "utils"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
@@ -10,8 +10,8 @@ bytes = "1.0.1"
|
||||
hyper = { version = "0.14.7", features = ["full"] }
|
||||
lazy_static = "1.4.0"
|
||||
pin-project-lite = "0.2.7"
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
routerify = "3"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
@@ -22,23 +22,23 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
nix = "0.23.0"
|
||||
signal-hook = "0.3.10"
|
||||
rand = "0.8.3"
|
||||
jsonwebtoken = "7"
|
||||
jsonwebtoken = "8"
|
||||
hex = { version = "0.4.3", features = ["serde"] }
|
||||
rustls = "0.19.1"
|
||||
rustls-split = "0.2.1"
|
||||
rustls = "0.20.2"
|
||||
rustls-split = "0.3.0"
|
||||
git-version = "0.3.5"
|
||||
serde_with = "1.12.0"
|
||||
|
||||
zenith_metrics = { path = "../zenith_metrics" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
metrics = { path = "../metrics" }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
byteorder = "1.4.3"
|
||||
bytes = "1.0.1"
|
||||
hex-literal = "0.3"
|
||||
tempfile = "3.2"
|
||||
webpki = "0.21"
|
||||
criterion = "0.3"
|
||||
rustls-pemfile = "0.2.1"
|
||||
|
||||
[[bench]]
|
||||
name = "benchmarks"
|
||||
@@ -1,7 +1,7 @@
|
||||
#![allow(unused)]
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use zenith_utils::zid;
|
||||
use utils::zid;
|
||||
|
||||
pub fn bench_zid_stringify(c: &mut Criterion) {
|
||||
// Can only use public methods.
|
||||
@@ -1,10 +1,11 @@
|
||||
#!/bin/bash
|
||||
PG_BIN=$1
|
||||
WAL_PATH=$2
|
||||
DATA_DIR=$3
|
||||
PORT=$4
|
||||
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
|
||||
rm -fr $DATA_DIR
|
||||
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -D $DATA_DIR --sysid=$SYSID
|
||||
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
|
||||
echo port=$PORT >> $DATA_DIR/postgresql.conf
|
||||
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
|
||||
declare -i WAL_SIZE=$REDO_POS+114
|
||||
@@ -5,7 +5,7 @@
|
||||
/// For example, to calculate the smallest value among some integers:
|
||||
///
|
||||
/// ```
|
||||
/// use zenith_utils::accum::Accum;
|
||||
/// use utils::accum::Accum;
|
||||
///
|
||||
/// let values = [1, 2, 3];
|
||||
///
|
||||
@@ -1,8 +1,6 @@
|
||||
// For details about authentication see docs/authentication.md
|
||||
// TODO there are two issues for our use case in jsonwebtoken library which will be resolved in next release
|
||||
// The first one is that there is no way to disable expiration claim, but it can be excluded from validation, so use this as a workaround for now.
|
||||
// Relevant issue: https://github.com/Keats/jsonwebtoken/issues/190
|
||||
// The second one is that we wanted to use ed25519 keys, but they are also not supported until next version. So we go with RSA keys for now.
|
||||
//
|
||||
// TODO: use ed25519 keys
|
||||
// Relevant issue: https://github.com/Keats/jsonwebtoken/issues/162
|
||||
|
||||
use serde;
|
||||
@@ -59,19 +57,19 @@ pub fn check_permission(claims: &Claims, tenantid: Option<ZTenantId>) -> Result<
|
||||
}
|
||||
|
||||
pub struct JwtAuth {
|
||||
decoding_key: DecodingKey<'static>,
|
||||
decoding_key: DecodingKey,
|
||||
validation: Validation,
|
||||
}
|
||||
|
||||
impl JwtAuth {
|
||||
pub fn new(decoding_key: DecodingKey<'_>) -> Self {
|
||||
pub fn new(decoding_key: DecodingKey) -> Self {
|
||||
let mut validation = Validation::new(JWT_ALGORITHM);
|
||||
// The default 'required_spec_claims' is 'exp'. But we don't want to require
|
||||
// expiration.
|
||||
validation.required_spec_claims = [].into();
|
||||
Self {
|
||||
decoding_key: decoding_key.into_static(),
|
||||
validation: Validation {
|
||||
algorithms: vec![JWT_ALGORITHM],
|
||||
validate_exp: false,
|
||||
..Default::default()
|
||||
},
|
||||
decoding_key,
|
||||
validation,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,12 +5,11 @@ use anyhow::anyhow;
|
||||
use hyper::header::AUTHORIZATION;
|
||||
use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
|
||||
use lazy_static::lazy_static;
|
||||
use metrics::{new_common_metric_name, register_int_counter, Encoder, IntCounter, TextEncoder};
|
||||
use routerify::ext::RequestExt;
|
||||
use routerify::RequestInfo;
|
||||
use routerify::{Middleware, Router, RouterBuilder, RouterService};
|
||||
use tracing::info;
|
||||
use zenith_metrics::{new_common_metric_name, register_int_counter, IntCounter};
|
||||
use zenith_metrics::{Encoder, TextEncoder};
|
||||
|
||||
use std::future::Future;
|
||||
use std::net::TcpListener;
|
||||
@@ -36,7 +35,7 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
|
||||
let mut buffer = vec![];
|
||||
let encoder = TextEncoder::new();
|
||||
|
||||
let metrics = zenith_metrics::gather();
|
||||
let metrics = metrics::gather();
|
||||
encoder.encode(&metrics, &mut buffer).unwrap();
|
||||
|
||||
let response = Response::builder()
|
||||
@@ -1,4 +1,4 @@
|
||||
//! zenith_utils is intended to be a place to put code that is shared
|
||||
//! `utils` is intended to be a place to put code that is shared
|
||||
//! between other crates in this repository.
|
||||
|
||||
#![allow(clippy::manual_range_contains)]
|
||||
@@ -70,7 +70,7 @@ pub mod signals;
|
||||
// So the build script will be run only when GIT_VERSION envvar has changed.
|
||||
//
|
||||
// Why not to use buildscript to get git commit sha directly without procmacro from different crate?
|
||||
// Caching and workspaces complicates that. In case zenith_utils is not
|
||||
// Caching and workspaces complicates that. In case `utils` is not
|
||||
// recompiled due to caching then version may become outdated.
|
||||
// git_version crate handles that case by introducing a dependency on .git internals via include_bytes! macro,
|
||||
// so if we changed the index state git_version will pick that up and rerun the macro.
|
||||
@@ -304,8 +304,8 @@ impl PostgresBackend {
|
||||
pub fn start_tls(&mut self) -> anyhow::Result<()> {
|
||||
match self.stream.take() {
|
||||
Some(Stream::Bidirectional(bidi_stream)) => {
|
||||
let session = rustls::ServerSession::new(&self.tls_config.clone().unwrap());
|
||||
self.stream = Some(Stream::Bidirectional(bidi_stream.start_tls(session)?));
|
||||
let conn = rustls::ServerConnection::new(self.tls_config.clone().unwrap())?;
|
||||
self.stream = Some(Stream::Bidirectional(bidi_stream.start_tls(conn)?));
|
||||
Ok(())
|
||||
}
|
||||
stream => {
|
||||
@@ -375,9 +375,8 @@ impl PostgresBackend {
|
||||
}
|
||||
AuthType::MD5 => {
|
||||
rand::thread_rng().fill(&mut self.md5_salt);
|
||||
let md5_salt = self.md5_salt;
|
||||
self.write_message(&BeMessage::AuthenticationMD5Password(
|
||||
&md5_salt,
|
||||
self.md5_salt,
|
||||
))?;
|
||||
self.state = ProtoState::Authentication;
|
||||
}
|
||||
@@ -100,6 +100,21 @@ pub struct FeExecuteMessage {
|
||||
#[derive(Debug)]
|
||||
pub struct FeCloseMessage {}
|
||||
|
||||
/// Retry a read on EINTR
|
||||
///
|
||||
/// This runs the enclosed expression, and if it returns
|
||||
/// Err(io::ErrorKind::Interrupted), retries it.
|
||||
macro_rules! retry_read {
|
||||
( $x:expr ) => {
|
||||
loop {
|
||||
match $x {
|
||||
Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
|
||||
res => break res,
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl FeMessage {
|
||||
/// Read one message from the stream.
|
||||
/// This function returns `Ok(None)` in case of EOF.
|
||||
@@ -107,7 +122,7 @@ impl FeMessage {
|
||||
///
|
||||
/// ```
|
||||
/// # use std::io;
|
||||
/// # use zenith_utils::pq_proto::FeMessage;
|
||||
/// # use utils::pq_proto::FeMessage;
|
||||
/// #
|
||||
/// # fn process_message(msg: FeMessage) -> anyhow::Result<()> {
|
||||
/// # Ok(())
|
||||
@@ -141,12 +156,12 @@ impl FeMessage {
|
||||
// Each libpq message begins with a message type byte, followed by message length
|
||||
// If the client closes the connection, return None. But if the client closes the
|
||||
// connection in the middle of a message, we will return an error.
|
||||
let tag = match stream.read_u8().await {
|
||||
let tag = match retry_read!(stream.read_u8().await) {
|
||||
Ok(b) => b,
|
||||
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
let len = stream.read_u32().await?;
|
||||
let len = retry_read!(stream.read_u32().await)?;
|
||||
|
||||
// The message length includes itself, so it better be at least 4
|
||||
let bodylen = len
|
||||
@@ -207,7 +222,7 @@ impl FeStartupPacket {
|
||||
// reading 4 bytes, to be precise), return None to indicate that the connection
|
||||
// was closed. This matches the PostgreSQL server's behavior, which avoids noise
|
||||
// in the log if the client opens connection but closes it immediately.
|
||||
let len = match stream.read_u32().await {
|
||||
let len = match retry_read!(stream.read_u32().await) {
|
||||
Ok(len) => len as usize,
|
||||
Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
|
||||
Err(e) => return Err(e.into()),
|
||||
@@ -217,7 +232,7 @@ impl FeStartupPacket {
|
||||
bail!("invalid message length");
|
||||
}
|
||||
|
||||
let request_code = stream.read_u32().await?;
|
||||
let request_code = retry_read!(stream.read_u32().await)?;
|
||||
|
||||
// the rest of startup packet are params
|
||||
let params_len = len - 8;
|
||||
@@ -401,7 +416,8 @@ fn read_null_terminated(buf: &mut Bytes) -> anyhow::Result<Bytes> {
|
||||
#[derive(Debug)]
|
||||
pub enum BeMessage<'a> {
|
||||
AuthenticationOk,
|
||||
AuthenticationMD5Password(&'a [u8; 4]),
|
||||
AuthenticationMD5Password([u8; 4]),
|
||||
AuthenticationSasl(BeAuthenticationSaslMessage<'a>),
|
||||
AuthenticationCleartextPassword,
|
||||
BackendKeyData(CancelKeyData),
|
||||
BindComplete,
|
||||
@@ -429,6 +445,13 @@ pub enum BeMessage<'a> {
|
||||
KeepAlive(WalSndKeepAlive),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BeAuthenticationSaslMessage<'a> {
|
||||
Methods(&'a [&'a str]),
|
||||
Continue(&'a [u8]),
|
||||
Final(&'a [u8]),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum BeParameterStatusMessage<'a> {
|
||||
Encoding(&'a str),
|
||||
@@ -480,6 +503,18 @@ impl RowDescriptor<'_> {
|
||||
formatcode: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub const fn text_col(name: &[u8]) -> RowDescriptor {
|
||||
RowDescriptor {
|
||||
name,
|
||||
tableoid: 0,
|
||||
attnum: 0,
|
||||
typoid: TEXT_OID,
|
||||
typlen: -1,
|
||||
typmod: 0,
|
||||
formatcode: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -611,6 +646,32 @@ impl<'a> BeMessage<'a> {
|
||||
.unwrap(); // write into BytesMut can't fail
|
||||
}
|
||||
|
||||
BeMessage::AuthenticationSasl(msg) => {
|
||||
buf.put_u8(b'R');
|
||||
write_body(buf, |buf| {
|
||||
use BeAuthenticationSaslMessage::*;
|
||||
match msg {
|
||||
Methods(methods) => {
|
||||
buf.put_i32(10); // Specifies that SASL auth method is used.
|
||||
for method in methods.iter() {
|
||||
write_cstr(method.as_bytes(), buf)?;
|
||||
}
|
||||
buf.put_u8(0); // zero terminator for the list
|
||||
}
|
||||
Continue(extra) => {
|
||||
buf.put_i32(11); // Continue SASL auth.
|
||||
buf.put_slice(extra);
|
||||
}
|
||||
Final(extra) => {
|
||||
buf.put_i32(12); // Send final SASL message.
|
||||
buf.put_slice(extra);
|
||||
}
|
||||
}
|
||||
Ok::<_, io::Error>(())
|
||||
})
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
BeMessage::BackendKeyData(key_data) => {
|
||||
buf.put_u8(b'K');
|
||||
write_body(buf, |buf| {
|
||||
@@ -4,7 +4,7 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use rustls::Session;
|
||||
use rustls::Connection;
|
||||
|
||||
/// Wrapper supporting reads of a shared TcpStream.
|
||||
pub struct ArcTcpRead(Arc<TcpStream>);
|
||||
@@ -56,7 +56,7 @@ impl BufStream {
|
||||
|
||||
pub enum ReadStream {
|
||||
Tcp(BufReader<ArcTcpRead>),
|
||||
Tls(rustls_split::ReadHalf<rustls::ServerSession>),
|
||||
Tls(rustls_split::ReadHalf),
|
||||
}
|
||||
|
||||
impl io::Read for ReadStream {
|
||||
@@ -79,7 +79,7 @@ impl ReadStream {
|
||||
|
||||
pub enum WriteStream {
|
||||
Tcp(Arc<TcpStream>),
|
||||
Tls(rustls_split::WriteHalf<rustls::ServerSession>),
|
||||
Tls(rustls_split::WriteHalf),
|
||||
}
|
||||
|
||||
impl WriteStream {
|
||||
@@ -107,11 +107,11 @@ impl io::Write for WriteStream {
|
||||
}
|
||||
}
|
||||
|
||||
type TlsStream<T> = rustls::StreamOwned<rustls::ServerSession, T>;
|
||||
type TlsStream<T> = rustls::StreamOwned<rustls::ServerConnection, T>;
|
||||
|
||||
pub enum BidiStream {
|
||||
Tcp(BufStream),
|
||||
/// This variant is boxed, because [`rustls::ServerSession`] is quite larger than [`BufStream`].
|
||||
/// This variant is boxed, because [`rustls::ServerConnection`] is quite larger than [`BufStream`].
|
||||
Tls(Box<TlsStream<BufStream>>),
|
||||
}
|
||||
|
||||
@@ -127,7 +127,7 @@ impl BidiStream {
|
||||
if how == Shutdown::Read {
|
||||
tls_boxed.sock.get_ref().shutdown(how)
|
||||
} else {
|
||||
tls_boxed.sess.send_close_notify();
|
||||
tls_boxed.conn.send_close_notify();
|
||||
let res = tls_boxed.flush();
|
||||
tls_boxed.sock.get_ref().shutdown(how)?;
|
||||
res
|
||||
@@ -154,19 +154,23 @@ impl BidiStream {
|
||||
// TODO would be nice to avoid the Arc here
|
||||
let socket = Arc::try_unwrap(reader.into_inner().0).unwrap();
|
||||
|
||||
let (read_half, write_half) =
|
||||
rustls_split::split(socket, tls_boxed.sess, read_buf_cfg, write_buf_cfg);
|
||||
let (read_half, write_half) = rustls_split::split(
|
||||
socket,
|
||||
Connection::Server(tls_boxed.conn),
|
||||
read_buf_cfg,
|
||||
write_buf_cfg,
|
||||
);
|
||||
(ReadStream::Tls(read_half), WriteStream::Tls(write_half))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn start_tls(self, mut session: rustls::ServerSession) -> io::Result<Self> {
|
||||
pub fn start_tls(self, mut conn: rustls::ServerConnection) -> io::Result<Self> {
|
||||
match self {
|
||||
Self::Tcp(mut stream) => {
|
||||
session.complete_io(&mut stream)?;
|
||||
assert!(!session.is_handshaking());
|
||||
Ok(Self::Tls(Box::new(TlsStream::new(session, stream))))
|
||||
conn.complete_io(&mut stream)?;
|
||||
assert!(!conn.is_handshaking());
|
||||
Ok(Self::Tls(Box::new(TlsStream::new(conn, stream))))
|
||||
}
|
||||
Self::Tls { .. } => Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
@@ -29,7 +29,7 @@ impl<S, T: Future> SyncFuture<S, T> {
|
||||
/// Example:
|
||||
///
|
||||
/// ```
|
||||
/// # use zenith_utils::sync::SyncFuture;
|
||||
/// # use utils::sync::SyncFuture;
|
||||
/// # use std::future::Future;
|
||||
/// # use tokio::io::AsyncReadExt;
|
||||
/// #
|
||||
@@ -2,7 +2,7 @@ use bytes::{Buf, BytesMut};
|
||||
use hex_literal::hex;
|
||||
use serde::Deserialize;
|
||||
use std::io::Read;
|
||||
use zenith_utils::bin_ser::LeSer;
|
||||
use utils::bin_ser::LeSer;
|
||||
|
||||
#[derive(Debug, PartialEq, Deserialize)]
|
||||
pub struct HeaderData {
|
||||
@@ -8,9 +8,8 @@ use std::{
|
||||
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use lazy_static::lazy_static;
|
||||
use rustls::Session;
|
||||
|
||||
use zenith_utils::postgres_backend::{AuthType, Handler, PostgresBackend};
|
||||
use utils::postgres_backend::{AuthType, Handler, PostgresBackend};
|
||||
|
||||
fn make_tcp_pair() -> (TcpStream, TcpStream) {
|
||||
let listener = TcpListener::bind("127.0.0.1:0").unwrap();
|
||||
@@ -23,11 +22,11 @@ fn make_tcp_pair() -> (TcpStream, TcpStream) {
|
||||
lazy_static! {
|
||||
static ref KEY: rustls::PrivateKey = {
|
||||
let mut cursor = Cursor::new(include_bytes!("key.pem"));
|
||||
rustls::internal::pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone()
|
||||
rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
|
||||
};
|
||||
static ref CERT: rustls::Certificate = {
|
||||
let mut cursor = Cursor::new(include_bytes!("cert.pem"));
|
||||
rustls::internal::pemfile::certs(&mut cursor).unwrap()[0].clone()
|
||||
rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
|
||||
};
|
||||
}
|
||||
|
||||
@@ -45,17 +44,23 @@ fn ssl() {
|
||||
let ssl_response = client_sock.read_u8().unwrap();
|
||||
assert_eq!(b'S', ssl_response);
|
||||
|
||||
let mut cfg = rustls::ClientConfig::new();
|
||||
cfg.root_store.add(&CERT).unwrap();
|
||||
let cfg = rustls::ClientConfig::builder()
|
||||
.with_safe_defaults()
|
||||
.with_root_certificates({
|
||||
let mut store = rustls::RootCertStore::empty();
|
||||
store.add(&CERT).unwrap();
|
||||
store
|
||||
})
|
||||
.with_no_client_auth();
|
||||
let client_config = Arc::new(cfg);
|
||||
|
||||
let dns_name = webpki::DNSNameRef::try_from_ascii_str("localhost").unwrap();
|
||||
let mut session = rustls::ClientSession::new(&client_config, dns_name);
|
||||
let dns_name = "localhost".try_into().unwrap();
|
||||
let mut conn = rustls::ClientConnection::new(client_config, dns_name).unwrap();
|
||||
|
||||
session.complete_io(&mut client_sock).unwrap();
|
||||
assert!(!session.is_handshaking());
|
||||
conn.complete_io(&mut client_sock).unwrap();
|
||||
assert!(!conn.is_handshaking());
|
||||
|
||||
let mut stream = rustls::Stream::new(&mut session, &mut client_sock);
|
||||
let mut stream = rustls::Stream::new(&mut conn, &mut client_sock);
|
||||
|
||||
// StartupMessage
|
||||
stream.write_u32::<BigEndian>(9).unwrap();
|
||||
@@ -105,8 +110,10 @@ fn ssl() {
|
||||
}
|
||||
let mut handler = TestHandler { got_query: false };
|
||||
|
||||
let mut cfg = rustls::ServerConfig::new(rustls::NoClientAuth::new());
|
||||
cfg.set_single_cert(vec![CERT.clone()], KEY.clone())
|
||||
let cfg = rustls::ServerConfig::builder()
|
||||
.with_safe_defaults()
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(vec![CERT.clone()], KEY.clone())
|
||||
.unwrap();
|
||||
let tls_config = Some(Arc::new(cfg));
|
||||
|
||||
@@ -209,8 +216,10 @@ fn server_forces_ssl() {
|
||||
}
|
||||
let mut handler = TestHandler;
|
||||
|
||||
let mut cfg = rustls::ServerConfig::new(rustls::NoClientAuth::new());
|
||||
cfg.set_single_cert(vec![CERT.clone()], KEY.clone())
|
||||
let cfg = rustls::ServerConfig::builder()
|
||||
.with_safe_defaults()
|
||||
.with_no_client_auth()
|
||||
.with_single_cert(vec![CERT.clone()], KEY.clone())
|
||||
.unwrap();
|
||||
let tls_config = Some(Arc::new(cfg));
|
||||
|
||||
@@ -3,6 +3,14 @@ name = "pageserver"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[features]
|
||||
# It is simpler infra-wise to have failpoints enabled by default
|
||||
# It shouldnt affect perf in any way because failpoints
|
||||
# are not placed in hot code paths
|
||||
default = ["failpoints"]
|
||||
profiling = ["pprof"]
|
||||
failpoints = ["fail/failpoints"]
|
||||
|
||||
[dependencies]
|
||||
chrono = "0.4.19"
|
||||
rand = "0.8.3"
|
||||
@@ -14,15 +22,14 @@ hex = "0.4.3"
|
||||
hyper = "0.14"
|
||||
itertools = "0.10.3"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
clap = "3.0"
|
||||
daemonize = "0.4.1"
|
||||
tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
|
||||
tokio-util = { version = "0.7", features = ["io"] }
|
||||
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="2949d98df52587d562986aad155dd4e889e408b7" }
|
||||
postgres-types = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
postgres-protocol = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||
tokio-stream = "0.1.8"
|
||||
anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
crc32c = "0.6.0"
|
||||
@@ -32,12 +39,14 @@ humantime = "2.1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_with = "1.12.0"
|
||||
humantime-serde = "1.1.1"
|
||||
|
||||
pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true }
|
||||
|
||||
toml_edit = { version = "0.13", features = ["easy"] }
|
||||
scopeguard = "1.1.0"
|
||||
const_format = "0.2.21"
|
||||
tracing = "0.1.27"
|
||||
tracing-futures = "0.2"
|
||||
signal-hook = "0.3.10"
|
||||
url = "2"
|
||||
nix = "0.23"
|
||||
@@ -48,11 +57,10 @@ fail = "0.5.0"
|
||||
rusoto_core = "0.47"
|
||||
rusoto_s3 = "0.47"
|
||||
async-trait = "0.1"
|
||||
async-compression = {version = "0.3", features = ["zstd", "tokio"]}
|
||||
|
||||
postgres_ffi = { path = "../postgres_ffi" }
|
||||
zenith_metrics = { path = "../zenith_metrics" }
|
||||
zenith_utils = { path = "../zenith_utils" }
|
||||
postgres_ffi = { path = "../libs/postgres_ffi" }
|
||||
metrics = { path = "../libs/metrics" }
|
||||
utils = { path = "../libs/utils" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -12,20 +12,20 @@
|
||||
//!
|
||||
use anyhow::{ensure, Context, Result};
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use log::*;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::sync::Arc;
|
||||
use std::time::SystemTime;
|
||||
use tar::{Builder, EntryType, Header};
|
||||
use tracing::*;
|
||||
|
||||
use crate::reltag::SlruKind;
|
||||
use crate::repository::Timeline;
|
||||
use crate::DatadirTimelineImpl;
|
||||
use postgres_ffi::xlog_utils::*;
|
||||
use postgres_ffi::*;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
/// This is short-living object only for the time of tarball creation,
|
||||
/// created mostly to avoid passing a lot of parameters between various functions
|
||||
@@ -154,9 +154,17 @@ impl<'a> Basebackup<'a> {
|
||||
let img = self
|
||||
.timeline
|
||||
.get_slru_page_at_lsn(slru, segno, blknum, self.lsn)?;
|
||||
ensure!(img.len() == pg_constants::BLCKSZ as usize);
|
||||
|
||||
slru_buf.extend_from_slice(&img);
|
||||
if slru == SlruKind::Clog {
|
||||
ensure!(
|
||||
img.len() == pg_constants::BLCKSZ as usize
|
||||
|| img.len() == pg_constants::BLCKSZ as usize + 8
|
||||
);
|
||||
} else {
|
||||
ensure!(img.len() == pg_constants::BLCKSZ as usize);
|
||||
}
|
||||
|
||||
slru_buf.extend_from_slice(&img[..pg_constants::BLCKSZ as usize]);
|
||||
}
|
||||
|
||||
let segname = format!("{}/{:>04X}", slru.to_str(), segno);
|
||||
|
||||
@@ -7,7 +7,7 @@ use pageserver::layered_repository::dump_layerfile_from_path;
|
||||
use pageserver::page_cache;
|
||||
use pageserver::virtual_file;
|
||||
use std::path::PathBuf;
|
||||
use zenith_utils::GIT_VERSION;
|
||||
use utils::GIT_VERSION;
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let arg_matches = App::new("Zenith dump_layerfile utility")
|
||||
|
||||
@@ -2,38 +2,45 @@
|
||||
|
||||
use std::{env, path::Path, str::FromStr};
|
||||
use tracing::*;
|
||||
use zenith_utils::{
|
||||
auth::JwtAuth,
|
||||
logging,
|
||||
postgres_backend::AuthType,
|
||||
tcp_listener,
|
||||
zid::{ZTenantId, ZTimelineId},
|
||||
GIT_VERSION,
|
||||
};
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
|
||||
use clap::{App, Arg};
|
||||
use daemonize::Daemonize;
|
||||
|
||||
use fail::FailScenario;
|
||||
use pageserver::{
|
||||
config::{defaults::*, PageServerConf},
|
||||
http, page_cache, page_service,
|
||||
remote_storage::{self, SyncStartupData},
|
||||
repository::{Repository, TimelineSyncStatusUpdate},
|
||||
tenant_mgr, thread_mgr,
|
||||
http, page_cache, page_service, profiling, tenant_mgr, thread_mgr,
|
||||
thread_mgr::ThreadKind,
|
||||
timelines, virtual_file, LOG_FILE_NAME,
|
||||
};
|
||||
use zenith_utils::http::endpoint;
|
||||
use zenith_utils::shutdown::exit_now;
|
||||
use zenith_utils::signals::{self, Signal};
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
http::endpoint,
|
||||
logging,
|
||||
postgres_backend::AuthType,
|
||||
shutdown::exit_now,
|
||||
signals::{self, Signal},
|
||||
tcp_listener,
|
||||
zid::{ZTenantId, ZTimelineId},
|
||||
GIT_VERSION,
|
||||
};
|
||||
|
||||
fn version() -> String {
|
||||
format!(
|
||||
"{} profiling:{} failpoints:{}",
|
||||
GIT_VERSION,
|
||||
cfg!(feature = "profiling"),
|
||||
fail::has_failpoints()
|
||||
)
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
zenith_metrics::set_common_metrics_prefix("pageserver");
|
||||
metrics::set_common_metrics_prefix("pageserver");
|
||||
let arg_matches = App::new("Zenith page server")
|
||||
.about("Materializes WAL stream to pages and serves them to the postgres")
|
||||
.version(GIT_VERSION)
|
||||
.version(&*version())
|
||||
.arg(
|
||||
Arg::new("daemonize")
|
||||
.short('d')
|
||||
@@ -78,8 +85,23 @@ fn main() -> anyhow::Result<()> {
|
||||
.help("Additional configuration overrides of the ones from the toml config file (or new ones to add there).
|
||||
Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("enabled-features")
|
||||
.long("enabled-features")
|
||||
.takes_value(false)
|
||||
.help("Show enabled compile time features"),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
if arg_matches.is_present("enabled-features") {
|
||||
let features: &[&str] = &[
|
||||
#[cfg(feature = "failpoints")]
|
||||
"failpoints",
|
||||
];
|
||||
println!("{{\"features\": {features:?} }}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let workdir = Path::new(arg_matches.value_of("workdir").unwrap_or(".zenith"));
|
||||
let workdir = workdir
|
||||
.canonicalize()
|
||||
@@ -160,6 +182,14 @@ fn main() -> anyhow::Result<()> {
|
||||
// as a ref.
|
||||
let conf: &'static PageServerConf = Box::leak(Box::new(conf));
|
||||
|
||||
// If failpoints are used, terminate the whole pageserver process if they are hit.
|
||||
let scenario = FailScenario::setup();
|
||||
if fail::has_failpoints() {
|
||||
std::panic::set_hook(Box::new(|_| {
|
||||
std::process::exit(1);
|
||||
}));
|
||||
}
|
||||
|
||||
// Basic initialization of things that don't change after startup
|
||||
virtual_file::init(conf.max_file_descriptors);
|
||||
page_cache::init(conf.page_cache_size);
|
||||
@@ -175,10 +205,12 @@ fn main() -> anyhow::Result<()> {
|
||||
cfg_file_path.display()
|
||||
)
|
||||
})?;
|
||||
Ok(())
|
||||
} else {
|
||||
start_pageserver(conf, daemonize).context("Failed to start pageserver")
|
||||
start_pageserver(conf, daemonize).context("Failed to start pageserver")?;
|
||||
}
|
||||
|
||||
scenario.teardown();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()> {
|
||||
@@ -231,46 +263,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
|
||||
|
||||
let signals = signals::install_shutdown_handlers()?;
|
||||
|
||||
// Initialize repositories with locally available timelines.
|
||||
// Timelines that are only partially available locally (remote storage has more data than this pageserver)
|
||||
// are scheduled for download and added to the repository once download is completed.
|
||||
let SyncStartupData {
|
||||
remote_index,
|
||||
local_timeline_init_statuses,
|
||||
} = remote_storage::start_local_timeline_sync(conf)
|
||||
.context("Failed to set up local files sync with external storage")?;
|
||||
|
||||
for (tenant_id, local_timeline_init_statuses) in local_timeline_init_statuses {
|
||||
// initialize local tenant
|
||||
let repo = tenant_mgr::load_local_repo(conf, tenant_id, &remote_index);
|
||||
for (timeline_id, init_status) in local_timeline_init_statuses {
|
||||
match init_status {
|
||||
remote_storage::LocalTimelineInitStatus::LocallyComplete => {
|
||||
debug!("timeline {} for tenant {} is locally complete, registering it in repository", tenant_id, timeline_id);
|
||||
// Lets fail here loudly to be on the safe side.
|
||||
// XXX: It may be a better api to actually distinguish between repository startup
|
||||
// and processing of newly downloaded timelines.
|
||||
repo.apply_timeline_remote_sync_status_update(
|
||||
timeline_id,
|
||||
TimelineSyncStatusUpdate::Downloaded,
|
||||
)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to bootstrap timeline {} for tenant {}",
|
||||
timeline_id, tenant_id
|
||||
)
|
||||
})?
|
||||
}
|
||||
remote_storage::LocalTimelineInitStatus::NeedsSync => {
|
||||
debug!(
|
||||
"timeline {} for tenant {} needs sync, \
|
||||
so skipped for adding into repository until sync is finished",
|
||||
tenant_id, timeline_id
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// start profiler (if enabled)
|
||||
let profiler_guard = profiling::init_profiler(conf);
|
||||
|
||||
// initialize authentication for incoming connections
|
||||
let auth = match &conf.auth_type {
|
||||
@@ -283,6 +277,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
|
||||
};
|
||||
info!("Using auth: {:#?}", conf.auth_type);
|
||||
|
||||
let remote_index = tenant_mgr::init_tenant_mgr(conf)?;
|
||||
|
||||
// Spawn a new thread for the http endpoint
|
||||
// bind before launching separate thread so the error reported before startup exits
|
||||
let auth_cloned = auth.clone();
|
||||
@@ -293,7 +289,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
|
||||
"http_endpoint_thread",
|
||||
false,
|
||||
move || {
|
||||
let router = http::make_router(conf, auth_cloned, remote_index);
|
||||
let router = http::make_router(conf, auth_cloned, remote_index)?;
|
||||
endpoint::serve_thread_main(router, http_listener, thread_mgr::shutdown_watcher())
|
||||
},
|
||||
)?;
|
||||
@@ -315,6 +311,7 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
|
||||
"Got {}. Terminating in immediate shutdown mode",
|
||||
signal.name()
|
||||
);
|
||||
profiling::exit_profiler(conf, &profiler_guard);
|
||||
std::process::exit(111);
|
||||
}
|
||||
|
||||
@@ -323,7 +320,8 @@ fn start_pageserver(conf: &'static PageServerConf, daemonize: bool) -> Result<()
|
||||
"Got {}. Terminating gracefully in fast shutdown mode",
|
||||
signal.name()
|
||||
);
|
||||
pageserver::shutdown_pageserver();
|
||||
profiling::exit_profiler(conf, &profiler_guard);
|
||||
pageserver::shutdown_pageserver(0);
|
||||
unreachable!()
|
||||
}
|
||||
})
|
||||
|
||||
@@ -1,334 +0,0 @@
|
||||
//! A CLI helper to deal with remote storage (S3, usually) blobs as archives.
|
||||
//! See [`compression`] for more details about the archives.
|
||||
|
||||
use std::{collections::BTreeSet, path::Path};
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use clap::{App, Arg};
|
||||
use pageserver::{
|
||||
layered_repository::metadata::{TimelineMetadata, METADATA_FILE_NAME},
|
||||
remote_storage::compression,
|
||||
};
|
||||
use tokio::{fs, io};
|
||||
use zenith_utils::GIT_VERSION;
|
||||
|
||||
const LIST_SUBCOMMAND: &str = "list";
|
||||
const ARCHIVE_ARG_NAME: &str = "archive";
|
||||
|
||||
const EXTRACT_SUBCOMMAND: &str = "extract";
|
||||
const TARGET_DIRECTORY_ARG_NAME: &str = "target_directory";
|
||||
|
||||
const CREATE_SUBCOMMAND: &str = "create";
|
||||
const SOURCE_DIRECTORY_ARG_NAME: &str = "source_directory";
|
||||
|
||||
#[tokio::main(flavor = "current_thread")]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let arg_matches = App::new("pageserver zst blob [un]compressor utility")
|
||||
.version(GIT_VERSION)
|
||||
.subcommands(vec![
|
||||
App::new(LIST_SUBCOMMAND)
|
||||
.about("List the archive contents")
|
||||
.arg(
|
||||
Arg::new(ARCHIVE_ARG_NAME)
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.help("An archive to list the contents of"),
|
||||
),
|
||||
App::new(EXTRACT_SUBCOMMAND)
|
||||
.about("Extracts the archive into the directory")
|
||||
.arg(
|
||||
Arg::new(ARCHIVE_ARG_NAME)
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.help("An archive to extract"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(TARGET_DIRECTORY_ARG_NAME)
|
||||
.required(false)
|
||||
.takes_value(true)
|
||||
.help("A directory to extract the archive into. Optional, will use the current directory if not specified"),
|
||||
),
|
||||
App::new(CREATE_SUBCOMMAND)
|
||||
.about("Creates an archive with the contents of a directory (only the first level files are taken, metadata file has to be present in the same directory)")
|
||||
.arg(
|
||||
Arg::new(SOURCE_DIRECTORY_ARG_NAME)
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.help("A directory to use for creating the archive"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new(TARGET_DIRECTORY_ARG_NAME)
|
||||
.required(false)
|
||||
.takes_value(true)
|
||||
.help("A directory to create the archive in. Optional, will use the current directory if not specified"),
|
||||
),
|
||||
])
|
||||
.get_matches();
|
||||
|
||||
let subcommand_name = match arg_matches.subcommand_name() {
|
||||
Some(name) => name,
|
||||
None => bail!("No subcommand specified"),
|
||||
};
|
||||
|
||||
let subcommand_matches = match arg_matches.subcommand_matches(subcommand_name) {
|
||||
Some(matches) => matches,
|
||||
None => bail!(
|
||||
"No subcommand arguments were recognized for subcommand '{}'",
|
||||
subcommand_name
|
||||
),
|
||||
};
|
||||
|
||||
let target_dir = Path::new(
|
||||
subcommand_matches
|
||||
.value_of(TARGET_DIRECTORY_ARG_NAME)
|
||||
.unwrap_or("./"),
|
||||
);
|
||||
|
||||
match subcommand_name {
|
||||
LIST_SUBCOMMAND => {
|
||||
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
|
||||
Some(archive) => Path::new(archive),
|
||||
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
|
||||
};
|
||||
list_archive(archive).await
|
||||
}
|
||||
EXTRACT_SUBCOMMAND => {
|
||||
let archive = match subcommand_matches.value_of(ARCHIVE_ARG_NAME) {
|
||||
Some(archive) => Path::new(archive),
|
||||
None => bail!("No '{}' argument is specified", ARCHIVE_ARG_NAME),
|
||||
};
|
||||
extract_archive(archive, target_dir).await
|
||||
}
|
||||
CREATE_SUBCOMMAND => {
|
||||
let source_dir = match subcommand_matches.value_of(SOURCE_DIRECTORY_ARG_NAME) {
|
||||
Some(source) => Path::new(source),
|
||||
None => bail!("No '{}' argument is specified", SOURCE_DIRECTORY_ARG_NAME),
|
||||
};
|
||||
create_archive(source_dir, target_dir).await
|
||||
}
|
||||
unknown => bail!("Unknown subcommand {}", unknown),
|
||||
}
|
||||
}
|
||||
|
||||
async fn list_archive(archive: &Path) -> anyhow::Result<()> {
|
||||
let archive = archive.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the archive path '{}'",
|
||||
archive.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
archive.is_file(),
|
||||
"Path '{}' is not an archive file",
|
||||
archive.display()
|
||||
);
|
||||
println!("Listing an archive at path '{}'", archive.display());
|
||||
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
|
||||
Some(name) => name,
|
||||
None => bail!(
|
||||
"Failed to get the archive name from the path '{}'",
|
||||
archive.display()
|
||||
),
|
||||
};
|
||||
|
||||
let archive_bytes = fs::read(&archive)
|
||||
.await
|
||||
.context("Failed to read the archive bytes")?;
|
||||
|
||||
let header = compression::read_archive_header(archive_name, &mut archive_bytes.as_slice())
|
||||
.await
|
||||
.context("Failed to read the archive header")?;
|
||||
|
||||
let empty_path = Path::new("");
|
||||
println!("-------------------------------");
|
||||
|
||||
let longest_path_in_archive = header
|
||||
.files
|
||||
.iter()
|
||||
.filter_map(|file| Some(file.subpath.as_path(empty_path).to_str()?.len()))
|
||||
.max()
|
||||
.unwrap_or_default()
|
||||
.max(METADATA_FILE_NAME.len());
|
||||
|
||||
for regular_file in &header.files {
|
||||
println!(
|
||||
"File: {:width$} uncompressed size: {} bytes",
|
||||
regular_file.subpath.as_path(empty_path).display(),
|
||||
regular_file.size,
|
||||
width = longest_path_in_archive,
|
||||
)
|
||||
}
|
||||
println!(
|
||||
"File: {:width$} uncompressed size: {} bytes",
|
||||
METADATA_FILE_NAME,
|
||||
header.metadata_file_size,
|
||||
width = longest_path_in_archive,
|
||||
);
|
||||
println!("-------------------------------");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_archive(archive: &Path, target_dir: &Path) -> anyhow::Result<()> {
|
||||
let archive = archive.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the archive path '{}'",
|
||||
archive.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
archive.is_file(),
|
||||
"Path '{}' is not an archive file",
|
||||
archive.display()
|
||||
);
|
||||
let archive_name = match archive.file_name().and_then(|name| name.to_str()) {
|
||||
Some(name) => name,
|
||||
None => bail!(
|
||||
"Failed to get the archive name from the path '{}'",
|
||||
archive.display()
|
||||
),
|
||||
};
|
||||
|
||||
if !target_dir.exists() {
|
||||
fs::create_dir_all(target_dir).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to create the target dir at path '{}'",
|
||||
target_dir.display()
|
||||
)
|
||||
})?;
|
||||
}
|
||||
let target_dir = target_dir.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the target dir path '{}'",
|
||||
target_dir.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
target_dir.is_dir(),
|
||||
"Path '{}' is not a directory",
|
||||
target_dir.display()
|
||||
);
|
||||
let mut dir_contents = fs::read_dir(&target_dir)
|
||||
.await
|
||||
.context("Failed to list the target directory contents")?;
|
||||
let dir_entry = dir_contents
|
||||
.next_entry()
|
||||
.await
|
||||
.context("Failed to list the target directory contents")?;
|
||||
ensure!(
|
||||
dir_entry.is_none(),
|
||||
"Target directory '{}' is not empty",
|
||||
target_dir.display()
|
||||
);
|
||||
|
||||
println!(
|
||||
"Extracting an archive at path '{}' into directory '{}'",
|
||||
archive.display(),
|
||||
target_dir.display()
|
||||
);
|
||||
|
||||
let mut archive_file = fs::File::open(&archive).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to get the archive name from the path '{}'",
|
||||
archive.display()
|
||||
)
|
||||
})?;
|
||||
let header = compression::read_archive_header(archive_name, &mut archive_file)
|
||||
.await
|
||||
.context("Failed to read the archive header")?;
|
||||
compression::uncompress_with_header(&BTreeSet::new(), &target_dir, header, &mut archive_file)
|
||||
.await
|
||||
.context("Failed to extract the archive")
|
||||
}
|
||||
|
||||
async fn create_archive(source_dir: &Path, target_dir: &Path) -> anyhow::Result<()> {
|
||||
let source_dir = source_dir.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the source dir path '{}'",
|
||||
source_dir.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
source_dir.is_dir(),
|
||||
"Path '{}' is not a directory",
|
||||
source_dir.display()
|
||||
);
|
||||
|
||||
if !target_dir.exists() {
|
||||
fs::create_dir_all(target_dir).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to create the target dir at path '{}'",
|
||||
target_dir.display()
|
||||
)
|
||||
})?;
|
||||
}
|
||||
let target_dir = target_dir.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to get the absolute path for the target dir path '{}'",
|
||||
target_dir.display()
|
||||
)
|
||||
})?;
|
||||
ensure!(
|
||||
target_dir.is_dir(),
|
||||
"Path '{}' is not a directory",
|
||||
target_dir.display()
|
||||
);
|
||||
|
||||
println!(
|
||||
"Compressing directory '{}' and creating resulting archive in directory '{}'",
|
||||
source_dir.display(),
|
||||
target_dir.display()
|
||||
);
|
||||
|
||||
let mut metadata_file_contents = None;
|
||||
let mut files_co_archive = Vec::new();
|
||||
|
||||
let mut source_dir_contents = fs::read_dir(&source_dir)
|
||||
.await
|
||||
.context("Failed to read the source directory contents")?;
|
||||
|
||||
while let Some(source_dir_entry) = source_dir_contents
|
||||
.next_entry()
|
||||
.await
|
||||
.context("Failed to read a source dir entry")?
|
||||
{
|
||||
let entry_path = source_dir_entry.path();
|
||||
if entry_path.is_file() {
|
||||
if entry_path.file_name().and_then(|name| name.to_str()) == Some(METADATA_FILE_NAME) {
|
||||
let metadata_bytes = fs::read(entry_path)
|
||||
.await
|
||||
.context("Failed to read metata file bytes in the source dir")?;
|
||||
metadata_file_contents = Some(
|
||||
TimelineMetadata::from_bytes(&metadata_bytes)
|
||||
.context("Failed to parse metata file contents in the source dir")?,
|
||||
);
|
||||
} else {
|
||||
files_co_archive.push(entry_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let metadata = match metadata_file_contents {
|
||||
Some(metadata) => metadata,
|
||||
None => bail!(
|
||||
"No metadata file found in the source dir '{}', cannot create the archive",
|
||||
source_dir.display()
|
||||
),
|
||||
};
|
||||
|
||||
let _ = compression::archive_files_as_stream(
|
||||
&source_dir,
|
||||
files_co_archive.iter(),
|
||||
&metadata,
|
||||
move |mut archive_streamer, archive_name| async move {
|
||||
let archive_target = target_dir.join(&archive_name);
|
||||
let mut archive_file = fs::File::create(&archive_target).await?;
|
||||
io::copy(&mut archive_streamer, &mut archive_file).await?;
|
||||
Ok(archive_target)
|
||||
},
|
||||
)
|
||||
.await
|
||||
.context("Failed to create an archive")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
75
pageserver/src/bin/replay.rs
Normal file
75
pageserver/src/bin/replay.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::Result;
|
||||
use postgres_ffi::{pg_constants::WAL_SEGMENT_SIZE, waldecoder::WalStreamDecoder};
|
||||
use utils::zid::{ZTenantId, ZTimelineId};
|
||||
use tokio::net::TcpStream;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
|
||||
struct PageServiceApi {
|
||||
stream: TcpStream,
|
||||
}
|
||||
|
||||
impl PageServiceApi {
|
||||
async fn connect(tenant: &ZTenantId, timeline: &ZTimelineId, connstr: &str) -> Result<Self> {
|
||||
let mut stream = TcpStream::connect("localhost:15000").await?;
|
||||
|
||||
// Connect to pageserver
|
||||
// TODO read host, port, dbname, user from command line
|
||||
let (client, conn) = tokio_postgres::Config::new()
|
||||
.host("127.0.0.1")
|
||||
.port(15000)
|
||||
.dbname("postgres")
|
||||
.user("zenith_admin")
|
||||
.connect_raw(&mut stream, tokio_postgres::NoTls)
|
||||
.await?;
|
||||
|
||||
let init_query = format!("callmemaybe {} {} {}", tenant, timeline, connstr);
|
||||
tokio::select! {
|
||||
_ = conn => panic!("connection closed during callmemaybe"),
|
||||
_ = client.query(init_query.as_str(), &[]) => (),
|
||||
};
|
||||
|
||||
Ok(Self { stream })
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
use clap::{App, Arg};
|
||||
let arg_matches = App::new("Replay")
|
||||
.arg(
|
||||
Arg::new("tenant")
|
||||
.long("tenant")
|
||||
.takes_value(true)
|
||||
)
|
||||
.arg(
|
||||
Arg::new("timeline")
|
||||
.long("timeline")
|
||||
.takes_value(true)
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let partial_path = "/home/bojan/tmp/sk_wal";
|
||||
let startpos = Lsn(23761464); // I got this by grepping sk log for "restart decoder"
|
||||
let xlogoff: usize = startpos.segment_offset(WAL_SEGMENT_SIZE);
|
||||
|
||||
let mut decoder = WalStreamDecoder::new(startpos);
|
||||
let bytes = std::fs::read(partial_path)?;
|
||||
decoder.feed_bytes(&bytes[xlogoff..(xlogoff+10000)]);
|
||||
|
||||
while let Some((lsn, rec)) = decoder.poll_decode()? {
|
||||
println!("lsn: {}", lsn);
|
||||
}
|
||||
|
||||
// TODO start replication server, get connstr
|
||||
|
||||
let tenant = ZTenantId::from_str(arg_matches.value_of("tenant").unwrap())?;
|
||||
let timeline = ZTimelineId::from_str(arg_matches.value_of("timeline").unwrap())?;
|
||||
let connstr = "lol";
|
||||
let mut api = PageServiceApi::connect(&tenant, &timeline, connstr).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -6,8 +6,7 @@ use clap::{App, Arg};
|
||||
use pageserver::layered_repository::metadata::TimelineMetadata;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use zenith_utils::lsn::Lsn;
|
||||
use zenith_utils::GIT_VERSION;
|
||||
use utils::{lsn::Lsn, GIT_VERSION};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let arg_matches = App::new("Zenith update metadata utility")
|
||||
|
||||
@@ -4,22 +4,24 @@
|
||||
//! file, or on the command line.
|
||||
//! See also `settings.md` for better description on every parameter.
|
||||
|
||||
use anyhow::{bail, ensure, Context, Result};
|
||||
use toml_edit;
|
||||
use toml_edit::{Document, Item};
|
||||
use zenith_utils::postgres_backend::AuthType;
|
||||
use zenith_utils::zid::{ZNodeId, ZTenantId, ZTimelineId};
|
||||
|
||||
use std::convert::TryInto;
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use std::env;
|
||||
use std::num::{NonZeroU32, NonZeroUsize};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
use toml_edit;
|
||||
use toml_edit::{Document, Item};
|
||||
use utils::{
|
||||
postgres_backend::AuthType,
|
||||
zid::{ZNodeId, ZTenantId, ZTimelineId},
|
||||
};
|
||||
|
||||
use crate::layered_repository::TIMELINES_SEGMENT_NAME;
|
||||
use crate::tenant_config::{TenantConf, TenantConfOpt};
|
||||
|
||||
pub mod defaults {
|
||||
use crate::tenant_config::defaults::*;
|
||||
use const_format::formatcp;
|
||||
|
||||
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
|
||||
@@ -27,27 +29,22 @@ pub mod defaults {
|
||||
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
|
||||
pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
|
||||
|
||||
// FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
|
||||
// would be more appropriate. But a low value forces the code to be exercised more,
|
||||
// which is good for now to trigger bugs.
|
||||
// This parameter actually determines L0 layer file size.
|
||||
pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
|
||||
|
||||
// Target file size, when creating image and delta layers.
|
||||
// This parameter determines L1 layer file size.
|
||||
pub const DEFAULT_COMPACTION_TARGET_SIZE: u64 = 128 * 1024 * 1024;
|
||||
|
||||
pub const DEFAULT_COMPACTION_PERIOD: &str = "1 s";
|
||||
|
||||
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
|
||||
pub const DEFAULT_GC_PERIOD: &str = "100 s";
|
||||
|
||||
pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "60 s";
|
||||
pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s";
|
||||
|
||||
pub const DEFAULT_SUPERUSER: &str = "zenith_admin";
|
||||
pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC: usize = 10;
|
||||
/// How many different timelines can be processed simultaneously when synchronizing layers with the remote storage.
|
||||
/// During regular work, pageserver produces one layer file per timeline checkpoint, with bursts of concurrency
|
||||
/// during start (where local and remote timelines are compared and initial sync tasks are scheduled) and timeline attach.
|
||||
/// Both cases may trigger timeline download, that might download a lot of layers. This concurrency is limited by the clients internally, if needed.
|
||||
pub const DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_TIMELINES_SYNC: usize = 50;
|
||||
pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
|
||||
/// Currently, sync happens with AWS S3, that has two limits on requests per second:
|
||||
/// ~200 RPS for IAM services
|
||||
/// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html
|
||||
/// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
|
||||
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
|
||||
pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
|
||||
|
||||
pub const DEFAULT_PAGE_CACHE_SIZE: usize = 8192;
|
||||
pub const DEFAULT_MAX_FILE_DESCRIPTORS: usize = 100;
|
||||
@@ -62,13 +59,6 @@ pub mod defaults {
|
||||
#listen_pg_addr = '{DEFAULT_PG_LISTEN_ADDR}'
|
||||
#listen_http_addr = '{DEFAULT_HTTP_LISTEN_ADDR}'
|
||||
|
||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
|
||||
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
|
||||
|
||||
#gc_period = '{DEFAULT_GC_PERIOD}'
|
||||
#gc_horizon = {DEFAULT_GC_HORIZON}
|
||||
|
||||
#wait_lsn_timeout = '{DEFAULT_WAIT_LSN_TIMEOUT}'
|
||||
#wal_redo_timeout = '{DEFAULT_WAL_REDO_TIMEOUT}'
|
||||
|
||||
@@ -77,6 +67,17 @@ pub mod defaults {
|
||||
# initial superuser role name to use when creating a new tenant
|
||||
#initial_superuser_name = '{DEFAULT_SUPERUSER}'
|
||||
|
||||
# [tenant_config]
|
||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||
#compaction_target_size = {DEFAULT_COMPACTION_TARGET_SIZE} # in bytes
|
||||
#compaction_period = '{DEFAULT_COMPACTION_PERIOD}'
|
||||
#compaction_threshold = '{DEFAULT_COMPACTION_THRESHOLD}'
|
||||
|
||||
#gc_period = '{DEFAULT_GC_PERIOD}'
|
||||
#gc_horizon = {DEFAULT_GC_HORIZON}
|
||||
#image_creation_threshold = {DEFAULT_IMAGE_CREATION_THRESHOLD}
|
||||
#pitr_interval = '{DEFAULT_PITR_INTERVAL}'
|
||||
|
||||
# [remote_storage]
|
||||
|
||||
"###
|
||||
@@ -94,22 +95,6 @@ pub struct PageServerConf {
|
||||
/// Example (default): 127.0.0.1:9898
|
||||
pub listen_http_addr: String,
|
||||
|
||||
// Flush out an inmemory layer, if it's holding WAL older than this
|
||||
// This puts a backstop on how much WAL needs to be re-digested if the
|
||||
// page server crashes.
|
||||
// This parameter actually determines L0 layer file size.
|
||||
pub checkpoint_distance: u64,
|
||||
|
||||
// Target file size, when creating image and delta layers.
|
||||
// This parameter determines L1 layer file size.
|
||||
pub compaction_target_size: u64,
|
||||
|
||||
// How often to check if there's compaction work to be done.
|
||||
pub compaction_period: Duration,
|
||||
|
||||
pub gc_horizon: u64,
|
||||
pub gc_period: Duration,
|
||||
|
||||
// Timeout when waiting for WAL receiver to catch up to an LSN given in a GetPage@LSN call.
|
||||
pub wait_lsn_timeout: Duration,
|
||||
// How long to wait for WAL redo to complete.
|
||||
@@ -134,6 +119,28 @@ pub struct PageServerConf {
|
||||
|
||||
pub auth_validation_public_key_path: Option<PathBuf>,
|
||||
pub remote_storage_config: Option<RemoteStorageConfig>,
|
||||
|
||||
pub profiling: ProfilingConfig,
|
||||
pub default_tenant_conf: TenantConf,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum ProfilingConfig {
|
||||
Disabled,
|
||||
PageRequests,
|
||||
}
|
||||
|
||||
impl FromStr for ProfilingConfig {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<ProfilingConfig, Self::Err> {
|
||||
let result = match s {
|
||||
"disabled" => ProfilingConfig::Disabled,
|
||||
"page_requests" => ProfilingConfig::PageRequests,
|
||||
_ => bail!("invalid value \"{s}\" for profiling option, valid values are \"disabled\" and \"page_requests\""),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
// use dedicated enum for builder to better indicate the intention
|
||||
@@ -158,14 +165,6 @@ struct PageServerConfigBuilder {
|
||||
|
||||
listen_http_addr: BuilderValue<String>,
|
||||
|
||||
checkpoint_distance: BuilderValue<u64>,
|
||||
|
||||
compaction_target_size: BuilderValue<u64>,
|
||||
compaction_period: BuilderValue<Duration>,
|
||||
|
||||
gc_horizon: BuilderValue<u64>,
|
||||
gc_period: BuilderValue<Duration>,
|
||||
|
||||
wait_lsn_timeout: BuilderValue<Duration>,
|
||||
wal_redo_timeout: BuilderValue<Duration>,
|
||||
|
||||
@@ -185,6 +184,8 @@ struct PageServerConfigBuilder {
|
||||
remote_storage_config: BuilderValue<Option<RemoteStorageConfig>>,
|
||||
|
||||
id: BuilderValue<ZNodeId>,
|
||||
|
||||
profiling: BuilderValue<ProfilingConfig>,
|
||||
}
|
||||
|
||||
impl Default for PageServerConfigBuilder {
|
||||
@@ -194,13 +195,6 @@ impl Default for PageServerConfigBuilder {
|
||||
Self {
|
||||
listen_pg_addr: Set(DEFAULT_PG_LISTEN_ADDR.to_string()),
|
||||
listen_http_addr: Set(DEFAULT_HTTP_LISTEN_ADDR.to_string()),
|
||||
checkpoint_distance: Set(DEFAULT_CHECKPOINT_DISTANCE),
|
||||
compaction_target_size: Set(DEFAULT_COMPACTION_TARGET_SIZE),
|
||||
compaction_period: Set(humantime::parse_duration(DEFAULT_COMPACTION_PERIOD)
|
||||
.expect("cannot parse default compaction period")),
|
||||
gc_horizon: Set(DEFAULT_GC_HORIZON),
|
||||
gc_period: Set(humantime::parse_duration(DEFAULT_GC_PERIOD)
|
||||
.expect("cannot parse default gc period")),
|
||||
wait_lsn_timeout: Set(humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT)
|
||||
.expect("cannot parse default wait lsn timeout")),
|
||||
wal_redo_timeout: Set(humantime::parse_duration(DEFAULT_WAL_REDO_TIMEOUT)
|
||||
@@ -216,6 +210,7 @@ impl Default for PageServerConfigBuilder {
|
||||
auth_validation_public_key_path: Set(None),
|
||||
remote_storage_config: Set(None),
|
||||
id: NotSet,
|
||||
profiling: Set(ProfilingConfig::Disabled),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -229,26 +224,6 @@ impl PageServerConfigBuilder {
|
||||
self.listen_http_addr = BuilderValue::Set(listen_http_addr)
|
||||
}
|
||||
|
||||
pub fn checkpoint_distance(&mut self, checkpoint_distance: u64) {
|
||||
self.checkpoint_distance = BuilderValue::Set(checkpoint_distance)
|
||||
}
|
||||
|
||||
pub fn compaction_target_size(&mut self, compaction_target_size: u64) {
|
||||
self.compaction_target_size = BuilderValue::Set(compaction_target_size)
|
||||
}
|
||||
|
||||
pub fn compaction_period(&mut self, compaction_period: Duration) {
|
||||
self.compaction_period = BuilderValue::Set(compaction_period)
|
||||
}
|
||||
|
||||
pub fn gc_horizon(&mut self, gc_horizon: u64) {
|
||||
self.gc_horizon = BuilderValue::Set(gc_horizon)
|
||||
}
|
||||
|
||||
pub fn gc_period(&mut self, gc_period: Duration) {
|
||||
self.gc_period = BuilderValue::Set(gc_period)
|
||||
}
|
||||
|
||||
pub fn wait_lsn_timeout(&mut self, wait_lsn_timeout: Duration) {
|
||||
self.wait_lsn_timeout = BuilderValue::Set(wait_lsn_timeout)
|
||||
}
|
||||
@@ -296,52 +271,46 @@ impl PageServerConfigBuilder {
|
||||
self.id = BuilderValue::Set(node_id)
|
||||
}
|
||||
|
||||
pub fn profiling(&mut self, profiling: ProfilingConfig) {
|
||||
self.profiling = BuilderValue::Set(profiling)
|
||||
}
|
||||
|
||||
pub fn build(self) -> Result<PageServerConf> {
|
||||
Ok(PageServerConf {
|
||||
listen_pg_addr: self
|
||||
.listen_pg_addr
|
||||
.ok_or(anyhow::anyhow!("missing listen_pg_addr"))?,
|
||||
.ok_or(anyhow!("missing listen_pg_addr"))?,
|
||||
listen_http_addr: self
|
||||
.listen_http_addr
|
||||
.ok_or(anyhow::anyhow!("missing listen_http_addr"))?,
|
||||
checkpoint_distance: self
|
||||
.checkpoint_distance
|
||||
.ok_or(anyhow::anyhow!("missing checkpoint_distance"))?,
|
||||
compaction_target_size: self
|
||||
.compaction_target_size
|
||||
.ok_or(anyhow::anyhow!("missing compaction_target_size"))?,
|
||||
compaction_period: self
|
||||
.compaction_period
|
||||
.ok_or(anyhow::anyhow!("missing compaction_period"))?,
|
||||
gc_horizon: self
|
||||
.gc_horizon
|
||||
.ok_or(anyhow::anyhow!("missing gc_horizon"))?,
|
||||
gc_period: self.gc_period.ok_or(anyhow::anyhow!("missing gc_period"))?,
|
||||
.ok_or(anyhow!("missing listen_http_addr"))?,
|
||||
wait_lsn_timeout: self
|
||||
.wait_lsn_timeout
|
||||
.ok_or(anyhow::anyhow!("missing wait_lsn_timeout"))?,
|
||||
.ok_or(anyhow!("missing wait_lsn_timeout"))?,
|
||||
wal_redo_timeout: self
|
||||
.wal_redo_timeout
|
||||
.ok_or(anyhow::anyhow!("missing wal_redo_timeout"))?,
|
||||
superuser: self.superuser.ok_or(anyhow::anyhow!("missing superuser"))?,
|
||||
.ok_or(anyhow!("missing wal_redo_timeout"))?,
|
||||
superuser: self.superuser.ok_or(anyhow!("missing superuser"))?,
|
||||
page_cache_size: self
|
||||
.page_cache_size
|
||||
.ok_or(anyhow::anyhow!("missing page_cache_size"))?,
|
||||
.ok_or(anyhow!("missing page_cache_size"))?,
|
||||
max_file_descriptors: self
|
||||
.max_file_descriptors
|
||||
.ok_or(anyhow::anyhow!("missing max_file_descriptors"))?,
|
||||
workdir: self.workdir.ok_or(anyhow::anyhow!("missing workdir"))?,
|
||||
.ok_or(anyhow!("missing max_file_descriptors"))?,
|
||||
workdir: self.workdir.ok_or(anyhow!("missing workdir"))?,
|
||||
pg_distrib_dir: self
|
||||
.pg_distrib_dir
|
||||
.ok_or(anyhow::anyhow!("missing pg_distrib_dir"))?,
|
||||
auth_type: self.auth_type.ok_or(anyhow::anyhow!("missing auth_type"))?,
|
||||
.ok_or(anyhow!("missing pg_distrib_dir"))?,
|
||||
auth_type: self.auth_type.ok_or(anyhow!("missing auth_type"))?,
|
||||
auth_validation_public_key_path: self
|
||||
.auth_validation_public_key_path
|
||||
.ok_or(anyhow::anyhow!("missing auth_validation_public_key_path"))?,
|
||||
.ok_or(anyhow!("missing auth_validation_public_key_path"))?,
|
||||
remote_storage_config: self
|
||||
.remote_storage_config
|
||||
.ok_or(anyhow::anyhow!("missing remote_storage_config"))?,
|
||||
id: self.id.ok_or(anyhow::anyhow!("missing id"))?,
|
||||
.ok_or(anyhow!("missing remote_storage_config"))?,
|
||||
id: self.id.ok_or(anyhow!("missing id"))?,
|
||||
profiling: self.profiling.ok_or(anyhow!("missing profiling"))?,
|
||||
// TenantConf is handled separately
|
||||
default_tenant_conf: TenantConf::default(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -350,7 +319,7 @@ impl PageServerConfigBuilder {
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct RemoteStorageConfig {
|
||||
/// Max allowed number of concurrent sync operations between pageserver and the remote storage.
|
||||
pub max_concurrent_sync: NonZeroUsize,
|
||||
pub max_concurrent_timelines_sync: NonZeroUsize,
|
||||
/// Max allowed errors before the sync task is considered failed and evicted.
|
||||
pub max_sync_errors: NonZeroU32,
|
||||
/// The storage connection configuration.
|
||||
@@ -391,6 +360,9 @@ pub struct S3Config {
|
||||
///
|
||||
/// Example: `http://127.0.0.1:5000`
|
||||
pub endpoint: Option<String>,
|
||||
/// AWS S3 has various limits on its API calls, we need not to exceed those.
|
||||
/// See [`defaults::DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.
|
||||
pub concurrency_limit: NonZeroUsize,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for S3Config {
|
||||
@@ -399,6 +371,7 @@ impl std::fmt::Debug for S3Config {
|
||||
.field("bucket_name", &self.bucket_name)
|
||||
.field("bucket_region", &self.bucket_region)
|
||||
.field("prefix_in_bucket", &self.prefix_in_bucket)
|
||||
.field("concurrency_limit", &self.concurrency_limit)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
@@ -444,17 +417,12 @@ impl PageServerConf {
|
||||
let mut builder = PageServerConfigBuilder::default();
|
||||
builder.workdir(workdir.to_owned());
|
||||
|
||||
let mut t_conf: TenantConfOpt = Default::default();
|
||||
|
||||
for (key, item) in toml.iter() {
|
||||
match key {
|
||||
"listen_pg_addr" => builder.listen_pg_addr(parse_toml_string(key, item)?),
|
||||
"listen_http_addr" => builder.listen_http_addr(parse_toml_string(key, item)?),
|
||||
"checkpoint_distance" => builder.checkpoint_distance(parse_toml_u64(key, item)?),
|
||||
"compaction_target_size" => {
|
||||
builder.compaction_target_size(parse_toml_u64(key, item)?)
|
||||
}
|
||||
"compaction_period" => builder.compaction_period(parse_toml_duration(key, item)?),
|
||||
"gc_horizon" => builder.gc_horizon(parse_toml_u64(key, item)?),
|
||||
"gc_period" => builder.gc_period(parse_toml_duration(key, item)?),
|
||||
"wait_lsn_timeout" => builder.wait_lsn_timeout(parse_toml_duration(key, item)?),
|
||||
"wal_redo_timeout" => builder.wal_redo_timeout(parse_toml_duration(key, item)?),
|
||||
"initial_superuser_name" => builder.superuser(parse_toml_string(key, item)?),
|
||||
@@ -468,12 +436,16 @@ impl PageServerConf {
|
||||
"auth_validation_public_key_path" => builder.auth_validation_public_key_path(Some(
|
||||
PathBuf::from(parse_toml_string(key, item)?),
|
||||
)),
|
||||
"auth_type" => builder.auth_type(parse_toml_auth_type(key, item)?),
|
||||
"auth_type" => builder.auth_type(parse_toml_from_str(key, item)?),
|
||||
"remote_storage" => {
|
||||
builder.remote_storage_config(Some(Self::parse_remote_storage_config(item)?))
|
||||
}
|
||||
"tenant_config" => {
|
||||
t_conf = Self::parse_toml_tenant_conf(item)?;
|
||||
}
|
||||
"id" => builder.id(ZNodeId(parse_toml_u64(key, item)?)),
|
||||
_ => bail!("unrecognized pageserver option '{}'", key),
|
||||
"profiling" => builder.profiling(parse_toml_from_str(key, item)?),
|
||||
_ => bail!("unrecognized pageserver option '{key}'"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -499,41 +471,75 @@ impl PageServerConf {
|
||||
);
|
||||
}
|
||||
|
||||
conf.default_tenant_conf = t_conf.merge(TenantConf::default());
|
||||
|
||||
Ok(conf)
|
||||
}
|
||||
|
||||
// subroutine of parse_and_validate to parse `[tenant_conf]` section
|
||||
|
||||
pub fn parse_toml_tenant_conf(item: &toml_edit::Item) -> Result<TenantConfOpt> {
|
||||
let mut t_conf: TenantConfOpt = Default::default();
|
||||
if let Some(checkpoint_distance) = item.get("checkpoint_distance") {
|
||||
t_conf.checkpoint_distance =
|
||||
Some(parse_toml_u64("checkpoint_distance", checkpoint_distance)?);
|
||||
}
|
||||
|
||||
if let Some(compaction_target_size) = item.get("compaction_target_size") {
|
||||
t_conf.compaction_target_size = Some(parse_toml_u64(
|
||||
"compaction_target_size",
|
||||
compaction_target_size,
|
||||
)?);
|
||||
}
|
||||
|
||||
if let Some(compaction_period) = item.get("compaction_period") {
|
||||
t_conf.compaction_period =
|
||||
Some(parse_toml_duration("compaction_period", compaction_period)?);
|
||||
}
|
||||
|
||||
if let Some(compaction_threshold) = item.get("compaction_threshold") {
|
||||
t_conf.compaction_threshold =
|
||||
Some(parse_toml_u64("compaction_threshold", compaction_threshold)?.try_into()?);
|
||||
}
|
||||
|
||||
if let Some(gc_horizon) = item.get("gc_horizon") {
|
||||
t_conf.gc_horizon = Some(parse_toml_u64("gc_horizon", gc_horizon)?);
|
||||
}
|
||||
|
||||
if let Some(gc_period) = item.get("gc_period") {
|
||||
t_conf.gc_period = Some(parse_toml_duration("gc_period", gc_period)?);
|
||||
}
|
||||
|
||||
if let Some(pitr_interval) = item.get("pitr_interval") {
|
||||
t_conf.pitr_interval = Some(parse_toml_duration("pitr_interval", pitr_interval)?);
|
||||
}
|
||||
|
||||
Ok(t_conf)
|
||||
}
|
||||
|
||||
/// subroutine of parse_config(), to parse the `[remote_storage]` table.
|
||||
fn parse_remote_storage_config(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
|
||||
let local_path = toml.get("local_path");
|
||||
let bucket_name = toml.get("bucket_name");
|
||||
let bucket_region = toml.get("bucket_region");
|
||||
|
||||
let max_concurrent_sync: NonZeroUsize = if let Some(s) = toml.get("max_concurrent_sync") {
|
||||
parse_toml_u64("max_concurrent_sync", s)
|
||||
.and_then(|toml_u64| {
|
||||
toml_u64.try_into().with_context(|| {
|
||||
format!("'max_concurrent_sync' value {} is too large", toml_u64)
|
||||
})
|
||||
})
|
||||
.ok()
|
||||
.and_then(NonZeroUsize::new)
|
||||
.context("'max_concurrent_sync' must be a non-zero positive integer")?
|
||||
} else {
|
||||
NonZeroUsize::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC).unwrap()
|
||||
};
|
||||
let max_sync_errors: NonZeroU32 = if let Some(s) = toml.get("max_sync_errors") {
|
||||
parse_toml_u64("max_sync_errors", s)
|
||||
.and_then(|toml_u64| {
|
||||
toml_u64.try_into().with_context(|| {
|
||||
format!("'max_sync_errors' value {} is too large", toml_u64)
|
||||
})
|
||||
})
|
||||
.ok()
|
||||
.and_then(NonZeroU32::new)
|
||||
.context("'max_sync_errors' must be a non-zero positive integer")?
|
||||
} else {
|
||||
NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS).unwrap()
|
||||
};
|
||||
let max_concurrent_timelines_sync = NonZeroUsize::new(
|
||||
parse_optional_integer("max_concurrent_timelines_sync", toml)?
|
||||
.unwrap_or(defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_TIMELINES_SYNC),
|
||||
)
|
||||
.context("Failed to parse 'max_concurrent_timelines_sync' as a positive integer")?;
|
||||
|
||||
let max_sync_errors = NonZeroU32::new(
|
||||
parse_optional_integer("max_sync_errors", toml)?
|
||||
.unwrap_or(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
|
||||
)
|
||||
.context("Failed to parse 'max_sync_errors' as a positive integer")?;
|
||||
|
||||
let concurrency_limit = NonZeroUsize::new(
|
||||
parse_optional_integer("concurrency_limit", toml)?
|
||||
.unwrap_or(defaults::DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT),
|
||||
)
|
||||
.context("Failed to parse 'concurrency_limit' as a positive integer")?;
|
||||
|
||||
let storage = match (local_path, bucket_name, bucket_region) {
|
||||
(None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
|
||||
@@ -564,6 +570,7 @@ impl PageServerConf {
|
||||
.get("endpoint")
|
||||
.map(|endpoint| parse_toml_string("endpoint", endpoint))
|
||||
.transpose()?,
|
||||
concurrency_limit,
|
||||
}),
|
||||
(Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
|
||||
parse_toml_string("local_path", local_path)?,
|
||||
@@ -572,7 +579,7 @@ impl PageServerConf {
|
||||
};
|
||||
|
||||
Ok(RemoteStorageConfig {
|
||||
max_concurrent_sync,
|
||||
max_concurrent_timelines_sync,
|
||||
max_sync_errors,
|
||||
storage,
|
||||
})
|
||||
@@ -580,18 +587,13 @@ impl PageServerConf {
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn test_repo_dir(test_name: &str) -> PathBuf {
|
||||
PathBuf::from(format!("../tmp_check/test_{}", test_name))
|
||||
PathBuf::from(format!("../tmp_check/test_{test_name}"))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn dummy_conf(repo_dir: PathBuf) -> Self {
|
||||
PageServerConf {
|
||||
id: ZNodeId(0),
|
||||
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
|
||||
compaction_target_size: 4 * 1024 * 1024,
|
||||
compaction_period: Duration::from_secs(10),
|
||||
gc_horizon: defaults::DEFAULT_GC_HORIZON,
|
||||
gc_period: Duration::from_secs(10),
|
||||
wait_lsn_timeout: Duration::from_secs(60),
|
||||
wal_redo_timeout: Duration::from_secs(60),
|
||||
page_cache_size: defaults::DEFAULT_PAGE_CACHE_SIZE,
|
||||
@@ -604,6 +606,8 @@ impl PageServerConf {
|
||||
auth_type: AuthType::Trust,
|
||||
auth_validation_public_key_path: None,
|
||||
remote_storage_config: None,
|
||||
profiling: ProfilingConfig::Disabled,
|
||||
default_tenant_conf: TenantConf::dummy_conf(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -613,7 +617,7 @@ impl PageServerConf {
|
||||
fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
|
||||
let s = item
|
||||
.as_str()
|
||||
.with_context(|| format!("configure option {} is not a string", name))?;
|
||||
.with_context(|| format!("configure option {name} is not a string"))?;
|
||||
Ok(s.to_string())
|
||||
}
|
||||
|
||||
@@ -622,26 +626,46 @@ fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
|
||||
// for our use, though.
|
||||
let i: i64 = item
|
||||
.as_integer()
|
||||
.with_context(|| format!("configure option {} is not an integer", name))?;
|
||||
.with_context(|| format!("configure option {name} is not an integer"))?;
|
||||
if i < 0 {
|
||||
bail!("configure option {} cannot be negative", name);
|
||||
bail!("configure option {name} cannot be negative");
|
||||
}
|
||||
Ok(i as u64)
|
||||
}
|
||||
|
||||
fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
|
||||
where
|
||||
I: TryFrom<i64, Error = E>,
|
||||
E: std::error::Error + Send + Sync + 'static,
|
||||
{
|
||||
let toml_integer = match item.get(name) {
|
||||
Some(item) => item
|
||||
.as_integer()
|
||||
.with_context(|| format!("configure option {name} is not an integer"))?,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
I::try_from(toml_integer)
|
||||
.map(Some)
|
||||
.with_context(|| format!("configure option {name} is too large"))
|
||||
}
|
||||
|
||||
fn parse_toml_duration(name: &str, item: &Item) -> Result<Duration> {
|
||||
let s = item
|
||||
.as_str()
|
||||
.with_context(|| format!("configure option {} is not a string", name))?;
|
||||
.with_context(|| format!("configure option {name} is not a string"))?;
|
||||
|
||||
Ok(humantime::parse_duration(s)?)
|
||||
}
|
||||
|
||||
fn parse_toml_auth_type(name: &str, item: &Item) -> Result<AuthType> {
|
||||
fn parse_toml_from_str<T>(name: &str, item: &Item) -> Result<T>
|
||||
where
|
||||
T: FromStr<Err = anyhow::Error>,
|
||||
{
|
||||
let v = item
|
||||
.as_str()
|
||||
.with_context(|| format!("configure option {} is not a string", name))?;
|
||||
AuthType::from_str(v)
|
||||
.with_context(|| format!("configure option {name} is not a string"))?;
|
||||
T::from_str(v)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -658,14 +682,6 @@ mod tests {
|
||||
listen_pg_addr = '127.0.0.1:64000'
|
||||
listen_http_addr = '127.0.0.1:9898'
|
||||
|
||||
checkpoint_distance = 111 # in bytes
|
||||
|
||||
compaction_target_size = 111 # in bytes
|
||||
compaction_period = '111 s'
|
||||
|
||||
gc_period = '222 s'
|
||||
gc_horizon = 222
|
||||
|
||||
wait_lsn_timeout = '111 s'
|
||||
wal_redo_timeout = '111 s'
|
||||
|
||||
@@ -686,10 +702,8 @@ id = 10
|
||||
let config_string = format!("pg_distrib_dir='{}'\nid=10", pg_distrib_dir.display());
|
||||
let toml = config_string.parse()?;
|
||||
|
||||
let parsed_config =
|
||||
PageServerConf::parse_and_validate(&toml, &workdir).unwrap_or_else(|e| {
|
||||
panic!("Failed to parse config '{}', reason: {}", config_string, e)
|
||||
});
|
||||
let parsed_config = PageServerConf::parse_and_validate(&toml, &workdir)
|
||||
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e}"));
|
||||
|
||||
assert_eq!(
|
||||
parsed_config,
|
||||
@@ -697,11 +711,6 @@ id = 10
|
||||
id: ZNodeId(10),
|
||||
listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(),
|
||||
listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(),
|
||||
checkpoint_distance: defaults::DEFAULT_CHECKPOINT_DISTANCE,
|
||||
compaction_target_size: defaults::DEFAULT_COMPACTION_TARGET_SIZE,
|
||||
compaction_period: humantime::parse_duration(defaults::DEFAULT_COMPACTION_PERIOD)?,
|
||||
gc_horizon: defaults::DEFAULT_GC_HORIZON,
|
||||
gc_period: humantime::parse_duration(defaults::DEFAULT_GC_PERIOD)?,
|
||||
wait_lsn_timeout: humantime::parse_duration(defaults::DEFAULT_WAIT_LSN_TIMEOUT)?,
|
||||
wal_redo_timeout: humantime::parse_duration(defaults::DEFAULT_WAL_REDO_TIMEOUT)?,
|
||||
superuser: defaults::DEFAULT_SUPERUSER.to_string(),
|
||||
@@ -712,6 +721,8 @@ id = 10
|
||||
auth_type: AuthType::Trust,
|
||||
auth_validation_public_key_path: None,
|
||||
remote_storage_config: None,
|
||||
profiling: ProfilingConfig::Disabled,
|
||||
default_tenant_conf: TenantConf::default(),
|
||||
},
|
||||
"Correct defaults should be used when no config values are provided"
|
||||
);
|
||||
@@ -725,16 +736,13 @@ id = 10
|
||||
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
|
||||
|
||||
let config_string = format!(
|
||||
"{}pg_distrib_dir='{}'",
|
||||
ALL_BASE_VALUES_TOML,
|
||||
"{ALL_BASE_VALUES_TOML}pg_distrib_dir='{}'",
|
||||
pg_distrib_dir.display()
|
||||
);
|
||||
let toml = config_string.parse()?;
|
||||
|
||||
let parsed_config =
|
||||
PageServerConf::parse_and_validate(&toml, &workdir).unwrap_or_else(|e| {
|
||||
panic!("Failed to parse config '{}', reason: {}", config_string, e)
|
||||
});
|
||||
let parsed_config = PageServerConf::parse_and_validate(&toml, &workdir)
|
||||
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e}"));
|
||||
|
||||
assert_eq!(
|
||||
parsed_config,
|
||||
@@ -742,11 +750,6 @@ id = 10
|
||||
id: ZNodeId(10),
|
||||
listen_pg_addr: "127.0.0.1:64000".to_string(),
|
||||
listen_http_addr: "127.0.0.1:9898".to_string(),
|
||||
checkpoint_distance: 111,
|
||||
compaction_target_size: 111,
|
||||
compaction_period: Duration::from_secs(111),
|
||||
gc_horizon: 222,
|
||||
gc_period: Duration::from_secs(222),
|
||||
wait_lsn_timeout: Duration::from_secs(111),
|
||||
wal_redo_timeout: Duration::from_secs(111),
|
||||
superuser: "zzzz".to_string(),
|
||||
@@ -757,6 +760,8 @@ id = 10
|
||||
auth_type: AuthType::Trust,
|
||||
auth_validation_public_key_path: None,
|
||||
remote_storage_config: None,
|
||||
profiling: ProfilingConfig::Disabled,
|
||||
default_tenant_conf: TenantConf::default(),
|
||||
},
|
||||
"Should be able to parse all basic config values correctly"
|
||||
);
|
||||
@@ -785,37 +790,33 @@ local_path = '{}'"#,
|
||||
|
||||
for remote_storage_config_str in identical_toml_declarations {
|
||||
let config_string = format!(
|
||||
r#"{}
|
||||
r#"{ALL_BASE_VALUES_TOML}
|
||||
pg_distrib_dir='{}'
|
||||
|
||||
{}"#,
|
||||
ALL_BASE_VALUES_TOML,
|
||||
{remote_storage_config_str}"#,
|
||||
pg_distrib_dir.display(),
|
||||
remote_storage_config_str,
|
||||
);
|
||||
|
||||
let toml = config_string.parse()?;
|
||||
|
||||
let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
|
||||
.unwrap_or_else(|e| {
|
||||
panic!("Failed to parse config '{}', reason: {}", config_string, e)
|
||||
})
|
||||
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e}"))
|
||||
.remote_storage_config
|
||||
.expect("Should have remote storage config for the local FS");
|
||||
|
||||
assert_eq!(
|
||||
parsed_remote_storage_config,
|
||||
RemoteStorageConfig {
|
||||
max_concurrent_sync: NonZeroUsize::new(
|
||||
defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNC
|
||||
)
|
||||
.unwrap(),
|
||||
max_sync_errors: NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS)
|
||||
parsed_remote_storage_config,
|
||||
RemoteStorageConfig {
|
||||
max_concurrent_timelines_sync: NonZeroUsize::new(
|
||||
defaults::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_TIMELINES_SYNC
|
||||
)
|
||||
.unwrap(),
|
||||
storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
|
||||
},
|
||||
"Remote storage config should correctly parse the local FS config and fill other storage defaults"
|
||||
);
|
||||
max_sync_errors: NonZeroU32::new(defaults::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS)
|
||||
.unwrap(),
|
||||
storage: RemoteStorageKind::LocalFs(local_storage_path.clone()),
|
||||
},
|
||||
"Remote storage config should correctly parse the local FS config and fill other storage defaults"
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -831,52 +832,49 @@ pg_distrib_dir='{}'
|
||||
let access_key_id = "SOMEKEYAAAAASADSAH*#".to_string();
|
||||
let secret_access_key = "SOMEsEcReTsd292v".to_string();
|
||||
let endpoint = "http://localhost:5000".to_string();
|
||||
let max_concurrent_sync = NonZeroUsize::new(111).unwrap();
|
||||
let max_concurrent_timelines_sync = NonZeroUsize::new(111).unwrap();
|
||||
let max_sync_errors = NonZeroU32::new(222).unwrap();
|
||||
let s3_concurrency_limit = NonZeroUsize::new(333).unwrap();
|
||||
|
||||
let identical_toml_declarations = &[
|
||||
format!(
|
||||
r#"[remote_storage]
|
||||
max_concurrent_sync = {}
|
||||
max_sync_errors = {}
|
||||
bucket_name = '{}'
|
||||
bucket_region = '{}'
|
||||
prefix_in_bucket = '{}'
|
||||
access_key_id = '{}'
|
||||
secret_access_key = '{}'
|
||||
endpoint = '{}'"#,
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
|
||||
max_concurrent_timelines_sync = {max_concurrent_timelines_sync}
|
||||
max_sync_errors = {max_sync_errors}
|
||||
bucket_name = '{bucket_name}'
|
||||
bucket_region = '{bucket_region}'
|
||||
prefix_in_bucket = '{prefix_in_bucket}'
|
||||
access_key_id = '{access_key_id}'
|
||||
secret_access_key = '{secret_access_key}'
|
||||
endpoint = '{endpoint}'
|
||||
concurrency_limit = {s3_concurrency_limit}"#
|
||||
),
|
||||
format!(
|
||||
"remote_storage={{max_concurrent_sync={}, max_sync_errors={}, bucket_name='{}', bucket_region='{}', prefix_in_bucket='{}', access_key_id='{}', secret_access_key='{}', endpoint='{}'}}",
|
||||
max_concurrent_sync, max_sync_errors, bucket_name, bucket_region, prefix_in_bucket, access_key_id, secret_access_key, endpoint
|
||||
"remote_storage={{max_concurrent_timelines_sync={max_concurrent_timelines_sync}, max_sync_errors={max_sync_errors}, bucket_name='{bucket_name}',\
|
||||
bucket_region='{bucket_region}', prefix_in_bucket='{prefix_in_bucket}', access_key_id='{access_key_id}', secret_access_key='{secret_access_key}', endpoint='{endpoint}', concurrency_limit={s3_concurrency_limit}}}",
|
||||
),
|
||||
];
|
||||
|
||||
for remote_storage_config_str in identical_toml_declarations {
|
||||
let config_string = format!(
|
||||
r#"{}
|
||||
r#"{ALL_BASE_VALUES_TOML}
|
||||
pg_distrib_dir='{}'
|
||||
|
||||
{}"#,
|
||||
ALL_BASE_VALUES_TOML,
|
||||
{remote_storage_config_str}"#,
|
||||
pg_distrib_dir.display(),
|
||||
remote_storage_config_str,
|
||||
);
|
||||
|
||||
let toml = config_string.parse()?;
|
||||
|
||||
let parsed_remote_storage_config = PageServerConf::parse_and_validate(&toml, &workdir)
|
||||
.unwrap_or_else(|e| {
|
||||
panic!("Failed to parse config '{}', reason: {}", config_string, e)
|
||||
})
|
||||
.unwrap_or_else(|e| panic!("Failed to parse config '{config_string}', reason: {e}"))
|
||||
.remote_storage_config
|
||||
.expect("Should have remote storage config for S3");
|
||||
|
||||
assert_eq!(
|
||||
parsed_remote_storage_config,
|
||||
RemoteStorageConfig {
|
||||
max_concurrent_sync,
|
||||
max_concurrent_timelines_sync,
|
||||
max_sync_errors,
|
||||
storage: RemoteStorageKind::AwsS3(S3Config {
|
||||
bucket_name: bucket_name.clone(),
|
||||
@@ -884,7 +882,8 @@ pg_distrib_dir='{}'
|
||||
access_key_id: Some(access_key_id.clone()),
|
||||
secret_access_key: Some(secret_access_key.clone()),
|
||||
prefix_in_bucket: Some(prefix_in_bucket.clone()),
|
||||
endpoint: Some(endpoint.clone())
|
||||
endpoint: Some(endpoint.clone()),
|
||||
concurrency_limit: s3_concurrency_limit,
|
||||
}),
|
||||
},
|
||||
"Remote storage config should correctly parse the S3 config"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
use zenith_utils::{
|
||||
use utils::{
|
||||
lsn::Lsn,
|
||||
zid::{ZNodeId, ZTenantId, ZTimelineId},
|
||||
};
|
||||
@@ -20,11 +20,19 @@ pub struct TimelineCreateRequest {
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
#[derive(Serialize, Deserialize, Default)]
|
||||
pub struct TenantCreateRequest {
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub new_tenant_id: Option<ZTenantId>,
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
pub pitr_interval: Option<String>,
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
@@ -36,3 +44,44 @@ pub struct TenantCreateResponse(#[serde_as(as = "DisplayFromStr")] pub ZTenantId
|
||||
pub struct StatusResponse {
|
||||
pub id: ZNodeId,
|
||||
}
|
||||
|
||||
impl TenantCreateRequest {
|
||||
pub fn new(new_tenant_id: Option<ZTenantId>) -> TenantCreateRequest {
|
||||
TenantCreateRequest {
|
||||
new_tenant_id,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[serde_as]
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TenantConfigRequest {
|
||||
pub tenant_id: ZTenantId,
|
||||
#[serde(default)]
|
||||
#[serde_as(as = "Option<DisplayFromStr>")]
|
||||
pub checkpoint_distance: Option<u64>,
|
||||
pub compaction_target_size: Option<u64>,
|
||||
pub compaction_period: Option<String>,
|
||||
pub compaction_threshold: Option<usize>,
|
||||
pub gc_horizon: Option<u64>,
|
||||
pub gc_period: Option<String>,
|
||||
pub image_creation_threshold: Option<usize>,
|
||||
pub pitr_interval: Option<String>,
|
||||
}
|
||||
|
||||
impl TenantConfigRequest {
|
||||
pub fn new(tenant_id: ZTenantId) -> TenantConfigRequest {
|
||||
TenantConfigRequest {
|
||||
tenant_id,
|
||||
checkpoint_distance: None,
|
||||
compaction_target_size: None,
|
||||
compaction_period: None,
|
||||
compaction_threshold: None,
|
||||
gc_horizon: None,
|
||||
gc_period: None,
|
||||
image_creation_threshold: None,
|
||||
pitr_interval: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -328,11 +328,7 @@ paths:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: object
|
||||
properties:
|
||||
new_tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
$ref: "#/components/schemas/TenantCreateInfo"
|
||||
responses:
|
||||
"201":
|
||||
description: New tenant created successfully
|
||||
@@ -371,7 +367,48 @@ paths:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/config:
|
||||
put:
|
||||
description: |
|
||||
Update tenant's config.
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TenantConfigInfo"
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/TenantInfo"
|
||||
"400":
|
||||
description: Malformed tenant config request
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
components:
|
||||
securitySchemes:
|
||||
JWT:
|
||||
@@ -389,6 +426,45 @@ components:
|
||||
type: string
|
||||
state:
|
||||
type: string
|
||||
TenantCreateInfo:
|
||||
type: object
|
||||
properties:
|
||||
new_tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
gc_period:
|
||||
type: string
|
||||
gc_horizon:
|
||||
type: integer
|
||||
pitr_interval:
|
||||
type: string
|
||||
checkpoint_distance:
|
||||
type: integer
|
||||
compaction_period:
|
||||
type: string
|
||||
compaction_threshold:
|
||||
type: string
|
||||
TenantConfigInfo:
|
||||
type: object
|
||||
properties:
|
||||
tenant_id:
|
||||
type: string
|
||||
format: hex
|
||||
gc_period:
|
||||
type: string
|
||||
gc_horizon:
|
||||
type: integer
|
||||
pitr_interval:
|
||||
type: string
|
||||
checkpoint_distance:
|
||||
type: integer
|
||||
compaction_period:
|
||||
type: string
|
||||
compaction_threshold:
|
||||
type: string
|
||||
TimelineInfo:
|
||||
type: object
|
||||
required:
|
||||
@@ -409,6 +485,7 @@ components:
|
||||
type: object
|
||||
required:
|
||||
- awaits_download
|
||||
- remote_consistent_lsn
|
||||
properties:
|
||||
awaits_download:
|
||||
type: boolean
|
||||
|
||||
@@ -1,36 +1,45 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{Context, Result};
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use tracing::*;
|
||||
use zenith_utils::auth::JwtAuth;
|
||||
use zenith_utils::http::endpoint::attach_openapi_ui;
|
||||
use zenith_utils::http::endpoint::auth_middleware;
|
||||
use zenith_utils::http::endpoint::check_permission;
|
||||
use zenith_utils::http::error::ApiError;
|
||||
use zenith_utils::http::{
|
||||
endpoint,
|
||||
error::HttpErrorBody,
|
||||
json::{json_request, json_response},
|
||||
request::parse_request_param,
|
||||
};
|
||||
use zenith_utils::http::{RequestExt, RouterBuilder};
|
||||
use zenith_utils::zid::{ZTenantTimelineId, ZTimelineId};
|
||||
|
||||
use super::models::{
|
||||
StatusResponse, TenantCreateRequest, TenantCreateResponse, TimelineCreateRequest,
|
||||
StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse,
|
||||
TimelineCreateRequest,
|
||||
};
|
||||
use crate::config::RemoteStorageKind;
|
||||
use crate::remote_storage::{
|
||||
download_index_part, schedule_timeline_download, LocalFs, RemoteIndex, RemoteTimeline, S3Bucket,
|
||||
};
|
||||
use crate::remote_storage::{schedule_timeline_download, RemoteIndex};
|
||||
use crate::repository::Repository;
|
||||
use crate::tenant_config::TenantConfOpt;
|
||||
use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
|
||||
use crate::{config::PageServerConf, tenant_mgr, timelines, ZTenantId};
|
||||
use crate::{config::PageServerConf, tenant_mgr, timelines};
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
http::{
|
||||
endpoint::{self, attach_openapi_ui, auth_middleware, check_permission},
|
||||
error::{ApiError, HttpErrorBody},
|
||||
json::{json_request, json_response},
|
||||
request::parse_request_param,
|
||||
RequestExt, RouterBuilder,
|
||||
},
|
||||
zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
|
||||
};
|
||||
|
||||
struct State {
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
remote_index: RemoteIndex,
|
||||
allowlist_routes: Vec<Uri>,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
}
|
||||
|
||||
enum GenericRemoteStorage {
|
||||
Local(LocalFs),
|
||||
S3(S3Bucket),
|
||||
}
|
||||
|
||||
impl State {
|
||||
@@ -38,17 +47,34 @@ impl State {
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
remote_index: RemoteIndex,
|
||||
) -> Self {
|
||||
) -> anyhow::Result<Self> {
|
||||
let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml"]
|
||||
.iter()
|
||||
.map(|v| v.parse().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
Self {
|
||||
// Note that this remote storage is created separately from the main one in the sync_loop.
|
||||
// It's fine since it's stateless and some code duplication saves us from bloating the code around with generics.
|
||||
let remote_storage = conf
|
||||
.remote_storage_config
|
||||
.as_ref()
|
||||
.map(|storage_config| match &storage_config.storage {
|
||||
RemoteStorageKind::LocalFs(root) => {
|
||||
LocalFs::new(root.clone(), &conf.workdir).map(GenericRemoteStorage::Local)
|
||||
}
|
||||
RemoteStorageKind::AwsS3(s3_config) => {
|
||||
S3Bucket::new(s3_config, &conf.workdir).map(GenericRemoteStorage::S3)
|
||||
}
|
||||
})
|
||||
.transpose()
|
||||
.context("Failed to init generic remote storage")?;
|
||||
|
||||
Ok(Self {
|
||||
conf,
|
||||
auth,
|
||||
allowlist_routes,
|
||||
remote_index,
|
||||
}
|
||||
remote_storage,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,8 +148,8 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
timeline_id,
|
||||
})
|
||||
.map(|remote_entry| RemoteTimelineInfo {
|
||||
remote_consistent_lsn: remote_entry.disk_consistent_lsn(),
|
||||
awaits_download: remote_entry.get_awaits_download(),
|
||||
remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
|
||||
awaits_download: remote_entry.awaits_download,
|
||||
}),
|
||||
})
|
||||
}
|
||||
@@ -153,43 +179,47 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let include_non_incremental_logical_size = get_include_non_incremental_logical_size(&request);
|
||||
|
||||
let span = info_span!("timeline_detail_handler", tenant = %tenant_id, timeline = %timeline_id);
|
||||
let (local_timeline_info, remote_timeline_info) = async {
|
||||
// any error here will render local timeline as None
|
||||
// XXX .in_current_span does not attach messages in spawn_blocking future to current future's span
|
||||
let local_timeline_info = tokio::task::spawn_blocking(move || {
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
let local_timeline = {
|
||||
repo.get_timeline(timeline_id)
|
||||
.as_ref()
|
||||
.map(|timeline| {
|
||||
LocalTimelineInfo::from_repo_timeline(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
timeline,
|
||||
include_non_incremental_logical_size,
|
||||
)
|
||||
})
|
||||
.transpose()?
|
||||
};
|
||||
Ok::<_, anyhow::Error>(local_timeline)
|
||||
})
|
||||
.await
|
||||
.ok()
|
||||
.and_then(|r| r.ok())
|
||||
.flatten();
|
||||
|
||||
let (local_timeline_info, span) = tokio::task::spawn_blocking(move || {
|
||||
let entered = span.entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
let local_timeline = {
|
||||
repo.get_timeline(timeline_id)
|
||||
.as_ref()
|
||||
.map(|timeline| {
|
||||
LocalTimelineInfo::from_repo_timeline(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
timeline,
|
||||
include_non_incremental_logical_size,
|
||||
)
|
||||
let remote_timeline_info = {
|
||||
let remote_index_read = get_state(&request).remote_index.read().await;
|
||||
remote_index_read
|
||||
.timeline_entry(&ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
})
|
||||
.map(|remote_entry| RemoteTimelineInfo {
|
||||
remote_consistent_lsn: remote_entry.metadata.disk_consistent_lsn(),
|
||||
awaits_download: remote_entry.awaits_download,
|
||||
})
|
||||
.transpose()?
|
||||
};
|
||||
Ok::<_, anyhow::Error>((local_timeline, entered.exit()))
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
let remote_timeline_info = {
|
||||
let remote_index_read = get_state(&request).remote_index.read().await;
|
||||
remote_index_read
|
||||
.timeline_entry(&ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
})
|
||||
.map(|remote_entry| RemoteTimelineInfo {
|
||||
remote_consistent_lsn: remote_entry.disk_consistent_lsn(),
|
||||
awaits_download: remote_entry.get_awaits_download(),
|
||||
})
|
||||
};
|
||||
|
||||
let _enter = span.entered();
|
||||
(local_timeline_info, remote_timeline_info)
|
||||
}
|
||||
.instrument(info_span!("timeline_detail_handler", tenant = %tenant_id, timeline = %timeline_id))
|
||||
.await;
|
||||
|
||||
if local_timeline_info.is_none() && remote_timeline_info.is_none() {
|
||||
return Err(ApiError::NotFound(
|
||||
@@ -212,41 +242,105 @@ async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
let span = info_span!("timeline_attach_handler", tenant = %tenant_id, timeline = %timeline_id);
|
||||
info!(
|
||||
"Handling timeline {} attach for tenant: {}",
|
||||
timeline_id, tenant_id,
|
||||
);
|
||||
|
||||
let span = tokio::task::spawn_blocking(move || {
|
||||
let entered = span.entered();
|
||||
if tenant_mgr::get_timeline_for_tenant_load(tenant_id, timeline_id).is_ok() {
|
||||
tokio::task::spawn_blocking(move || {
|
||||
if tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id).is_ok() {
|
||||
// TODO: maybe answer with 309 Not Modified here?
|
||||
anyhow::bail!("Timeline is already present locally")
|
||||
};
|
||||
Ok(entered.exit())
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
let mut remote_index_write = get_state(&request).remote_index.write().await;
|
||||
let sync_id = ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
};
|
||||
let state = get_state(&request);
|
||||
let remote_index = &state.remote_index;
|
||||
|
||||
let _enter = span.entered(); // entered guard cannot live across awaits (non Send)
|
||||
let index_entry = remote_index_write
|
||||
.timeline_entry_mut(&ZTenantTimelineId {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
})
|
||||
.ok_or_else(|| ApiError::NotFound("Unknown remote timeline".to_string()))?;
|
||||
let mut index_accessor = remote_index.write().await;
|
||||
if let Some(remote_timeline) = index_accessor.timeline_entry_mut(&sync_id) {
|
||||
if remote_timeline.awaits_download {
|
||||
return Err(ApiError::Conflict(
|
||||
"Timeline download is already in progress".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if index_entry.get_awaits_download() {
|
||||
return Err(ApiError::Conflict(
|
||||
"Timeline download is already in progress".to_string(),
|
||||
));
|
||||
remote_timeline.awaits_download = true;
|
||||
schedule_timeline_download(tenant_id, timeline_id);
|
||||
return json_response(StatusCode::ACCEPTED, ());
|
||||
} else {
|
||||
// no timeline in the index, release the lock to make the potentially lengthy download opetation
|
||||
drop(index_accessor);
|
||||
}
|
||||
|
||||
index_entry.set_awaits_download(true);
|
||||
schedule_timeline_download(tenant_id, timeline_id);
|
||||
let new_timeline = match try_download_shard_data(state, sync_id).await {
|
||||
Ok(Some(mut new_timeline)) => {
|
||||
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
|
||||
.await
|
||||
.context("Failed to create new timeline directory")?;
|
||||
new_timeline.awaits_download = true;
|
||||
new_timeline
|
||||
}
|
||||
Ok(None) => return Err(ApiError::NotFound("Unknown remote timeline".to_string())),
|
||||
Err(e) => {
|
||||
error!("Failed to retrieve remote timeline data: {:?}", e);
|
||||
return Err(ApiError::NotFound(
|
||||
"Failed to retrieve remote timeline".to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let mut index_accessor = remote_index.write().await;
|
||||
match index_accessor.timeline_entry_mut(&sync_id) {
|
||||
Some(remote_timeline) => {
|
||||
if remote_timeline.awaits_download {
|
||||
return Err(ApiError::Conflict(
|
||||
"Timeline download is already in progress".to_string(),
|
||||
));
|
||||
}
|
||||
remote_timeline.awaits_download = true;
|
||||
}
|
||||
None => index_accessor.add_timeline_entry(sync_id, new_timeline),
|
||||
}
|
||||
schedule_timeline_download(tenant_id, timeline_id);
|
||||
json_response(StatusCode::ACCEPTED, ())
|
||||
}
|
||||
|
||||
async fn try_download_shard_data(
|
||||
state: &State,
|
||||
sync_id: ZTenantTimelineId,
|
||||
) -> anyhow::Result<Option<RemoteTimeline>> {
|
||||
let shard = match state.remote_storage.as_ref() {
|
||||
Some(GenericRemoteStorage::Local(local_storage)) => {
|
||||
download_index_part(state.conf, local_storage, sync_id).await
|
||||
}
|
||||
Some(GenericRemoteStorage::S3(s3_storage)) => {
|
||||
download_index_part(state.conf, s3_storage, sync_id).await
|
||||
}
|
||||
None => return Ok(None),
|
||||
}
|
||||
.with_context(|| format!("Failed to download index shard for timeline {}", sync_id))?;
|
||||
|
||||
let timeline_path = state
|
||||
.conf
|
||||
.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
|
||||
RemoteTimeline::from_index_part(&timeline_path, shard)
|
||||
.map(Some)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to convert index shard into remote timeline for timeline {}",
|
||||
sync_id
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
@@ -257,8 +351,8 @@ async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body
|
||||
let _enter =
|
||||
info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
|
||||
.entered();
|
||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||
repo.detach_timeline(timeline_id)
|
||||
let state = get_state(&request);
|
||||
tenant_mgr::detach_timeline(state.conf, tenant_id, timeline_id)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
@@ -275,7 +369,7 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
crate::tenant_mgr::list_tenants()
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
.map_err(ApiError::from_err)?;
|
||||
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
@@ -287,6 +381,28 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
let request_data: TenantCreateRequest = json_request(&mut request).await?;
|
||||
let remote_index = get_state(&request).remote_index.clone();
|
||||
|
||||
let mut tenant_conf = TenantConfOpt::default();
|
||||
if let Some(gc_period) = request_data.gc_period {
|
||||
tenant_conf.gc_period =
|
||||
Some(humantime::parse_duration(&gc_period).map_err(ApiError::from_err)?);
|
||||
}
|
||||
tenant_conf.gc_horizon = request_data.gc_horizon;
|
||||
tenant_conf.image_creation_threshold = request_data.image_creation_threshold;
|
||||
|
||||
if let Some(pitr_interval) = request_data.pitr_interval {
|
||||
tenant_conf.pitr_interval =
|
||||
Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
|
||||
}
|
||||
|
||||
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
|
||||
tenant_conf.compaction_target_size = request_data.compaction_target_size;
|
||||
tenant_conf.compaction_threshold = request_data.compaction_threshold;
|
||||
|
||||
if let Some(compaction_period) = request_data.compaction_period {
|
||||
tenant_conf.compaction_period =
|
||||
Some(humantime::parse_duration(&compaction_period).map_err(ApiError::from_err)?);
|
||||
}
|
||||
|
||||
let target_tenant_id = request_data
|
||||
.new_tenant_id
|
||||
.map(ZTenantId::from)
|
||||
@@ -294,8 +410,9 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
|
||||
let new_tenant_id = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_create", tenant = ?target_tenant_id).entered();
|
||||
let conf = get_config(&request);
|
||||
|
||||
tenant_mgr::create_tenant_repository(get_config(&request), target_tenant_id, remote_index)
|
||||
tenant_mgr::create_tenant_repository(conf, tenant_conf, target_tenant_id, remote_index)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
@@ -306,6 +423,45 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
})
|
||||
}
|
||||
|
||||
async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let request_data: TenantConfigRequest = json_request(&mut request).await?;
|
||||
let tenant_id = request_data.tenant_id;
|
||||
// check for management permission
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let mut tenant_conf: TenantConfOpt = Default::default();
|
||||
if let Some(gc_period) = request_data.gc_period {
|
||||
tenant_conf.gc_period =
|
||||
Some(humantime::parse_duration(&gc_period).map_err(ApiError::from_err)?);
|
||||
}
|
||||
tenant_conf.gc_horizon = request_data.gc_horizon;
|
||||
tenant_conf.image_creation_threshold = request_data.image_creation_threshold;
|
||||
|
||||
if let Some(pitr_interval) = request_data.pitr_interval {
|
||||
tenant_conf.pitr_interval =
|
||||
Some(humantime::parse_duration(&pitr_interval).map_err(ApiError::from_err)?);
|
||||
}
|
||||
|
||||
tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
|
||||
tenant_conf.compaction_target_size = request_data.compaction_target_size;
|
||||
tenant_conf.compaction_threshold = request_data.compaction_threshold;
|
||||
|
||||
if let Some(compaction_period) = request_data.compaction_period {
|
||||
tenant_conf.compaction_period =
|
||||
Some(humantime::parse_duration(&compaction_period).map_err(ApiError::from_err)?);
|
||||
}
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_config", tenant = ?tenant_id).entered();
|
||||
|
||||
tenant_mgr::update_tenant_config(tenant_conf, tenant_id)
|
||||
})
|
||||
.await
|
||||
.map_err(ApiError::from_err)??;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
json_response(
|
||||
StatusCode::NOT_FOUND,
|
||||
@@ -317,7 +473,7 @@ pub fn make_router(
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
remote_index: RemoteIndex,
|
||||
) -> RouterBuilder<hyper::Body, ApiError> {
|
||||
) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
|
||||
let spec = include_bytes!("openapi_spec.yml");
|
||||
let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc");
|
||||
if auth.is_some() {
|
||||
@@ -331,11 +487,14 @@ pub fn make_router(
|
||||
}))
|
||||
}
|
||||
|
||||
router
|
||||
.data(Arc::new(State::new(conf, auth, remote_index)))
|
||||
Ok(router
|
||||
.data(Arc::new(
|
||||
State::new(conf, auth, remote_index).context("Failed to initialize router state")?,
|
||||
))
|
||||
.get("/v1/status", status_handler)
|
||||
.get("/v1/tenant", tenant_list_handler)
|
||||
.post("/v1/tenant", tenant_create_handler)
|
||||
.put("/v1/tenant/config", tenant_config_handler)
|
||||
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
|
||||
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
|
||||
.get(
|
||||
@@ -350,5 +509,5 @@ pub fn make_router(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
|
||||
timeline_detach_handler,
|
||||
)
|
||||
.any(handler_404)
|
||||
.any(handler_404))
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user