Compare commits

..

24 Commits

Author SHA1 Message Date
Bojan Serafimov
8c78a5e9a6 Clippy 2022-05-25 11:31:22 -04:00
Bojan Serafimov
a8f0124af7 Make condvar shutdown-aware 2022-05-25 11:28:50 -04:00
Bojan Serafimov
88154f9871 clipppy 2022-05-24 23:50:21 -04:00
Bojan Serafimov
14b8dcab5b clippy 2022-05-24 23:24:03 -04:00
Bojan Serafimov
aee8615b30 Merge branch 'main' into ps-thread-pool-2 2022-05-24 22:00:59 -04:00
Bojan Serafimov
079871f8e3 Fmt 2022-05-24 21:54:55 -04:00
Bojan Serafimov
f9c1fc8657 Add metric 2022-05-24 21:48:45 -04:00
Bojan Serafimov
32b4e0fda2 Fix 2022-05-24 15:14:33 -04:00
Bojan Serafimov
6b4e17ef89 Start workers 2022-05-24 15:03:18 -04:00
Bojan Serafimov
07e6dd809d Start jobs when tenant activates 2022-05-24 14:25:15 -04:00
Bojan Serafimov
b028f12b06 Fix 2022-05-24 14:14:29 -04:00
Bojan Serafimov
3975417cae Add metadata 2022-05-24 14:12:53 -04:00
Bojan Serafimov
98d8d65c83 Cleanup 2022-05-24 14:03:07 -04:00
Bojan Serafimov
1e9b628c25 Add period 2022-05-24 12:49:30 -04:00
Bojan Serafimov
31dc9e6abd Use queue 2022-05-24 10:50:56 -04:00
Bojan Serafimov
0d5fee3ab7 Add gc pool 2022-05-23 23:42:25 -04:00
Bojan Serafimov
dc47f9ccf1 Simplify 2022-05-23 23:18:25 -04:00
Bojan Serafimov
fefbff8981 Add job status 2022-05-23 22:44:48 -04:00
Bojan Serafimov
eefc7aa792 Add todo 2022-05-23 22:33:53 -04:00
Bojan Serafimov
f3c71899be Add recurring job 2022-05-23 22:32:14 -04:00
Bojan Serafimov
de55b2f139 Handle panic 2022-05-22 16:12:00 -04:00
Bojan Serafimov
5a19ac02c9 Add todo 2022-05-22 15:53:52 -04:00
Bojan Serafimov
13b374a2df Add shutdown command 2022-05-22 12:34:53 -04:00
Bojan Serafimov
b9814222f9 WIP 2022-05-12 12:28:46 -04:00
302 changed files with 6534 additions and 14473 deletions

View File

@@ -6,7 +6,5 @@ timeout = 30
[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg
# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
# and scp neither worked for me
transfer_method = piped
scp_if_ssh = True
pipelining = True

View File

@@ -1,7 +1,3 @@
# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
# (use pre 8.5 option name to cope with old ssh in CI)
PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
Host tele.zenith.tech
User admin
Port 3023

View File

@@ -57,7 +57,7 @@
args:
creates: "/storage/pageserver/data/tenants"
environment:
NEON_REPO_DIR: "/storage/pageserver/data"
ZENITH_REPO_DIR: "/storage/pageserver/data"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:
@@ -131,7 +131,7 @@
args:
creates: "/storage/safekeeper/data/safekeeper.id"
environment:
NEON_REPO_DIR: "/storage/safekeeper/data"
ZENITH_REPO_DIR: "/storage/safekeeper/data"
LD_LIBRARY_PATH: "/usr/local/lib"
become: true
tags:

View File

@@ -6,7 +6,7 @@ RELEASE=${RELEASE:-false}
# look at docker hub for latest tag for neon docker image
if [ "${RELEASE}" = "true" ]; then
echo "search latest release tag"
echo "search latest relase tag"
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
if [ -z "${VERSION}" ]; then
echo "no any docker tags found, exiting..."
@@ -31,7 +31,7 @@ echo "found ${VERSION}"
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
mkdir neon_install
# retrieve binaries from docker image
# retrive binaries from docker image
echo "getting binaries from docker image"
docker pull --quiet neondatabase/neon:${TAG}
ID=$(docker create neondatabase/neon:${TAG})

View File

@@ -12,7 +12,6 @@ pageservers
safekeepers
[storage:vars]
env_name = neon-stress
console_mgmt_base_url = http://neon-stress-console.local
bucket_name = neon-storage-ireland
bucket_region = eu-west-1

View File

@@ -1,7 +1,5 @@
[pageservers]
#zenith-1-ps-1 console_region_id=1
zenith-1-ps-2 console_region_id=1
zenith-1-ps-3 console_region_id=1
zenith-1-ps-1 console_region_id=1
[safekeepers]
zenith-1-sk-1 console_region_id=1
@@ -13,8 +11,8 @@ pageservers
safekeepers
[storage:vars]
env_name = prod-1
console_mgmt_base_url = http://console-release.local
bucket_name = zenith-storage-oregon
bucket_region = us-west-2
etcd_endpoints = etcd-release.local:2379
safekeeper_enable_s3_offload = true

View File

@@ -1,20 +1,19 @@
[pageservers]
#zenith-us-stage-ps-1 console_region_id=27
zenith-us-stage-ps-2 console_region_id=27
zenith-us-stage-ps-3 console_region_id=27
[safekeepers]
zenith-us-stage-sk-1 console_region_id=27
zenith-us-stage-sk-4 console_region_id=27
zenith-us-stage-sk-5 console_region_id=27
zenith-us-stage-sk-6 console_region_id=27
[storage:children]
pageservers
safekeepers
[storage:vars]
env_name = us-stage
console_mgmt_base_url = http://console-staging.local
bucket_name = zenith-staging-storage-us-east-1
bucket_region = us-east-1
etcd_endpoints = etcd-staging.local:2379
safekeeper_enable_s3_offload = false

View File

@@ -5,7 +5,7 @@ After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed

View File

@@ -0,0 +1,18 @@
[Unit]
Description=Zenith safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 ZENITH_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -p {{ first_pageserver }}:6400 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --enable-s3-offload={{ safekeeper_enable_s3_offload }}
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target

View File

@@ -5,12 +5,21 @@ executors:
resource_class: xlarge
docker:
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
- image: neondatabase/rust:1.58
- image: zimg/rust:1.58
neon-executor:
docker:
- image: neondatabase/rust:1.58
- image: zimg/rust:1.58
jobs:
check-codestyle-rust:
executor: neon-xlarge-executor
steps:
- checkout
- run:
name: rustfmt
when: always
command: cargo fmt --all -- --check
# A job to build postgres
build-postgres:
executor: neon-xlarge-executor
@@ -37,7 +46,7 @@ jobs:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
# Build postgres if the restore_cache didn't find a build.
# `make` can't figure out whether the cache is valid, since
@@ -54,7 +63,7 @@ jobs:
- save_cache:
name: Save postgres cache
key: v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
key: v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
paths:
- tmp_install
@@ -85,7 +94,7 @@ jobs:
name: Restore postgres cache
keys:
# Restore ONLY if the rev key matches exactly
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
- restore_cache:
name: Restore rust cache
@@ -93,29 +102,31 @@ jobs:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
- v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
# Build the rust code, including test binaries
- run:
name: Rust build << parameters.build_type >>
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
export CARGO_INCREMENTAL=0
export CACHEPOT_BUCKET=zenith-rust-cachepot
export RUSTC_WRAPPER=""
export RUSTC_WRAPPER=cachepot
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
cachepot -s
- save_cache:
name: Save rust cache
key: v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
paths:
- ~/.cargo/registry
- ~/.cargo/git
@@ -126,22 +137,35 @@ jobs:
name: cargo test
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
cargo test $CARGO_FLAGS
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
# Install the rust binaries, for use by test jobs
- run:
name: Install rust binaries
command: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
binaries=$(
cargo metadata --format-version=1 --no-deps |
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/zenith/bin
mkdir -p /tmp/zenith/test_bin
mkdir -p /tmp/zenith/etc
@@ -151,15 +175,34 @@ jobs:
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/zenith/bin/$bin
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
# Install test executables (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/zenith/test_bin/$(basename $bin)
cp $SRC $DST
echo $DST >> /tmp/zenith/etc/binaries.list
done
fi
# Install the postgres binaries, for use by test jobs
- run:
name: Install postgres binaries
command: |
cp -a tmp_install /tmp/zenith/pg_install
# Save rust binaries for other jobs in the workflow
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save the rust binaries and coverage data for other jobs in this workflow.
- persist_to_workspace:
root: /tmp/zenith
paths:
@@ -252,7 +295,7 @@ jobs:
# no_output_timeout, specified here.
no_output_timeout: 10m
environment:
- NEON_BIN: /tmp/zenith/bin
- ZENITH_BIN: /tmp/zenith/bin
- POSTGRES_DISTRIB_DIR: /tmp/zenith/pg_install
- TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
@@ -280,6 +323,12 @@ jobs:
export GITHUB_SHA=$CIRCLE_SHA1
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
@@ -290,7 +339,7 @@ jobs:
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
./scripts/pytest \
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
@@ -319,15 +368,379 @@ jobs:
# The store_test_results step tells CircleCI where to find the junit.xml file.
- store_test_results:
path: /tmp/test_output
# Save data (if any)
- run:
name: Merge coverage data
command: |
# This will speed up workspace uploads
if [[ $BUILD_TYPE == "debug" ]]; then
scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
fi
# Save coverage data (if any)
- persist_to_workspace:
root: /tmp/zenith
paths:
- "*"
coverage-report:
executor: neon-xlarge-executor
steps:
- attach_workspace:
at: /tmp/zenith
- checkout
- restore_cache:
name: Restore rust cache
keys:
# Require an exact match. While an out of date cache might speed up the build,
# there's no way to clean out old packages, so the cache grows every time something
# changes.
- v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
- run:
name: Build coverage report
command: |
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/coverage \
--dir=/tmp/zenith/coverage report \
--input-objects=/tmp/zenith/etc/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
- run:
name: Upload coverage report
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
scripts/git-upload \
--repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"success\",
\"context\": \"zenith-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
# Build neondatabase/neon:latest image and push it to Docker hub
docker-image:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build \
--pull \
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:latest .
docker push neondatabase/neon:${DOCKER_TAG}
docker push neondatabase/neon:latest
# Build neondatabase/compute-node:latest image and push it to Docker hub
docker-image-compute:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
# Build neondatabase/compute-tools:latest image and push it to Docker hub
# TODO: this should probably also use versioned tag, not just :latest.
# XXX: but should it? We build and use it only locally now.
- run:
name: Build and push compute-tools Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
docker build \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/compute-tools:latest -f Dockerfile.compute-tools .
docker push neondatabase/compute-tools:latest
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:latest vendor/postgres
docker push neondatabase/compute-node:${DOCKER_TAG}
docker push neondatabase/compute-node:latest
# Build production neondatabase/neon:release image and push it to Docker hub
docker-image-release:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG="release-$(git log --oneline|wc -l)"
docker build \
--pull \
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:release .
docker push neondatabase/neon:${DOCKER_TAG}
docker push neondatabase/neon:release
# Build production neondatabase/compute-node:release image and push it to Docker hub
docker-image-compute-release:
docker:
- image: cimg/base:2021.04
steps:
- checkout
- setup_remote_docker:
docker_layer_caching: true
# Build neondatabase/compute-tools:release image and push it to Docker hub
# TODO: this should probably also use versioned tag, not just :latest.
# XXX: but should it? We build and use it only locally now.
- run:
name: Build and push compute-tools Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
docker build \
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
--tag neondatabase/compute-tools:release -f Dockerfile.compute-tools .
docker push neondatabase/compute-tools:release
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
DOCKER_TAG="release-$(git log --oneline|wc -l)"
docker build --tag neondatabase/compute-node:${DOCKER_TAG} --tag neondatabase/compute-node:release vendor/postgres
docker push neondatabase/compute-node:${DOCKER_TAG}
docker push neondatabase/compute-node:release
deploy-staging:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i staging.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-staging-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${STAGING_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG=$(git log --oneline|wc -l)
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
deploy-neon-stress:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i neon-stress.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-neon-stress-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${NEON_STRESS_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG=$(git log --oneline|wc -l)
helm upgrade neon-stress-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-stress-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
deploy-release:
docker:
- image: cimg/python:3.10
steps:
- checkout
- setup_remote_docker
- run:
name: Setup ansible
command: |
pip install --progress-bar off --user ansible boto3
- run:
name: Redeploy
command: |
cd "$(pwd)/.circleci/ansible"
RELEASE=true ./get_binaries.sh
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i production.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-release-proxy:
docker:
- image: cimg/base:2021.04
environment:
KUBECONFIG: .kubeconfig
steps:
- checkout
- run:
name: Store kubeconfig file
command: |
echo "${PRODUCTION_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- run:
name: Setup helm v3
command: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add zenithdb https://neondatabase.github.io/helm-charts
- run:
name: Re-deploy proxy
command: |
DOCKER_TAG="release-$(git log --oneline|wc -l)"
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
# Trigger a new remote CI job
remote-ci-trigger:
docker:
- image: cimg/base:2021.04
parameters:
remote_repo:
type: string
environment:
REMOTE_REPO: << parameters.remote_repo >>
steps:
- run:
name: Set PR's status to pending
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
- run:
name: Request a remote CI test
command: |
LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "$CI_ACCESS_TOKEN" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$CIRCLE_SHA1\",
\"remote_repo\": \"$LOCAL_REPO\"
}
}"
workflows:
build_and_test:
jobs:
- check-codestyle-rust
- check-codestyle-python
- build-postgres:
name: build-postgres-<< matrix.build_type >>
@@ -343,6 +756,7 @@ workflows:
- build-postgres-<< matrix.build_type >>
- run-pytest:
name: pg_regress-tests-<< matrix.build_type >>
context: PERF_TEST_RESULT_CONNSTR
matrix:
parameters:
build_type: ["debug", "release"]
@@ -367,3 +781,120 @@ workflows:
save_perf_report: true
requires:
- build-neon-release
- coverage-report:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
requires:
# TODO: consider adding more
- other-tests-debug
- docker-image:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- docker-image-compute:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- deploy-staging:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-staging-proxy:
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-neon-stress:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- deploy-neon-stress-proxy:
# deploy only for commits to main
filters:
branches:
only:
- main
requires:
- docker-image
- docker-image-release:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- release
requires:
- pg_regress-tests-release
- other-tests-release
- docker-image-compute-release:
# Context gives an ability to login
context: Docker Hub
# Build image only for commits to main
filters:
branches:
only:
- release
requires:
- pg_regress-tests-release
- other-tests-release
- deploy-release:
# Context gives an ability to login
context: Docker Hub
# deploy only for commits to main
filters:
branches:
only:
- release
requires:
- docker-image-release
- deploy-release-proxy:
# deploy only for commits to main
filters:
branches:
only:
- release
requires:
- docker-image-release
- remote-ci-trigger:
# Context passes credentials for gh api
context: CI_ACCESS_TOKEN
remote_repo: "neondatabase/cloud"
requires:
# XXX: Successful build doesn't mean everything is OK, but
# the job to be triggered takes so much time to complete (~22 min)
# that it's better not to wait for the commented-out steps
- build-neon-release
# - pg_regress-tests-release
# - other-tests-release

View File

@@ -9,8 +9,8 @@ tmp_install
tmp_check_cli
test_output
.vscode
.neon
integration_tests/.neon
.zenith
integration_tests/.zenith
.mypy_cache
Dockerfile

View File

@@ -1,140 +0,0 @@
name: 'Run python test'
description: 'Runs a Neon python test set, performing all the required preparations before'
inputs:
build_type:
description: 'Type of Rust (neon) and C (postgres) builds. Must be "release" or "debug".'
required: true
rust_toolchain:
description: 'Rust toolchain version to fetch the caches'
required: true
test_selection:
description: 'A python test suite to run'
required: true
extra_params:
description: 'Arbitrary parameters to pytest. For example "-s" to prevent capturing stdout/stderr'
required: false
default: ''
needs_postgres_source:
description: 'Set to true if the test suite requires postgres source checked out'
required: false
default: 'false'
run_in_parallel:
description: 'Whether to run tests in parallel'
required: false
default: 'true'
save_perf_report:
description: 'Whether to upload the performance report'
required: false
default: 'false'
runs:
using: "composite"
steps:
- name: Get Neon artifact for restoration
uses: actions/download-artifact@v3
with:
name: neon-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-artifact
path: ./neon-artifact/
- name: Extract Neon artifact
shell: bash -ex {0}
run: |
mkdir -p /tmp/neon/
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
rm -rf ./neon-artifact/
- name: Checkout
if: inputs.needs_postgres_source == 'true'
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -ex {0}
run: ./scripts/pysync
- name: Run pytest
env:
NEON_BIN: /tmp/neon/bin
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
TEST_OUTPUT: /tmp/test_output
# this variable will be embedded in perf test report
# and is needed to distinguish different environments
PLATFORM: github-actions-selfhosted
shell: bash -ex {0}
run: |
PERF_REPORT_DIR="$(realpath test_runner/perf-report-local)"
rm -rf $PERF_REPORT_DIR
TEST_SELECTION="test_runner/${{ inputs.test_selection }}"
EXTRA_PARAMS="${{ inputs.extra_params }}"
if [ -z "$TEST_SELECTION" ]; then
echo "test_selection must be set"
exit 1
fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
fi
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
mkdir -p "$PERF_REPORT_DIR"
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
fi
fi
if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
fi
# Run the tests.
#
# The junit.xml file allows CircleCI to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
# -n4 uses four processes to run tests via pytest-xdist
# -s is not used to prevent pytest from capturing output, because tests are running
# in parallel and logs are mixed between different tests
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "not remote_cluster" \
-rA $TEST_SELECTION $EXTRA_PARAMS
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
export REPORT_FROM="$PERF_REPORT_DIR"
export REPORT_TO=local
scripts/generate_and_push_perf_report.sh
fi
fi
- name: Delete all data but logs
shell: bash -ex {0}
if: always()
run: |
du -sh /tmp/test_output/*
find /tmp/test_output -type f ! -name "*.log" ! -name "regression.diffs" ! -name "junit.xml" ! -name "*.filediff" ! -name "*.stdout" ! -name "*.stderr" ! -name "flamegraph.svg" ! -name "*.metrics" -delete
du -sh /tmp/test_output/*
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: python-test-${{ inputs.test_selection }}-${{ runner.os }}-${{ inputs.build_type }}-${{ inputs.rust_toolchain }}-logs
path: /tmp/test_output/

View File

@@ -1,17 +0,0 @@
name: 'Merge and upload coverage data'
description: 'Compresses and uploads the coverage data as an artifact'
runs:
using: "composite"
steps:
- name: Merge coverage data
shell: bash -ex {0}
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Upload coverage data
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: coverage-data-artifact
path: /tmp/coverage/

View File

@@ -1,18 +0,0 @@
[Unit]
Description=Zenith safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/lib
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}.local:6500 --listen-http {{ inventory_hostname }}.local:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ env_name }}/wal"}'
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target

View File

@@ -19,18 +19,18 @@ jobs:
bench:
# this workflow runs on self hosteed runner
# it's environment is quite different from usual guthub runner
# probably the most important difference is that it doesn't start from clean workspace each time
# probably the most important difference is that it doesnt start from clean workspace each time
# e g if you install system packages they are not cleaned up since you install them directly in host machine
# not a container or something
# See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
runs-on: [self-hosted, zenith-benchmarker]
env:
POSTGRES_DISTRIB_DIR: "/usr/pgsql-14"
POSTGRES_DISTRIB_DIR: "/usr/pgsql-13"
steps:
- name: Checkout zenith repo
uses: actions/checkout@v3
uses: actions/checkout@v2
# actions/setup-python@v2 is not working correctly on self-hosted runners
# see https://github.com/actions/setup-python/issues/162
@@ -88,7 +88,7 @@ jobs:
# Plus time needed to initialize the test databases.
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
PLATFORM: "neon-staging"
PLATFORM: "zenith-staging"
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
run: |
@@ -96,7 +96,7 @@ jobs:
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging
- name: Submit result
env:

View File

@@ -1,642 +0,0 @@
name: Test
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -ex {0}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
build-postgres:
runs-on: [ self-hosted, Linux, k8s-runner ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
env:
BUILD_TYPE: ${{ matrix.build_type }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v3
with:
path: tmp_install/
key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_ver.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: mold -run make postgres -j$(nproc)
# actions/cache@v3 does not allow concurrently using the same cache across job steps, so use a separate cache
- name: Prepare postgres artifact
run: tar -C tmp_install/ -czf ./pg.tgz .
- name: Upload postgres artifact
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: ./pg.tgz
build-neon:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-postgres ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
env:
BUILD_TYPE: ${{ matrix.build_type }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Get postgres artifact for restoration
uses: actions/download-artifact@v3
with:
name: postgres-${{ runner.os }}-${{ matrix.build_type }}-artifact
path: ./postgres-artifact/
- name: Extract postgres artifact
run: |
mkdir ./tmp_install/
tar -xf ./postgres-artifact/pg.tgz -C ./tmp_install/
rm -rf ./postgres-artifact/
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
~/.cargo/git/
target/
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
key: |
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
- name: Run cargo build
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS="--release --features profiling"
fi
"${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
- name: Run cargo test
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
CARGO_FLAGS=
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
CARGO_FLAGS=--release
fi
"${cov_prefix[@]}" cargo test $CARGO_FLAGS
- name: Install rust binaries
run: |
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=()
fi
binaries=$(
"${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
test_exe_paths=$(
"${cov_prefix[@]}" cargo test --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
mkdir -p /tmp/neon/bin/
mkdir -p /tmp/neon/test_bin/
mkdir -p /tmp/neon/etc/
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
# Install target binaries
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
cp "$SRC" "$DST"
done
# Install test executables and write list of all binaries (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
cp "$SRC" "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
done
fi
- name: Install postgres binaries
run: cp -a tmp_install /tmp/neon/pg_install
- name: Prepare neon artifact
run: tar -C /tmp/neon/ -czf ./neon.tgz .
- name: Upload neon binaries
uses: actions/upload-artifact@v3
with:
retention-days: 7
if-no-files-found: error
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon.tgz
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
pg_regress-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Pytest regress tests
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ matrix.build_type }}
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: batch_pg_regress
needs_postgres_source: true
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
other-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Pytest other tests
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ matrix.build_type }}
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: batch_others
- name: Merge and upload coverage data
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
benchmarks:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ release ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 2
- name: Pytest benchmarks
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ matrix.build_type }}
rust_toolchain: ${{ matrix.rust_toolchain }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
coverage-report:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ other-tests, pg_regress-tests ]
strategy:
fail-fast: false
matrix:
build_type: [ debug ]
rust_toolchain: [ 1.58 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 1
- name: Restore cargo deps cache
id: cache_cargo
uses: actions/cache@v3
with:
path: |
~/.cargo/registry/
~/.cargo/git/
target/
key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
- name: Get Neon artifact for restoration
uses: actions/download-artifact@v3
with:
name: neon-${{ runner.os }}-${{ matrix.build_type }}-${{ matrix.rust_toolchain }}-artifact
path: ./neon-artifact/
- name: Extract Neon artifact
run: |
mkdir -p /tmp/neon/
tar -xf ./neon-artifact/neon.tgz -C /tmp/neon/
rm -rf ./neon-artifact/
- name: Restore coverage data
uses: actions/download-artifact@v3
with:
name: coverage-data-artifact
path: /tmp/coverage/
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Build and upload coverage report
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
scripts/coverage \
--dir=/tmp/coverage report \
--input-objects=/tmp/coverage/binaries.list \
--commit-url=$COMMIT_URL \
--format=github
REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
scripts/git-upload \
--repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"success\",
\"context\": \"neon-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
trigger-e2e-tests:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ build-neon ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
REMOTE_REPO="${{ github.repository_owner }}/cloud"
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\"
}
}"
docker-image:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
- name: Get build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Get legacy build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Build neon Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
pull: true
push: true
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
docker-image-compute:
runs-on: [ self-hosted, Linux, k8s-runner ]
needs: [ pg_regress-tests, other-tests ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
with:
driver: docker
- name: Get build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::$(git rev-list --count HEAD)"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: build-tag
- name: Get legacy build tag
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "::set-output name=tag::latest
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "::set-output name=tag::release
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
id: legacy-build-tag
- name: Build compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: false
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:local
- name: Push compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
push: true
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
- name: Build compute-node Docker image
uses: docker/build-push-action@v2
with:
context: ./vendor/postgres/
build-args:
COMPUTE_TOOLS_TAG=local
push: true
tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}
calculate-deploy-targets:
runs-on: [ self-hosted, Linux, k8s-runner ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
outputs:
matrix-include: ${{ steps.set-matrix.outputs.include }}
steps:
- id: set-matrix
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$STAGING, $NEON_STRESS]"
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
echo "::set-output name=include::[$PRODUCTION]"
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
deploy:
runs-on: [ self-hosted, Linux, k8s-runner ]
# We need both storage **and** compute images for deploy, because control plane
# picks the compute version based on the storage version. If it notices a fresh
# storage it may bump the compute version. And if compute image failed to build
# it may break things badly.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Setup ansible
run: |
pip install --progress-bar off --user ansible boto3
- name: Redeploy
run: |
cd "$(pwd)/.github/ansible"
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
./get_binaries.sh
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
RELEASE=true ./get_binaries.sh
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
fi
eval $(ssh-agent)
echo "${{ secrets.TELEPORT_SSH_KEY }}" | tr -d '\n'| base64 --decode >ssh-key
echo "${{ secrets.TELEPORT_SSH_CERT }}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
chmod 0600 ssh-key
ssh-add ssh-key
rm -f ssh-key ssh-key-cert.pub
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy:
runs-on: [ self-hosted, Linux, k8s-runner ]
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
# to run all deploy jobs consistently.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
strategy:
matrix:
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
env:
KUBECONFIG: .kubeconfig
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Store kubeconfig file
run: |
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
chmod 0600 ${KUBECONFIG}
- name: Setup helm v3
run: |
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm repo add neondatabase https://neondatabase.github.io/helm-charts
- name: Re-deploy proxy
run: |
DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s

View File

@@ -1,133 +0,0 @@
name: Check code style and build
on:
push:
branches:
- main
pull_request:
defaults:
run:
shell: bash -ex {0}
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
jobs:
check-codestyle-rust:
strategy:
fail-fast: false
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [1.58]
os: [ubuntu-latest, macos-latest]
timeout-minutes: 50
name: run regression test suite
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: Install rust toolchain ${{ matrix.rust_toolchain }}
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust_toolchain }}
components: rustfmt, clippy
override: true
- name: Check formatting
run: cargo fmt --all -- --check
- name: Install Ubuntu postgres dependencies
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev
- name: Install macOS postgres dependencies
if: matrix.os == 'macos-latest'
run: brew install flex bison openssl
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v2
with:
path: |
tmp_install/
key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}
- name: Set extra env for macOS
if: matrix.os == 'macos-latest'
run: |
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: make postgres
# Plain configure output can contain weird errors like 'error: C compiler cannot create executables'
# and the real cause will be inside config.log
- name: Print configure logs in case of failure
if: failure()
continue-on-error: true
run: |
echo '' && echo '=== config.log ===' && echo ''
cat tmp_install/build/config.log
echo '' && echo '=== configure.log ===' && echo ''
cat tmp_install/build/configure.log
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v2
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Ensure all project builds
run: cargo build --all --all-targets
check-codestyle-python:
runs-on: [ self-hosted, Linux, k8s-runner ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: false
fetch-depth: 1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-codestyle-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
run: ./scripts/pysync
- name: Run yapf to ensure code format
run: poetry run yapf --recursive --diff .
- name: Run mypy to check types
run: poetry run mypy .

View File

@@ -1,71 +0,0 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
runs-on: [ ubuntu-latest ]
steps:
- name: Checkout
uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v1-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -ex {0}
run: ./scripts/pysync
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
TEST_OUTPUT: /tmp/test_output
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -ex {0}
run: |
# Test framework expects we have psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/bin" && touch "$POSTGRES_DISTRIB_DIR/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Post to a Slack channel
if: failure()
id: slack
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

73
.github/workflows/testing.yml vendored Normal file
View File

@@ -0,0 +1,73 @@
name: Build and Test
on:
pull_request:
push:
jobs:
regression-check:
strategy:
matrix:
# If we want to duplicate this job for different
# Rust toolchains (e.g. nightly or 1.37.0), add them here.
rust_toolchain: [stable]
os: [ubuntu-latest, macos-latest]
timeout-minutes: 30
name: run regression test suite
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v2
with:
submodules: true
fetch-depth: 2
- name: install rust toolchain ${{ matrix.rust_toolchain }}
uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: ${{ matrix.rust_toolchain }}
override: true
- name: Install Ubuntu postgres dependencies
if: matrix.os == 'ubuntu-latest'
run: |
sudo apt update
sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev
- name: Install macOs postgres dependencies
if: matrix.os == 'macos-latest'
run: brew install flex bison
- name: Set pg revision for caching
id: pg_ver
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
- name: Cache postgres build
id: cache_pg
uses: actions/cache@v2
with:
path: |
tmp_install/
key: ${{ runner.os }}-pg-${{ steps.pg_ver.outputs.pg_rev }}
- name: Build postgres
if: steps.cache_pg.outputs.cache-hit != 'true'
run: make postgres
- name: Cache cargo deps
id: cache_cargo
uses: actions/cache@v2
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}
- name: Run cargo clippy
run: ./run_clippy.sh
- name: Run cargo test
run: cargo test --all --all-targets

5
.gitignore vendored
View File

@@ -5,9 +5,8 @@
__pycache__/
test_output/
.vscode
.idea
/.neon
/integration_tests/.neon
/.zenith
/integration_tests/.zenith
# Coverage
*.profraw

View File

@@ -6,5 +6,5 @@ target/
tmp_install/
__pycache__/
test_output/
.neon/
.zenith/
.git/

150
Cargo.lock generated
View File

@@ -64,45 +64,6 @@ dependencies = [
"nodrop",
]
[[package]]
name = "asn1-rs"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30ff05a702273012438132f449575dbc804e27b2f3cbe3069aa237d26c98fa33"
dependencies = [
"asn1-rs-derive",
"asn1-rs-impl",
"displaydoc",
"nom",
"num-traits",
"rusticata-macros",
"thiserror",
"time 0.3.9",
]
[[package]]
name = "asn1-rs-derive"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db8b7511298d5b7784b40b092d9e9dcd3a627a5707e4b5e507931ab0d44eeebf"
dependencies = [
"proc-macro2",
"quote",
"syn",
"synstructure",
]
[[package]]
name = "asn1-rs-impl"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "async-stream"
version = "0.3.3"
@@ -402,16 +363,6 @@ dependencies = [
"textwrap 0.14.2",
]
[[package]]
name = "close_fds"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bc416f33de9d59e79e57560f450d21ff8393adcf1cdfc3e6d8fb93d5f88a2ed"
dependencies = [
"cfg-if",
"libc",
]
[[package]]
name = "cmake"
version = "0.1.48"
@@ -461,7 +412,6 @@ dependencies = [
"tar",
"tokio",
"tokio-postgres",
"url",
"workspace_hack",
]
@@ -752,12 +702,6 @@ dependencies = [
"syn",
]
[[package]]
name = "data-encoding"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57"
[[package]]
name = "debugid"
version = "0.7.3"
@@ -767,20 +711,6 @@ dependencies = [
"uuid",
]
[[package]]
name = "der-parser"
version = "7.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe398ac75057914d7d07307bf67dc7f3f574a26783b4fc7805a20ffa9f506e82"
dependencies = [
"asn1-rs",
"displaydoc",
"nom",
"num-bigint",
"num-traits",
"rusticata-macros",
]
[[package]]
name = "digest"
version = "0.9.0"
@@ -822,17 +752,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "displaydoc"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "either"
version = "1.6.1"
@@ -882,7 +801,6 @@ name = "etcd_broker"
version = "0.1.0"
dependencies = [
"etcd-client",
"once_cell",
"regex",
"serde",
"serde_json",
@@ -1802,20 +1720,11 @@ dependencies = [
"memchr",
]
[[package]]
name = "oid-registry"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38e20717fa0541f39bd146692035c37bedfa532b3e5071b35761082407546b2a"
dependencies = [
"asn1-rs",
]
[[package]]
name = "once_cell"
version = "1.10.0"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "oorandom"
@@ -1880,7 +1789,6 @@ dependencies = [
"bytes",
"chrono",
"clap 3.0.14",
"close_fds",
"const_format",
"crc32c",
"crossbeam-utils",
@@ -1922,7 +1830,6 @@ dependencies = [
"tracing",
"url",
"utils",
"walkdir",
"workspace_hack",
]
@@ -2151,7 +2058,7 @@ dependencies = [
"serde",
"thiserror",
"utils",
"wal_craft",
"wal_generate",
"workspace_hack",
]
@@ -2330,7 +2237,6 @@ dependencies = [
"url",
"utils",
"workspace_hack",
"x509-parser",
]
[[package]]
@@ -2488,8 +2394,6 @@ version = "0.1.0"
dependencies = [
"anyhow",
"async-trait",
"metrics",
"once_cell",
"rusoto_core",
"rusoto_s3",
"serde",
@@ -2497,7 +2401,6 @@ dependencies = [
"tempfile",
"tokio",
"tokio-util 0.7.0",
"toml_edit",
"tracing",
"workspace_hack",
]
@@ -2702,15 +2605,6 @@ dependencies = [
"semver",
]
[[package]]
name = "rusticata-macros"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632"
dependencies = [
"nom",
]
[[package]]
name = "rustls"
version = "0.20.4"
@@ -2758,7 +2652,6 @@ name = "safekeeper"
version = "0.1.0"
dependencies = [
"anyhow",
"async-trait",
"byteorder",
"bytes",
"clap 3.0.14",
@@ -2767,14 +2660,12 @@ dependencies = [
"daemonize",
"etcd_broker",
"fs2",
"futures",
"git-version",
"hex",
"humantime",
"hyper",
"lazy_static",
"metrics",
"once_cell",
"postgres",
"postgres-protocol",
"postgres_ffi",
@@ -2788,7 +2679,6 @@ dependencies = [
"tokio",
"tokio-postgres",
"tokio-util 0.7.0",
"toml_edit",
"tracing",
"url",
"utils",
@@ -3150,18 +3040,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8"
[[package]]
name = "synstructure"
version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f"
dependencies = [
"proc-macro2",
"quote",
"syn",
"unicode-xid",
]
[[package]]
name = "tar"
version = "0.4.38"
@@ -3753,16 +3631,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wal_craft"
name = "wal_generate"
version = "0.1.0"
dependencies = [
"anyhow",
"clap 3.0.14",
"env_logger",
"log",
"once_cell",
"postgres",
"postgres_ffi",
"tempfile",
]
@@ -4026,24 +3902,6 @@ dependencies = [
"tracing-core",
]
[[package]]
name = "x509-parser"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9fb9bace5b5589ffead1afb76e43e34cff39cd0f3ce7e170ae0c29e53b88eb1c"
dependencies = [
"asn1-rs",
"base64",
"data-encoding",
"der-parser",
"lazy_static",
"nom",
"oid-registry",
"rusticata-macros",
"thiserror",
"time 0.3.9",
]
[[package]]
name = "xattr"
version = "0.2.2"

View File

@@ -1,5 +1,5 @@
# Build Postgres
FROM neondatabase/rust:1.58 AS pg-build
FROM zimg/rust:1.58 AS pg-build
WORKDIR /pg
USER root
@@ -14,7 +14,7 @@ RUN set -e \
&& tar -C tmp_install -czf /postgres_install.tar.gz .
# Build zenith binaries
FROM neondatabase/rust:1.58 AS build
FROM zimg/rust:1.58 AS build
ARG GIT_VERSION=local
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
@@ -25,7 +25,7 @@ COPY --from=pg-build /pg/tmp_install/include/postgresql/server tmp_install/inclu
COPY . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, loosing the compilation stats.
RUN set -e \
&& sudo -E "PATH=$PATH" mold -run cargo build --release \
&& cachepot -s
@@ -46,9 +46,9 @@ RUN set -e \
&& useradd -d /data zenith \
&& chown -R zenith:zenith /data
COPY --from=build --chown=zenith:zenith /home/runner/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/runner/target/release/proxy /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/pageserver /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=zenith:zenith /home/circleci/project/target/release/proxy /usr/local/bin
COPY --from=pg-build /pg/tmp_install/ /usr/local/
COPY --from=pg-build /postgres_install.tar.gz /data/

95
Dockerfile.alpine Normal file
View File

@@ -0,0 +1,95 @@
#
# Docker image for console integration testing.
#
# We may also reuse it in CI to unify installation process and as a general binaries building
# tool for production servers.
#
# Dynamic linking is used for librocksdb and libstdc++ bacause librocksdb-sys calls
# bindgen with "dynamic" feature flag. This also prevents usage of dockerhub alpine-rust
# images which are statically linked and have guards against any dlopen. I would rather
# prefer all static binaries so we may change the way librocksdb-sys builds or wait until
# we will have our own storage and drop rockdb dependency.
#
# Cargo-chef is used to separate dependencies building from main binaries building. This
# way `docker build` will download and install dependencies only of there are changes to
# out Cargo.toml files.
#
#
# build postgres separately -- this layer will be rebuilt only if one of
# mentioned paths will get any changes
#
FROM alpine:3.13 as pg-build
RUN apk add --update clang llvm compiler-rt compiler-rt-static lld musl-dev binutils \
make bison flex readline-dev zlib-dev perl linux-headers libseccomp-dev
WORKDIR zenith
COPY ./vendor/postgres vendor/postgres
COPY ./Makefile Makefile
# Build using clang and lld
RUN CC='clang' LD='lld' CFLAGS='-fuse-ld=lld --rtlib=compiler-rt' make postgres -j4
#
# Calculate cargo dependencies.
# This will always run, but only generate recipe.json with list of dependencies without
# installing them.
#
FROM alpine:20210212 as cargo-deps-inspect
RUN apk add --update rust cargo
RUN cargo install cargo-chef
WORKDIR zenith
COPY . .
RUN cargo chef prepare --recipe-path recipe.json
#
# Build cargo dependencies.
# This temp cantainner would be build only if recipe.json was changed.
#
FROM alpine:20210212 as deps-build
RUN apk add --update rust cargo openssl-dev clang build-base
# rust-rocksdb can be built against system-wide rocksdb -- that saves about
# 10 minutes during build. Rocksdb apk package is in testing now, but use it
# anyway. In case of any troubles we can download and build rocksdb here manually
# (to cache it as a docker layer).
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
WORKDIR zenith
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=cargo-deps-inspect /root/.cargo/bin/cargo-chef /root/.cargo/bin/
COPY --from=cargo-deps-inspect /zenith/recipe.json recipe.json
RUN ROCKSDB_LIB_DIR=/usr/lib/ cargo chef cook --release --recipe-path recipe.json
#
# Build zenith binaries
#
FROM alpine:20210212 as build
RUN apk add --update rust cargo openssl-dev clang build-base
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb-dev
WORKDIR zenith
COPY . .
# Copy cached dependencies
COPY --from=pg-build /zenith/tmp_install/include/postgresql/server tmp_install/include/postgresql/server
COPY --from=deps-build /zenith/target target
COPY --from=deps-build /root/.cargo /root/.cargo
RUN cargo build --release
#
# Copy binaries to resulting image.
# build-base hare to provide libstdc++ (it will also bring gcc, but leave it this way until we figure
# out how to statically link rocksdb or avoid it at all).
#
FROM alpine:3.13
RUN apk add --update openssl build-base libseccomp-dev
RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb
COPY --from=build /zenith/target/release/pageserver /usr/local/bin
COPY --from=build /zenith/target/release/safekeeper /usr/local/bin
COPY --from=build /zenith/target/release/proxy /usr/local/bin
COPY --from=pg-build /zenith/tmp_install /usr/local
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN addgroup zenith && adduser -h /data -D -G zenith zenith
VOLUME ["/data"]
WORKDIR /data
USER zenith
EXPOSE 6400
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["pageserver"]

View File

@@ -1,6 +1,6 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .circle/config.yml
FROM neondatabase/rust:1.58 AS rust-build
FROM zimg/rust:1.58 AS rust-build
ARG CACHEPOT_BUCKET=zenith-rust-cachepot
ARG AWS_ACCESS_KEY_ID
@@ -15,4 +15,4 @@ RUN set -e \
# Final image that only has one binary
FROM debian:buster-slim
COPY --from=rust-build /home/runner/target/release/compute_ctl /usr/local/bin/compute_ctl
COPY --from=rust-build /home/circleci/project/target/release/compute_ctl /usr/local/bin/compute_ctl

View File

@@ -1,8 +1,3 @@
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./tmp_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/tmp_install
# Seccomp BPF is only available for Linux
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
@@ -31,7 +26,7 @@ endif
# macOS with brew-installed openssl requires explicit paths
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
PG_CONFIGURE_OPTS += --with-includes=$(HOMEBREW_PREFIX)/opt/openssl/include --with-libraries=$(HOMEBREW_PREFIX)/opt/openssl/lib
PG_CONFIGURE_OPTS += --with-includes=/usr/local/opt/openssl/include --with-libraries=/usr/local/opt/openssl/lib
endif
# Choose whether we should be silent or verbose
@@ -60,55 +55,55 @@ zenith: postgres-headers
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
### PostgreSQL parts
$(POSTGRES_INSTALL_DIR)/build/config.status:
tmp_install/build/config.status:
+@echo "Configuring postgres build"
mkdir -p $(POSTGRES_INSTALL_DIR)/build
(cd $(POSTGRES_INSTALL_DIR)/build && \
$(ROOT_PROJECT_DIR)/vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
mkdir -p tmp_install/build
(cd tmp_install/build && \
../../vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) \
$(SECCOMP) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR)) > configure.log)
--prefix=$(abspath tmp_install) > configure.log)
# nicer alias for running 'configure'
.PHONY: postgres-configure
postgres-configure: $(POSTGRES_INSTALL_DIR)/build/config.status
postgres-configure: tmp_install/build/config.status
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/include
# Install the PostgreSQL header files into tmp_install/include
.PHONY: postgres-headers
postgres-headers: postgres-configure
+@echo "Installing PostgreSQL headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/src/include MAKELEVEL=0 install
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL and contrib/neon
# Compile and install PostgreSQL and contrib/zenith
.PHONY: postgres
postgres: postgres-configure \
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
+@echo "Compiling PostgreSQL"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 install
+@echo "Compiling contrib/neon"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon install
+@echo "Compiling contrib/neon_test_utils"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon_test_utils install
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
+@echo "Compiling contrib/zenith"
$(MAKE) -C tmp_install/build/contrib/zenith install
+@echo "Compiling contrib/zenith_test_utils"
$(MAKE) -C tmp_install/build/contrib/zenith_test_utils install
+@echo "Compiling pg_buffercache"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pg_buffercache install
$(MAKE) -C tmp_install/build/contrib/pg_buffercache install
+@echo "Compiling pageinspect"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pageinspect install
$(MAKE) -C tmp_install/build/contrib/pageinspect install
.PHONY: postgres-clean
postgres-clean:
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 clean
$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean:
cd $(POSTGRES_INSTALL_DIR)/build && $(MAKE) clean
cd tmp_install/build && $(MAKE) clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything
.PHONY: distclean
distclean:
rm -rf $(POSTGRES_INSTALL_DIR)
rm -rf tmp_install
$(CARGO_CMD_PREFIX) cargo clean
.PHONY: fmt
@@ -117,4 +112,4 @@ fmt:
.PHONY: setup-pre-commit-hook
setup-pre-commit-hook:
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
ln -s -f ../../pre-commit.py .git/hooks/pre-commit

View File

@@ -5,11 +5,6 @@ Neon is a serverless open source alternative to AWS Aurora Postgres. It separate
The project used to be called "Zenith". Many of the commands and code comments
still refer to "zenith", but we are in the process of renaming things.
## Quick start
[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc) or use the online SQL editor.
Alternatively, compile and run the project [locally](#running-local-installation).
## Architecture overview
A Neon installation consists of compute nodes and Neon storage engine.
@@ -29,18 +24,13 @@ Pageserver consists of:
## Running local installation
#### Installing dependencies on Linux
#### building on Ubuntu/ Debian (Linux)
1. Install build dependencies and other useful packages
* On Ubuntu or Debian this set of packages should be sufficient to build the code:
```bash
On Ubuntu or Debian this set of packages should be sufficient to build the code:
```text
apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
libssl-dev clang pkg-config libpq-dev etcd cmake postgresql-client
```
* On Fedora these packages are needed:
```bash
dnf install flex bison readline-devel zlib-devel openssl-devel \
libseccomp-devel perl clang cmake etcd postgresql postgresql-contrib
libssl-dev clang pkg-config libpq-dev libprotobuf-dev etcd
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -49,11 +39,23 @@ dnf install flex bison readline-devel zlib-devel openssl-devel \
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
#### Installing dependencies on OSX (12.3.1)
3. Install PostgreSQL Client
```
apt install postgresql-client
```
4. Build neon and patched postgres
```sh
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
make -j5
```
#### building on OSX (12.3.1)
1. Install XCode and dependencies
```
xcode-select --install
brew install protobuf etcd openssl
brew install protobuf etcd
```
2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -69,20 +71,11 @@ brew install libpq
brew link --force libpq
```
#### Building on Linux and OSX
1. Build neon and patched postgres
```
# Note: The path to the neon sources can not contain a space.
4. Build neon and patched postgres
```sh
git clone --recursive https://github.com/neondatabase/neon.git
cd neon
# The preferred and default is to make a debug build. This will create a
# demonstrably slower build than a release build. If you want to use a release
# build, utilize "`BUILD_TYPE=release make -j`nproc``"
make -j`nproc`
make -j5
```
#### dependency installation notes
@@ -95,7 +88,7 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
#### running neon database
1. Start pageserver and postgres on top of it (should be called from repo root):
```sh
# Create repository in .neon with proper paths to binaries and data
# Create repository in .zenith with proper paths to binaries and data
# Later that would be responsibility of a package install script
> ./target/debug/neon_local init
initializing tenantid 9ef87a5bf0d92544f6fafeeb3239695c
@@ -105,17 +98,17 @@ pageserver init succeeded
# start pageserver and safekeeper
> ./target/debug/neon_local start
Starting pageserver at '127.0.0.1:64000' in '.neon'
Starting pageserver at '127.0.0.1:64000' in '.zenith'
Pageserver started
initializing for sk 1 for 7676
Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'
Starting safekeeper at '127.0.0.1:5454' in '.zenith/safekeepers/sk1'
Safekeeper started
# start postgres compute node
> ./target/debug/neon_local pg start main
Starting new postgres main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres node at 'host=127.0.0.1 port=55432 user=zenith_admin dbname=postgres'
# check list of running postgres instances
> ./target/debug/neon_local pg list
@@ -125,7 +118,7 @@ Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=pos
2. Now it is possible to connect to postgres and run some queries:
```text
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
postgres=# CREATE TABLE t(key int primary key, value text);
CREATE TABLE
postgres=# insert into t values(1,1);
@@ -151,8 +144,8 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
# start postgres on that branch
> ./target/debug/neon_local pg start migration_check --branch-name migration_check
Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
Extracting base backup to create postgres instance: path=.zenith/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres node at 'host=127.0.0.1 port=55433 user=zenith_admin dbname=postgres'
# check the new list of running postgres instances
> ./target/debug/neon_local pg list
@@ -162,7 +155,7 @@ Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=pos
# this new postgres instance will have all the data from 'main' postgres,
# but all modifications would not affect data in original postgres
> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
> psql -p55433 -h 127.0.0.1 -U zenith_admin postgres
postgres=# select * from t;
key | value
-----+-------
@@ -173,7 +166,7 @@ postgres=# insert into t values(2,2);
INSERT 0 1
# check that the new change doesn't affect the 'main' postgres
> psql -p55432 -h 127.0.0.1 -U cloud_admin postgres
> psql -p55432 -h 127.0.0.1 -U zenith_admin postgres
postgres=# select * from t;
key | value
-----+-------
@@ -211,7 +204,7 @@ Same applies to certain spelling: i.e. we use MB to denote 1024 * 1024 bytes, wh
To get more familiar with this aspect, refer to:
- [Neon glossary](/docs/glossary.md)
- [PostgreSQL glossary](https://www.postgresql.org/docs/14/glossary.html)
- [PostgreSQL glossary](https://www.postgresql.org/docs/13/glossary.html)
- Other PostgreSQL documentation and sources (Neon fork sources can be found [here](https://github.com/neondatabase/postgres))
## Join the development

View File

@@ -18,5 +18,4 @@ serde_json = "1"
tar = "0.4"
tokio = { version = "1.17", features = ["macros", "rt", "rt-multi-thread"] }
tokio-postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
url = "2.2.2"
workspace_hack = { version = "0.1", path = "../workspace_hack" }

View File

@@ -22,7 +22,7 @@ Also `compute_ctl` spawns two separate service threads:
Usage example:
```sh
compute_ctl -D /var/db/postgres/compute \
-C 'postgresql://cloud_admin@localhost/postgres' \
-C 'postgresql://zenith_admin@localhost/postgres' \
-S /var/db/postgres/specs/current.json \
-b /usr/local/bin/postgres
```

View File

@@ -21,7 +21,7 @@
//! Usage example:
//! ```sh
//! compute_ctl -D /var/db/postgres/compute \
//! -C 'postgresql://cloud_admin@localhost/postgres' \
//! -C 'postgresql://zenith_admin@localhost/postgres' \
//! -S /var/db/postgres/specs/current.json \
//! -b /usr/local/bin/postgres
//! ```
@@ -33,7 +33,7 @@ use std::process::exit;
use std::sync::{Arc, RwLock};
use std::{thread, time::Duration};
use anyhow::{Context, Result};
use anyhow::Result;
use chrono::Utc;
use clap::Arg;
use log::{error, info};
@@ -45,7 +45,6 @@ use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::pg_helpers::*;
use compute_tools::spec::*;
use url::Url;
fn main() -> Result<()> {
// TODO: re-use `utils::logging` later
@@ -117,22 +116,22 @@ fn main() -> Result<()> {
let pageserver_connstr = spec
.cluster
.settings
.find("neon.pageserver_connstring")
.find("zenith.page_server_connstring")
.expect("pageserver connstr should be provided");
let tenant = spec
.cluster
.settings
.find("neon.tenant_id")
.find("zenith.zenith_tenant")
.expect("tenant id should be provided");
let timeline = spec
.cluster
.settings
.find("neon.timeline_id")
.find("zenith.zenith_timeline")
.expect("tenant id should be provided");
let compute_state = ComputeNode {
start_time: Utc::now(),
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
connstr: connstr.to_string(),
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
spec,

View File

@@ -1,3 +1,5 @@
use std::sync::Arc;
use anyhow::{anyhow, Result};
use log::error;
use postgres::Client;
@@ -21,8 +23,9 @@ pub fn create_writablity_check_data(client: &mut Client) -> Result<()> {
Ok(())
}
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
pub async fn check_writability(compute: &Arc<ComputeNode>) -> Result<()> {
let connstr = &compute.connstr;
let (client, connection) = tokio_postgres::connect(connstr, NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}

View File

@@ -35,8 +35,7 @@ use crate::spec::*;
/// Compute node info shared across several `compute_ctl` threads.
pub struct ComputeNode {
pub start_time: DateTime<Utc>,
// Url type maintains proper escaping
pub connstr: url::Url,
pub connstr: String,
pub pgdata: String,
pub pgbin: String,
pub spec: ComputeSpec,
@@ -147,14 +146,8 @@ impl ComputeNode {
_ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
};
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
// Read the archive directly from the `CopyOutReader`
//
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
let mut ar = tar::Archive::new(copyreader);
ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata)?;
self.metrics.basebackup_ms.store(
@@ -263,39 +256,11 @@ impl ComputeNode {
.unwrap_or_else(|| "5432".to_string());
wait_for_postgres(&mut pg, &port, pgdata_path)?;
// If connection fails,
// it may be the old node with `zenith_admin` superuser.
//
// In this case we need to connect with old `zenith_admin`name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = self.connstr.clone();
zenith_admin_connstr
.set_username("zenith_admin")
.map_err(|_| anyhow::anyhow!("invalid connstr"))?;
let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with connsting with expected name
Client::connect(self.connstr.as_str(), NoTls)?
}
Ok(client) => client,
};
let mut client = Client::connect(&self.connstr, NoTls)?;
handle_roles(&self.spec, &mut client)?;
handle_databases(&self.spec, &mut client)?;
handle_role_deletions(self, &mut client)?;
handle_grants(self, &mut client)?;
handle_grants(&self.spec, &mut client)?;
create_writablity_check_data(&mut client)?;
// 'Close' connection

View File

@@ -13,11 +13,11 @@ const MONITOR_CHECK_INTERVAL: u64 = 500; // milliseconds
// Spin in a loop and figure out the last activity time in the Postgres.
// Then update it in the shared state. This function never errors out.
// XXX: the only expected panic is at `RwLock` unwrap().
fn watch_compute_activity(compute: &ComputeNode) {
fn watch_compute_activity(compute: &Arc<ComputeNode>) {
// Suppose that `connstr` doesn't change
let connstr = compute.connstr.as_str();
let connstr = compute.connstr.clone();
// Define `client` outside of the loop to reuse existing connection if it's active.
let mut client = Client::connect(connstr, NoTls);
let mut client = Client::connect(&connstr, NoTls);
let timeout = time::Duration::from_millis(MONITOR_CHECK_INTERVAL);
info!("watching Postgres activity at {}", connstr);
@@ -32,7 +32,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
info!("connection to postgres closed, trying to reconnect");
// Connection is closed, reconnect and try again.
client = Client::connect(connstr, NoTls);
client = Client::connect(&connstr, NoTls);
continue;
}
@@ -43,7 +43,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
FROM pg_stat_activity
WHERE backend_type = 'client backend'
AND pid != pg_backend_pid()
AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
AND usename != 'zenith_admin';", // XXX: find a better way to filter other monitors?
&[],
);
let mut last_active = compute.state.read().unwrap().last_active;
@@ -93,7 +93,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
debug!("cannot connect to postgres: {}, retrying", e);
// Establish a new connection and try again.
client = Client::connect(connstr, NoTls);
client = Client::connect(&connstr, NoTls);
}
}
}

View File

@@ -1,4 +1,3 @@
use std::fmt::Write;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::net::{SocketAddr, TcpStream};
@@ -139,11 +138,9 @@ impl Role {
// Now we also support SCRAM-SHA-256 and to preserve compatibility
// we treat all encrypted_password as md5 unless they starts with SCRAM-SHA-256.
if pass.starts_with("SCRAM-SHA-256") {
write!(params, " PASSWORD '{pass}'")
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" PASSWORD '{}'", pass));
} else {
write!(params, " PASSWORD 'md5{pass}'")
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" PASSWORD 'md5{}'", pass));
}
} else {
params.push_str(" PASSWORD NULL");
@@ -161,8 +158,7 @@ impl Database {
/// it may require a proper quoting too.
pub fn to_pg_options(&self) -> String {
let mut params: String = self.options.as_pg_options();
write!(params, " OWNER {}", &self.owner.quote())
.expect("String is documented to not to error during write operations");
params.push_str(&format!(" OWNER {}", &self.owner.quote()));
params
}
@@ -248,20 +244,18 @@ pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()
bail!("Postgres exited unexpectedly with code {}", code);
}
// Check that we can open pid file first.
if let Ok(file) = File::open(&pid_path) {
let file = BufReader::new(file);
let last_line = file.lines().last();
if pid_path.exists() {
let file = BufReader::new(File::open(&pid_path)?);
let status = file
.lines()
.last()
.unwrap()
.unwrap_or_else(|_| "unknown".to_string());
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
// Pid file could be there and we could read it, but it could be empty, for example.
if let Some(Ok(line)) = last_line {
let status = line.trim();
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
// Now Postgres is ready to accept connections
if status == "ready" && can_connect {
break;
}
// Now Postgres is ready to accept connections
if status.trim() == "ready" && can_connect {
break;
}
}

View File

@@ -1,12 +1,10 @@
use std::path::Path;
use anyhow::{anyhow, Result};
use anyhow::Result;
use log::{info, log_enabled, warn, Level};
use postgres::error::SqlState;
use postgres::{Client, NoTls};
use postgres::Client;
use serde::Deserialize;
use crate::compute::ComputeNode;
use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
@@ -99,13 +97,18 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
// Process delta operations first
if let Some(ops) = &spec.delta_operations {
info!("processing role renames");
info!("processing delta operations on roles");
for op in ops {
match op.action.as_ref() {
// We do not check either role exists or not,
// Postgres will take care of it for us
"delete_role" => {
// no-op now, roles will be deleted at the end of configuration
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
// Renaming role drops its password, since role name is
// Renaming role drops its password, since tole name is
// used as a salt there. It is important that this role
// is recorded with a new `name` in the `roles` list.
// Follow up roles update will set the new password.
@@ -179,7 +182,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
xact.execute(query.as_str(), &[])?;
let grant_query = format!(
"GRANT pg_read_all_data, pg_write_all_data TO {}",
"grant pg_read_all_data, pg_write_all_data to {}",
name.quote()
);
xact.execute(grant_query.as_str(), &[])?;
@@ -194,70 +197,6 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
Ok(())
}
/// Reassign all dependent objects and delete requested roles.
pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
// First, reassign all dependent objects to db owners.
if let Some(ops) = &spec.delta_operations {
info!("reassigning dependent objects of to-be-deleted roles");
for op in ops {
if op.action == "delete_role" {
reassign_owned_objects(node, &op.name)?;
}
}
}
// Second, proceed with role deletions.
let mut xact = client.transaction()?;
if let Some(ops) = &spec.delta_operations {
info!("processing role deletions");
for op in ops {
// We do not check either role exists or not,
// Postgres will take care of it for us
if op.action == "delete_role" {
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.quote());
warn!("deleting role '{}'", &op.name);
xact.execute(query.as_str(), &[])?;
}
}
}
Ok(())
}
// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
for db in &node.spec.cluster.databases {
if db.owner != *role_name {
let mut connstr = node.connstr.clone();
// database name is always the last and the only component of the path
connstr.set_path(&db.name);
let mut client = Client::connect(connstr.as_str(), NoTls)?;
// This will reassign all dependent objects to the db owner
let reassign_query = format!(
"REASSIGN OWNED BY {} TO {}",
role_name.quote(),
db.owner.quote()
);
info!(
"reassigning objects owned by '{}' in db '{}' to '{}'",
role_name, &db.name, &db.owner
);
client.simple_query(&reassign_query)?;
// This now will only drop privileges of the role
let drop_query = format!("DROP OWNED BY {}", role_name.quote());
client.simple_query(&drop_query)?;
}
}
Ok(())
}
/// It follows mostly the same logic as `handle_roles()` excepting that we
/// does not use an explicit transactions block, since major database operations
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
@@ -350,66 +289,23 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
Ok(())
}
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
let spec = &node.spec;
// Grant CREATE ON DATABASE to the database owner
// to allow clients create trusted extensions.
pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database
// via the web interface and proxy link auth. And also we grant a
// read / write all data privilege to every role. So also grant
// create to everyone.
// XXX: later we should stop messing with Postgres ACL in such horrible
// ways.
let roles = spec
.cluster
.roles
.iter()
.map(|r| r.name.quote())
.collect::<Vec<_>>();
for db in &spec.cluster.databases {
let dbname = &db.name;
let query: String = format!(
"GRANT CREATE ON DATABASE {} TO {}",
dbname.quote(),
roles.join(", ")
db.owner.quote()
);
info!("grant query {}", &query);
client.execute(query.as_str(), &[])?;
}
// Do some per-database access adjustments. We'd better do this at db creation time,
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
// atomically.
let mut db_connstr = node.connstr.clone();
for db in &node.spec.cluster.databases {
// database name is always the last and the only component of the path
db_connstr.set_path(&db.name);
let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
// This will only change ownership on the schema itself, not the objects
// inside it. Without it owner of the `public` schema will be `cloud_admin`
// and database owner cannot do anything with it.
let alter_query = format!("ALTER SCHEMA public OWNER TO {}", db.owner.quote());
let res = db_client.simple_query(&alter_query);
if let Err(e) = res {
if e.code() == Some(&SqlState::INVALID_SCHEMA_NAME) {
// This is OK, db just don't have a `public` schema.
// Probably user dropped it manually.
info!("no 'public' schema found in the database {}", db.name);
} else {
// Something different happened, propagate the error
return Err(anyhow!(e));
}
}
}
Ok(())
}

View File

@@ -85,7 +85,7 @@
"vartype": "bool"
},
{
"name": "safekeepers",
"name": "wal_acceptors",
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
"vartype": "string"
},
@@ -150,7 +150,7 @@
"vartype": "integer"
},
{
"name": "neon.tenant_id",
"name": "zenith.zenith_tenant",
"value": "b0554b632bd4d547a63b86c3630317e8",
"vartype": "string"
},
@@ -160,13 +160,13 @@
"vartype": "integer"
},
{
"name": "neon.timeline_id",
"name": "zenith.zenith_timeline",
"value": "2414a61ffc94e428f14b5758fe308e13",
"vartype": "string"
},
{
"name": "shared_preload_libraries",
"value": "neon",
"value": "zenith",
"vartype": "string"
},
{
@@ -175,7 +175,7 @@
"vartype": "string"
},
{
"name": "neon.pageserver_connstring",
"name": "zenith.page_server_connstring",
"value": "host=127.0.0.1 port=6400",
"vartype": "string"
}

View File

@@ -28,7 +28,7 @@ mod pg_helpers_tests {
assert_eq!(
spec.cluster.settings.as_pg_settings(),
"fsync = off\nwal_level = replica\nhot_standby = on\nsafekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nneon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nneon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'neon'\nsynchronous_standby_names = 'walproposer'\nneon.pageserver_connstring = 'host=127.0.0.1 port=6400'"
"fsync = off\nwal_level = replica\nhot_standby = on\nwal_acceptors = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'\nwal_log_hints = on\nlog_connections = on\nshared_buffers = 32768\nport = 55432\nmax_connections = 100\nmax_wal_senders = 10\nlisten_addresses = '0.0.0.0'\nwal_sender_timeout = 0\npassword_encryption = md5\nmaintenance_work_mem = 65536\nmax_parallel_workers = 8\nmax_worker_processes = 8\nzenith.zenith_tenant = 'b0554b632bd4d547a63b86c3630317e8'\nmax_replication_slots = 10\nzenith.zenith_timeline = '2414a61ffc94e428f14b5758fe308e13'\nshared_preload_libraries = 'zenith'\nsynchronous_standby_names = 'walproposer'\nzenith.page_server_connstring = 'host=127.0.0.1 port=6400'"
);
}

View File

@@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
tar = "0.4.38"
tar = "0.4.33"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
serde = { version = "1.0", features = ["derive"] }
serde_with = "1.12.0"

View File

@@ -148,9 +148,9 @@ impl PostgresNode {
// Read a few options from the config file
let context = format!("in config file {}", cfg_path_str);
let port: u16 = conf.parse_field("port", &context)?;
let timeline_id: ZTimelineId = conf.parse_field("neon.timeline_id", &context)?;
let tenant_id: ZTenantId = conf.parse_field("neon.tenant_id", &context)?;
let uses_wal_proposer = conf.get("safekeepers").is_some();
let timeline_id: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?;
let tenant_id: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?;
let uses_wal_proposer = conf.get("wal_acceptors").is_some();
// parse recovery_target_lsn, if any
let recovery_target_lsn: Option<Lsn> =
@@ -231,13 +231,8 @@ impl PostgresNode {
.context("page server 'basebackup' command failed")?;
// Read the archive directly from the `CopyOutReader`
//
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
let mut ar = tar::Archive::new(copyreader);
ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata())
tar::Archive::new(copyreader)
.unpack(&self.pgdata())
.context("extracting base backup failed")?;
Ok(())
@@ -279,8 +274,6 @@ impl PostgresNode {
conf.append("listen_addresses", &self.address.ip().to_string());
conf.append("port", &self.address.port().to_string());
conf.append("wal_keep_size", "0");
// walproposer panics when basebackup is invalid, it is pointless to restart in this case.
conf.append("restart_after_crash", "off");
// Configure the node to fetch pages from pageserver
let pageserver_connstr = {
@@ -303,11 +296,11 @@ impl PostgresNode {
// uses only needed variables namely host, port, user, password.
format!("postgresql://no_user:{}@{}:{}", password, host, port)
};
conf.append("shared_preload_libraries", "neon");
conf.append("shared_preload_libraries", "zenith");
conf.append_line("");
conf.append("neon.pageserver_connstring", &pageserver_connstr);
conf.append("neon.tenant_id", &self.tenant_id.to_string());
conf.append("neon.timeline_id", &self.timeline_id.to_string());
conf.append("zenith.page_server_connstring", &pageserver_connstr);
conf.append("zenith.zenith_tenant", &self.tenant_id.to_string());
conf.append("zenith.zenith_timeline", &self.timeline_id.to_string());
if let Some(lsn) = self.lsn {
conf.append("recovery_target_lsn", &lsn.to_string());
}
@@ -341,7 +334,7 @@ impl PostgresNode {
.map(|sk| format!("localhost:{}", sk.pg_port))
.collect::<Vec<String>>()
.join(",");
conf.append("safekeepers", &safekeepers);
conf.append("wal_acceptors", &safekeepers);
} else {
// We only use setup without safekeepers for tests,
// and don't care about data durability on pageserver,
@@ -352,6 +345,7 @@ impl PostgresNode {
// This isn't really a supported configuration, but can be useful for
// testing.
conf.append("synchronous_standby_names", "pageserver");
conf.append("zenith.callmemaybe_connstring", &self.connstr());
}
let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
@@ -498,7 +492,7 @@ impl PostgresNode {
"host={} port={} user={} dbname={}",
self.address.ip(),
self.address.port(),
"cloud_admin",
"zenith_admin",
"postgres"
)
}

View File

@@ -48,10 +48,6 @@ pub fn start_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
format!("--data-dir={}", etcd_data_dir.display()),
format!("--listen-client-urls={client_urls}"),
format!("--advertise-client-urls={client_urls}"),
// Set --quota-backend-bytes to keep the etcd virtual memory
// size smaller. Our test etcd clusters are very small.
// See https://github.com/etcd-io/etcd/issues/7910
"--quota-backend-bytes=100000000".to_string(),
])
.stdout(Stdio::from(etcd_stdout_file))
.stderr(Stdio::from(etcd_stderr_file))
@@ -77,7 +73,7 @@ pub fn stop_etcd_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let etcd_pid_file_path = etcd_pid_file_path(env);
let pid = Pid::from_raw(read_pidfile(&etcd_pid_file_path).with_context(|| {
format!(
"Failed to read etcd pid file at {}",
"Failed to read etcd pid filea at {}",
etcd_pid_file_path.display()
)
})?);

View File

@@ -49,12 +49,3 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
cmd
}
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
if let Ok(value) = std::env::var(env_key) {
cmd = cmd.env(env_key, value);
}
}
cmd
}

View File

@@ -15,15 +15,15 @@ use std::process::{Command, Stdio};
use utils::{
auth::{encode_from_key_file, Claims, Scope},
postgres_backend::AuthType,
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};
use crate::safekeeper::SafekeeperNode;
//
// This data structures represents neon_local CLI config
// This data structures represents zenith CLI config
//
// It is deserialized from the .neon/config file, or the config file passed
// It is deserialized from the .zenith/config file, or the config file passed
// to 'zenith init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
@@ -34,8 +34,8 @@ pub struct LocalEnv {
// compute nodes).
//
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the NEON_REPO_DIR env variable or
// '.neon' if not given.
// config file itself is. It is read from the ZENITH_REPO_DIR env variable or
// '.zenith' if not given.
#[serde(skip)]
pub base_data_dir: PathBuf,
@@ -119,24 +119,16 @@ impl EtcdBroker {
}
pub fn comma_separated_endpoints(&self) -> String {
self.broker_endpoints
.iter()
.map(|url| {
// URL by default adds a '/' path at the end, which is not what etcd CLI wants.
let url_string = url.as_str();
if url_string.ends_with('/') {
&url_string[0..url_string.len() - 1]
} else {
url_string
}
})
.fold(String::new(), |mut comma_separated_urls, url| {
self.broker_endpoints.iter().map(Url::as_str).fold(
String::new(),
|mut comma_separated_urls, url| {
if !comma_separated_urls.is_empty() {
comma_separated_urls.push(',');
}
comma_separated_urls.push_str(url);
comma_separated_urls
})
},
)
}
}
@@ -144,7 +136,7 @@ impl EtcdBroker {
#[serde(default)]
pub struct PageServerConf {
// node id
pub id: NodeId,
pub id: ZNodeId,
// Pageserver connection settings
pub listen_pg_addr: String,
pub listen_http_addr: String,
@@ -159,7 +151,7 @@ pub struct PageServerConf {
impl Default for PageServerConf {
fn default() -> Self {
Self {
id: NodeId(0),
id: ZNodeId(0),
listen_pg_addr: String::new(),
listen_http_addr: String::new(),
auth_type: AuthType::Trust,
@@ -171,25 +163,19 @@ impl Default for PageServerConf {
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct SafekeeperConf {
pub id: NodeId,
pub id: ZNodeId,
pub pg_port: u16,
pub http_port: u16,
pub sync: bool,
pub remote_storage: Option<String>,
pub backup_threads: Option<u32>,
pub auth_enabled: bool,
}
impl Default for SafekeeperConf {
fn default() -> Self {
Self {
id: NodeId(0),
id: ZNodeId(0),
pg_port: 0,
http_port: 0,
sync: true,
remote_storage: None,
backup_threads: None,
auth_enabled: false,
}
}
}
@@ -339,7 +325,7 @@ impl LocalEnv {
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'zenith init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// to .zenith/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a locale deployment of the page server
# and safekeeeper node. It is read by the 'zenith' command-line
@@ -391,7 +377,6 @@ impl LocalEnv {
base_path != Path::new(""),
"repository base path is missing"
);
ensure!(
!base_path.exists(),
"directory '{}' already exists. Perhaps already initialized?",
@@ -403,6 +388,16 @@ impl LocalEnv {
self.pg_distrib_dir.display()
);
}
for binary in ["pageserver", "safekeeper"] {
if !self.zenith_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{}' in zenith distrib dir '{}'",
binary,
self.zenith_distrib_dir.display()
);
}
}
for binary in ["pageserver", "safekeeper"] {
if !self.zenith_distrib_dir.join(binary).exists() {
bail!(
@@ -411,6 +406,12 @@ impl LocalEnv {
);
}
}
if !self.pg_distrib_dir.join("bin/postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_distrib_dir.display()
);
}
fs::create_dir(&base_path)?;
@@ -467,9 +468,9 @@ impl LocalEnv {
}
fn base_path() -> PathBuf {
match std::env::var_os("NEON_REPO_DIR") {
match std::env::var_os("ZENITH_REPO_DIR") {
Some(val) => PathBuf::from(val),
None => PathBuf::from(".neon"),
None => PathBuf::from(".zenith"),
}
}

View File

@@ -18,12 +18,12 @@ use thiserror::Error;
use utils::{
connstring::connection_address,
http::error::HttpErrorBody,
zid::{NodeId, ZTenantId, ZTimelineId},
zid::{ZNodeId, ZTenantId, ZTimelineId},
};
use crate::local_env::{LocalEnv, SafekeeperConf};
use crate::storage::PageServerNode;
use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
use crate::{fill_rust_env_vars, read_pidfile};
#[derive(Error, Debug)]
pub enum SafekeeperHttpError {
@@ -65,7 +65,7 @@ impl ResponseErrorMessageExt for Response {
//
#[derive(Debug)]
pub struct SafekeeperNode {
pub id: NodeId,
pub id: ZNodeId,
pub conf: SafekeeperConf,
@@ -100,7 +100,7 @@ impl SafekeeperNode {
.unwrap()
}
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: ZNodeId) -> PathBuf {
env.safekeeper_data_dir(format!("sk{}", sk_id).as_ref())
}
@@ -143,19 +143,6 @@ impl SafekeeperNode {
if let Some(prefix) = self.env.etcd_broker.broker_etcd_prefix.as_deref() {
cmd.args(&["--broker-etcd-prefix", prefix]);
}
if let Some(threads) = self.conf.backup_threads {
cmd.args(&["--backup-threads", threads.to_string().as_ref()]);
}
if let Some(ref remote_storage) = self.conf.remote_storage {
cmd.args(&["--remote-storage", remote_storage]);
}
if self.conf.auth_enabled {
cmd.arg("--auth-validation-public-key-path");
// PathBuf is better be passed as is, not via `String`.
cmd.arg(self.env.base_data_dir.join("auth_public_key.pem"));
}
fill_aws_secrets_vars(&mut cmd);
if !cmd.status()?.success() {
bail!(
@@ -299,7 +286,7 @@ impl SafekeeperNode {
&self,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
peer_ids: Vec<NodeId>,
peer_ids: Vec<ZNodeId>,
) -> Result<()> {
Ok(self
.http_request(

View File

@@ -1,8 +1,6 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, Write};
use std::io::Write;
use std::net::TcpStream;
use std::num::NonZeroU64;
use std::path::PathBuf;
use std::process::Command;
use std::time::Duration;
@@ -13,7 +11,6 @@ use nix::errno::Errno;
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;
use pageserver::http::models::{TenantConfigRequest, TenantCreateRequest, TimelineCreateRequest};
use pageserver::tenant_mgr::TenantInfo;
use pageserver::timelines::TimelineInfo;
use postgres::{Config, NoTls};
use reqwest::blocking::{Client, RequestBuilder, Response};
@@ -28,7 +25,8 @@ use utils::{
};
use crate::local_env::LocalEnv;
use crate::{fill_aws_secrets_vars, fill_rust_env_vars, read_pidfile};
use crate::{fill_rust_env_vars, read_pidfile};
use pageserver::tenant_mgr::TenantInfo;
#[derive(Error, Debug)]
pub enum PageserverHttpError {
@@ -39,12 +37,6 @@ pub enum PageserverHttpError {
Response(String),
}
impl From<anyhow::Error> for PageserverHttpError {
fn from(e: anyhow::Error) -> Self {
Self::Response(e.to_string())
}
}
type Result<T> = result::Result<T, PageserverHttpError>;
pub trait ResponseErrorMessageExt: Sized {
@@ -418,15 +410,6 @@ impl PageServerNode {
.map(|x| x.parse::<usize>())
.transpose()?,
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.get("walreceiver_connect_timeout")
.map(|x| x.to_string()),
lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
max_lsn_wal_lag: settings
.get("max_lsn_wal_lag")
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
})
.send()?
.error_from_body()?
@@ -450,41 +433,22 @@ impl PageServerNode {
tenant_id,
checkpoint_distance: settings
.get("checkpoint_distance")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'checkpoint_distance' as an integer")?,
.map(|x| x.parse::<u64>().unwrap()),
compaction_target_size: settings
.get("compaction_target_size")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'compaction_target_size' as an integer")?,
.map(|x| x.parse::<u64>().unwrap()),
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
compaction_threshold: settings
.get("compaction_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'compaction_threshold' as an integer")?,
.map(|x| x.parse::<usize>().unwrap()),
gc_horizon: settings
.get("gc_horizon")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'gc_horizon' as an integer")?,
.map(|x| x.parse::<u64>().unwrap()),
gc_period: settings.get("gc_period").map(|x| x.to_string()),
image_creation_threshold: settings
.get("image_creation_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'image_creation_threshold' as non zero integer")?,
.map(|x| x.parse::<usize>().unwrap()),
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.get("walreceiver_connect_timeout")
.map(|x| x.to_string()),
lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
max_lsn_wal_lag: settings
.get("max_lsn_wal_lag")
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
})
.send()?
.error_from_body()?;
@@ -528,54 +492,13 @@ impl PageServerNode {
Ok(timeline_info_response)
}
/// Import a basebackup prepared using either:
/// a) `pg_basebackup -F tar`, or
/// b) The `fullbackup` pageserver endpoint
///
/// # Arguments
/// * `tenant_id` - tenant to import into. Created if not exists
/// * `timeline_id` - id to assign to imported timeline
/// * `base` - (start lsn of basebackup, path to `base.tar` file)
/// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)
pub fn timeline_import(
&self,
tenant_id: ZTenantId,
timeline_id: ZTimelineId,
base: (Lsn, PathBuf),
pg_wal: Option<(Lsn, PathBuf)>,
) -> anyhow::Result<()> {
let mut client = self.pg_connection_config.connect(NoTls).unwrap();
// Init base reader
let (start_lsn, base_tarfile_path) = base;
let base_tarfile = File::open(base_tarfile_path)?;
let mut base_reader = BufReader::new(base_tarfile);
// Init wal reader if necessary
let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
let wal_tarfile = File::open(wal_tarfile_path)?;
let wal_reader = BufReader::new(wal_tarfile);
(end_lsn, Some(wal_reader))
} else {
(start_lsn, None)
};
// Import base
let import_cmd =
format!("import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
let mut writer = client.copy_in(&import_cmd)?;
io::copy(&mut base_reader, &mut writer)?;
writer.finish()?;
// Import wal if necessary
if let Some(mut wal_reader) = wal_reader {
let import_cmd = format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
let mut writer = client.copy_in(&import_cmd)?;
io::copy(&mut wal_reader, &mut writer)?;
writer.finish()?;
}
Ok(())
}
}
fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
for env_key in ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] {
if let Ok(value) = std::env::var(env_key) {
cmd = cmd.env(env_key, value);
}
}
cmd
}

View File

@@ -6,7 +6,7 @@
- [docker.md](docker.md) — Docker images and building pipeline.
- [glossary.md](glossary.md) — Glossary of all the terms used in codebase.
- [multitenancy.md](multitenancy.md) — how multitenancy is organized in the pageserver and Zenith CLI.
- [sourcetree.md](sourcetree.md) — Overview of the source tree layout.
- [sourcetree.md](sourcetree.md) — Overview of the source tree layeout.
- [pageserver/README.md](/pageserver/README.md) — pageserver overview.
- [postgres_ffi/README.md](/libs/postgres_ffi/README.md) — Postgres FFI overview.
- [test_runner/README.md](/test_runner/README.md) — tests infrastructure overview.

View File

@@ -188,7 +188,7 @@ Not currently committed but proposed:
3. Prefetching
- Why?
As far as pages in Zenith are loaded on demand, to reduce node startup time
and also speedup some massive queries we need some mechanism for bulk loading to
and also sppedup some massive queries we need some mechanism for bulk loading to
reduce page request round-trip overhead.
Currently Postgres is supporting prefetching only for bitmap scan.

View File

@@ -2,7 +2,7 @@
### Authentication
### Backpressure
### Backpresssure
Backpressure is used to limit the lag between pageserver and compute node or WAL service.
@@ -115,7 +115,7 @@ Neon safekeeper LSNs. For more check [safekeeper/README_PROTO.md](/safekeeper/RE
* `CommitLSN`: position in WAL confirmed by quorum safekeepers.
* `RestartLSN`: position in WAL confirmed by all safekeepers.
* `FlushLSN`: part of WAL persisted to the disk by safekeeper.
* `VCL`: the largest LSN for which we can guarantee availability of all prior records.
* `VCL`: the largerst LSN for which we can guarantee availablity of all prior records.
Neon pageserver LSNs:
* `last_record_lsn` - the end of last processed WAL record.

View File

@@ -6,7 +6,7 @@ Zenith supports multitenancy. One pageserver can serve multiple tenants at once.
### Tenants in other commands
By default during `zenith init` new tenant is created on the pageserver. Newly created tenant's id is saved to cli config, so other commands can use it automatically if no direct argument `--tenantid=<tenantid>` is provided. So generally tenantid more frequently appears in internal pageserver interface. Its commands take tenantid argument to distinguish to which tenant operation should be applied. CLI support creation of new tenants.
By default during `zenith init` new tenant is created on the pageserver. Newly created tenant's id is saved to cli config, so other commands can use it automatically if no direct arugment `--tenantid=<tenantid>` is provided. So generally tenantid more frequently appears in internal pageserver interface. Its commands take tenantid argument to distinguish to which tenant operation should be applied. CLI support creation of new tenants.
Examples for cli:

View File

@@ -77,7 +77,7 @@ Upon storage node restart recent WAL files are applied to appropriate pages and
### **Checkpointing**
No such mechanism is needed. Or we may look at the storage node as at kind of continuous checkpointer.
No such mechanism is needed. Or we may look at the storage node as at kind of continuous chekpointer.
### **Full page writes (torn page protection)**
@@ -111,13 +111,13 @@ Since we are storing page diffs of variable sizes there is no structural depende
### **Chunk metadata**
Chunk metadata is a file lies in chunk directory that stores info about current snapshots and PITR regions. Chunk should always consult this data when merging SSTables and applying delete markers.
Chunk metadata is a file lies in chunk directory that stores info about current snapshots and PITR regions. Chunck should always consult this data when merging SSTables and applying delete markers.
### **Chunk splitting**
*(NB: following paragraph is about how to avoid page splitting)*
When chunks hits some soft storage limit (let's say 100Gb) it should be split in half and global metadata about chunk boundaries should be updated. Here i assume that chunk split is a local operation happening on single node. Process of chink splitting should look like following:
When chunks hits some soft storage limit (let's say 100Gb) it should be split in half and global matadata about chunk boundaries should be updated. Here i assume that chunk split is a local operation happening on single node. Process of chink splitting should look like following:
1. Find separation key and spawn two new chunks with [lo, mid) [mid, hi) boundaries.
@@ -166,7 +166,7 @@ Multi-tenant storage makes sense even on a laptop, when you work with different
Few databases are stored in one chunk, replicated three times
- When database can't fit into one storage node it can occupy lots of chunks that were split while database was growing. Chunk placement on nodes is controlled by us with some automatization, but we always may manually move chunks around the cluster.
- When database can't fit into one storage node it can occupy lots of chunks that were split while database was growing. Chunk placement on nodes is controlled by us with some automatization, but we alway may manually move chunks around the cluster.
<img width="940" alt="Screenshot_2021-02-22_at_16 49 10" src="https://user-images.githubusercontent.com/284219/108729815-fb071e00-753b-11eb-86e0-be6703e47d82.png">

View File

@@ -123,7 +123,7 @@ Show currently attached storages. For example:
> zenith storage list
NAME USED TYPE OPTIONS PATH
local 5.1G zenith-local /opt/zenith/store/local
local.compr 20.4G zenith-local compression=on /opt/zenith/store/local.compr
local.compr 20.4G zenith-local comression=on /opt/zenith/store/local.compr
zcloud 60G zenith-remote zenith.tech/stas/mystore
s3tank 80G S3
```
@@ -136,9 +136,9 @@ s3tank 80G S3
## pg
Manages postgres data directories and can start postgres instances with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themselves.
Manages postgres data directories and can start postgreses with proper configuration. An experienced user may avoid using that (except pg create) and configure/run postgres by themself.
Pg is a term for a single postgres running on some data. I'm trying to avoid separation of datadir management and postgres instance management -- both that concepts bundled here together.
Pg is a term for a single postgres running on some data. I'm trying to avoid here separation of datadir management and postgres instance management -- both that concepts bundled here together.
**zenith pg create** [--no-start --snapshot --cow] -s storage-name -n pgdata

View File

@@ -31,7 +31,7 @@ Ideally, just one binary that incorporates all elements we need.
#### Components:
- **zenith-CLI** - interface for end-users. Turns commands to REST requests and handles responses to show them in a user-friendly way.
- **zenith-CLI** - interface for end-users. Turns commands to REST requests and handles responces to show them in a user-friendly way.
CLI proposal is here https://github.com/libzenith/rfcs/blob/003-laptop-cli.md/003-laptop-cli.md
WIP code is here: https://github.com/libzenith/postgres/tree/main/pageserver/src/bin/cli

View File

@@ -25,9 +25,9 @@ To make changes in the catalog you need to run compute nodes
zenith start /home/pipedpiper/northwind:main -- starts a compute instance
zenith start zenith://zenith.tech/northwind:main -- starts a compute instance in the cloud
-- you can start a compute node against any hash or branch
zenith start /home/pipedpiper/northwind:experimental --port 8008 -- start another compute instance (on different port)
zenith start /home/pipedpiper/northwind:experimental --port 8008 -- start anothe compute instance (on different port)
-- you can start a compute node against any hash or branch
zenith start /home/pipedpiper/northwind:<hash> --port 8009 -- start another compute instance (on different port)
zenith start /home/pipedpiper/northwind:<hash> --port 8009 -- start anothe compute instance (on different port)
-- After running some DML you can run
-- zenith status and see how there are two WAL streams one on top of

View File

@@ -121,7 +121,7 @@ repository, launch an instance on the same branch in both clones, and
later try to push/pull between them? Perhaps create a new timeline
every time you start up an instance? Then you would detect that the
timelines have diverged. That would match with the "epoch" concept
that we have in the WAL safekeeper
that we have in the WAL safekeepr
### zenith checkout/commit

View File

@@ -2,9 +2,9 @@ While working on export/import commands, I understood that they fit really well
We may think about backups as snapshots in a different format (i.e plain pgdata format, basebackup tar format, WAL-G format (if they want to support it) and so on). They use same storage API, the only difference is the code that packs/unpacks files.
Even if zenith aims to maintains durability using it's own snapshots, backups will be useful for uploading data from postgres to zenith.
Even if zenith aims to maintains durability using it's own snapshots, backups will be useful for uploading data from postges to zenith.
So here is an attempt to design consistent CLI for different usage scenarios:
So here is an attemt to design consistent CLI for diferent usage scenarios:
#### 1. Start empty pageserver.
That is what we have now.

View File

@@ -3,7 +3,7 @@
GetPage@LSN can be called with older LSNs, and the page server needs
to be able to reconstruct older page versions. That's needed for
having read-only replicas that lag behind the primary, or that are
"anchored" at an older LSN, and internally in the page server when you
"anchored" at an older LSN, and internally in the page server whne you
branch at an older point in time. How do you do that?
For now, I'm not considering incremental snapshots at all. I don't
@@ -192,7 +192,7 @@ for a particular relation readily available alongside the snapshot
files, and you don't need to track what snapshot LSNs exist
separately.
(If we wanted to minimize the number of files, you could include the
(If we wanted to minize the number of files, you could include the
snapshot @300 and the WAL between 200 and 300 in the same file, but I
feel it's probably better to keep them separate)

View File

@@ -121,7 +121,7 @@ The properties of s3 that we depend on are:
list objects
streaming read of entire object
read byte range from object
streaming write new object (may use multipart upload for better reliability)
streaming write new object (may use multipart upload for better relialibity)
delete object (that should not disrupt an already-started read).
Uploaded files, restored backups, or s3 buckets controlled by users could contain malicious content. We should always validate that objects contain the content theyre supposed to. Incorrect, Corrupt or malicious-looking contents should cause software (cloud tools, pageserver) to fail gracefully.

View File

@@ -40,7 +40,7 @@ b) overwrite older pages with the newer pages -- if there is no replica we proba
I imagine that newly created pages would just be added to the back of PageStore (again in queue-like fashion) and this way there wouldn't be any meaningful ordering inside of that queue. When we are forming a new incremental snapshot we may prohibit any updates to the current set of pages in PageStore (giving up on single page version rule) and cut off that whole set when snapshot creation is complete.
With option b) we can also treat PageStor as an uncompleted incremental snapshot.
With option b) we can also treat PageStor as an uncompleted increamental snapshot.
### LocalStore
@@ -123,7 +123,7 @@ As far as I understand Bookfile/Aversion addresses versioning and serialization
As for exact data that should go to snapshots I think it is the following for each snapshot:
* format version number
* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end on file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknown key are present. If we add something backward compatible to the file we can keep the version number.
* set of key/values to interpret content (e.g. is page compression enabled, is that a full or incremental snapshot, previous snapshot id, is there WAL at the end on file, etc) -- it is up to a reader to decide what to do if some keys are missing or some unknow key are present. If we add something backward compatible to the file we can keep the version number.
* array of [BuffTag, corresponding offset in file] for pages -- IIUC that is analogous to ToC in Bookfile
* array of [(BuffTag, LSN), corresponding offset in file] for the WAL records
* pages, one by one
@@ -131,7 +131,7 @@ As for exact data that should go to snapshots I think it is the following for ea
It is also important to be able to load metadata quickly since it would be one of the main factors impacting the time of page server start. E.g. if would store/cache about 10TB of data per page server, the size of uncompressed page references would be about 30GB (10TB / ( 8192 bytes page size / ( ~18 bytes per ObjectTag + 8 bytes offset in the file))).
1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when relation_id is changed and store only delta-encoded offsets for a given relation. That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset deltas would be small).
1) Since our ToC/array of entries can be sorted by ObjectTag we can store the whole BufferTag only when realtion_id is changed and store only delta-encoded offsets for a given relation. That would reduce the average per-page metadata size to something less than 4 bytes instead of 26 (assuming that pages would follow the same order and offset delatas would be small).
2) It makes sense to keep ToC at the beginning of the file to avoid extra seeks to locate it. Doesn't matter too much with the local files but matters on S3 -- if we are accessing a lot of ~1Gb files with the size of metadata ~ 1Mb then the time to transfer this metadata would be comparable with access latency itself (which is about a half of a second). So by slurping metadata with one read of file header instead of N reads we can improve the speed of page server start by this N factor.
I think both of that optimizations can be done later, but that is something to keep in mind when we are designing our storage serialization routines.

View File

@@ -7,13 +7,13 @@ and e.g. prevents electing two proposers with the same term -- it is actually
called `term` in the code. The second, called `epoch`, reflects progress of log
receival and this might lag behind `term`; safekeeper switches to epoch `n` when
it has received all committed log records from all `< n` terms. This roughly
corresponds to proposed in
correspones to proposed in
https://github.com/zenithdb/rfcs/pull/3/files
This makes our biggest our difference from Raft. In Raft, every log record is
stamped with term in which it was generated; while we essentially store in
stamped with term in which it was generated; while we essentialy store in
`epoch` only the term of the highest record on this safekeeper -- when we know
it -- because during recovery generally we don't, and `epoch` is bumped directly
to the term of the proposer who performs the recovery when it is finished. It is

View File

@@ -124,7 +124,7 @@ Each storage node can subscribe to the relevant sets of keys and maintain a loca
### Safekeeper address discovery
During the startup safekeeper should publish the address he is listening on as the part of `{"sk_#{sk_id}" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertise something more useful.
During the startup safekeeper should publish the address he is listening on as the part of `{"sk_#{sk_id}" => ip_address}`. Then the pageserver can resolve `sk_#{sk_id}` to the actual address. This way it would work both locally and in the cloud setup. Safekeeper should have `--advertised-address` CLI option so that we can listen on e.g. 0.0.0.0 but advertize something more useful.
### Safekeeper behavior
@@ -195,7 +195,7 @@ sequenceDiagram
PS1->>SK1: start replication
```
#### Behaviour of services during typical operations
#### Behavour of services during typical operations
```mermaid
sequenceDiagram
@@ -250,7 +250,7 @@ sequenceDiagram
PS2->>M: Register downloaded timeline
PS2->>M: Get safekeepers for timeline, subscribe to changes
PS2->>SK1: Start replication to catch up
note over O: PS2 caught up, time to switch compute
note over O: PS2 catched up, time to switch compute
O->>C: Restart compute with new pageserver url in config
note over C: Wal push is restarted
loop request pages

View File

@@ -49,7 +49,7 @@ topics.
RFC lifecycle:
- Should be submitted in a pull request with and full RFC text in a committed markdown file and copy of the Summary and Motivation sections also included in the PR body.
- Should be submitted in a pull request with and full RFC text in a commited markdown file and copy of the Summary and Motivation sections also included in the PR body.
- RFC should be published for review before most of the actual code is written. This isnt a strict rule, dont hesitate to experiment and build a POC in parallel with writing an RFC.
- Add labels to the PR in the same manner as you do Issues. Example TBD
- Request the review from your peers. Reviewing the RFCs from your peers is a priority, same as reviewing the actual code.

View File

@@ -22,8 +22,8 @@ so we don't want to give users access to the functionality that we don't think i
* pageserver - calculate the size consumed by a timeline and add it to the feedback message.
* safekeeper - pass feedback message from pageserver to compute.
* compute - receive feedback message, enforce size limit based on GUC `neon.max_cluster_size`.
* console - set and update `neon.max_cluster_size` setting
* compute - receive feedback message, enforce size limit based on GUC `zenith.max_cluster_size`.
* console - set and update `zenith.max_cluster_size` setting
## Proposed implementation
@@ -36,12 +36,12 @@ This is how the `LOGICAL_TIMELINE_SIZE` metric is implemented in the pageserver.
Alternatively, we could count only relation data. As in pg_database_size().
This approach is somewhat more user-friendly because it is the data that is really affected by the user.
On the other hand, it puts us in a weaker position than other services, i.e., RDS.
We will need to refactor the timeline_size counter or add another counter to implement it.
We will need to refactor the timeline_size counter or add another counter to implement it.
Timeline size is updated during wal digestion. It is not versioned and is valid at the last_received_lsn moment.
Then this size should be reported to compute node.
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ReplicationFeedback.`
`current_timeline_size` value is included in the walreceiver's custom feedback message: `ZenithFeedback.`
(PR about protocol changes https://github.com/zenithdb/zenith/pull/1037).
@@ -49,7 +49,7 @@ This message is received by the safekeeper and propagated to compute node as a p
Finally, when compute node receives the `current_timeline_size` from safekeeper (or from pageserver directly), it updates the global variable.
And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > neon.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
And then every zenith_extend() operation checks if limit is reached `(current_timeline_size > zenith.max_cluster_size)` and throws `ERRCODE_DISK_FULL` error if so.
(see Postgres error codes [https://www.postgresql.org/docs/devel/errcodes-appendix.html](https://www.postgresql.org/docs/devel/errcodes-appendix.html))
TODO:
@@ -64,16 +64,16 @@ We should warn users if the limit is soon to be reached.
### **Reliability, failure modes and corner cases**
1. `current_timeline_size` is valid at the last received and digested by pageserver lsn.
If pageserver lags behind compute node, `current_timeline_size` will lag too. This lag can be tuned using backpressure, but it is not expected to be 0 all the time.
So transactions that happen in this lsn range may cause limit overflow. Especially operations that generate (i.e., CREATE DATABASE) or free (i.e., TRUNCATE) a lot of data pages while generating a small amount of WAL. Are there other operations like this?
Currently, CREATE DATABASE operations are restricted in the console. So this is not an issue.
### **Security implications**
We treat compute as an untrusted component. That's why we try to isolate it with secure container runtime or a VM.
Malicious users may change the `neon.max_cluster_size`, so we need an extra size limit check.
Malicious users may change the `zenith.max_cluster_size`, so we need an extra size limit check.
To cover this case, we also monitor the compute node size in the console.

View File

@@ -23,7 +23,7 @@ gc_horizon = '67108864'
max_file_descriptors = '100'
# initial superuser role name to use when creating a new tenant
initial_superuser_name = 'cloud_admin'
initial_superuser_name = 'zenith_admin'
broker_etcd_prefix = 'neon'
broker_endpoints = ['some://etcd']
@@ -31,14 +31,14 @@ broker_endpoints = ['some://etcd']
# [remote_storage]
```
The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user,
The config above shows default values for all basic pageserver settings, besides `broker_endpoints`: that one has to be set by the user,
see the corresponding section below.
Pageserver uses default values for all files that are missing in the config, so it's not a hard error to leave the config blank.
Yet, it validates the config values it can (e.g. postgres install dir) and errors if the validation fails, refusing to start.
Note the `[remote_storage]` section: it's a [table](https://toml.io/en/v1.0.0#table) in TOML specification and
- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'cloud_admin'`
- either has to be placed in the config after the table-less values such as `initial_superuser_name = 'zenith_admin'`
- or can be placed anywhere if rewritten in identical form as [inline table](https://toml.io/en/v1.0.0#inline-table): `remote_storage = {foo = 2}`
@@ -54,7 +54,7 @@ Note that TOML distinguishes between strings and integers, the former require si
A list of endpoints (etcd currently) to connect and pull the information from.
Mandatory, does not have a default, since requires etcd to be started as a separate process,
and its connection url should be specified separately.
and its connection url should be specified separately.
#### broker_etcd_prefix
@@ -105,31 +105,17 @@ Interval at which garbage collection is triggered. Default is 100 s.
#### image_creation_threshold
L0 delta layer threshold for L1 image layer creation. Default is 3.
L0 delta layer threshold for L1 iamge layer creation. Default is 3.
#### pitr_interval
WAL retention duration for PITR branching. Default is 30 days.
#### walreceiver_connect_timeout
Time to wait to establish the wal receiver connection before failing
#### lagging_wal_timeout
Time the pageserver did not get any WAL updates from safekeeper (if any).
Avoids lagging pageserver preemptively by forcing to switch it from stalled connections.
#### max_lsn_wal_lag
Difference between Lsn values of the latest available WAL on safekeepers: if currently connected safekeeper starts to lag too long and too much,
it gets swapped to the different one.
#### initial_superuser_name
Name of the initial superuser role, passed to initdb when a new tenant
is initialized. It doesn't affect anything after initialization. The
default is Note: The default is 'cloud_admin', and the console
default is Note: The default is 'zenith_admin', and the console
depends on that, so if you change it, bad things will happen.
#### page_cache_size
@@ -154,7 +140,7 @@ The default distrib dir is `./tmp_install/`.
#### workdir (-D)
A directory in the file system, where pageserver will store its files.
The default is `./.neon/`.
The default is `./.zenith/`.
This parameter has a special CLI alias (`-D`) and can not be overridden with regular `-c` way.
@@ -199,7 +185,7 @@ If no IAM bucket access is used during the remote storage usage, use the `AWS_AC
###### General remote storage configuration
Pageserver allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
Pagesever allows only one remote storage configured concurrently and errors if parameters from multiple different remote configurations are used.
No default values are used for the remote storage configuration parameters.
Besides, there are parameters common for all types of remote storage that can be configured, those have defaults:

View File

@@ -10,7 +10,7 @@ Intended to be used in integration tests and in CLI tools for local installation
`/docs`:
Documentation of the Zenith features and concepts.
Documentaion of the Zenith features and concepts.
Now it is mostly dev documentation.
`/monitoring`:
@@ -42,13 +42,13 @@ Integration tests, written in Python using the `pytest` framework.
`/vendor/postgres`:
PostgreSQL source tree, with the modifications needed for Neon.
PostgreSQL source tree, with the modifications needed for Zenith.
`/vendor/postgres/contrib/neon`:
`/vendor/postgres/contrib/zenith`:
PostgreSQL extension that implements storage manager API and network communications with remote page server.
`/vendor/postgres/contrib/neon_test_utils`:
`/vendor/postgres/contrib/zenith_test_utils`:
PostgreSQL extension that contains functions needed for testing and debugging.
@@ -92,7 +92,7 @@ A single virtual environment with all dependencies is described in the single `P
### Prerequisites
- Install Python 3.9 (the minimal supported version) or greater.
- Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
- Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesnt work as expected.
- If you have some trouble with other version you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via system package manager e.g.:
```bash
# In Ubuntu

View File

@@ -9,7 +9,6 @@
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "1.12.0"
once_cell = "1.8.0"
utils = { path = "../utils" }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -1,209 +1,348 @@
//! A set of primitives to access a shared data/updates, propagated via etcd broker (not persistent).
//! Intended to connect services to each other, not to store their data.
use std::{
collections::{hash_map, HashMap},
fmt::Display,
str::FromStr,
};
/// All broker keys, that are used when dealing with etcd.
pub mod subscription_key;
/// All broker values, possible to use when dealing with etcd.
pub mod subscription_value;
use std::str::FromStr;
use serde::de::DeserializeOwned;
use subscription_key::SubscriptionKey;
use tokio::{sync::mpsc, task::JoinHandle};
use tracing::*;
use crate::subscription_key::SubscriptionFullKey;
use regex::{Captures, Regex};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
pub use etcd_client::*;
use tokio::{sync::mpsc, task::JoinHandle};
use tracing::*;
use utils::{
lsn::Lsn,
zid::{ZNodeId, ZTenantId, ZTenantTimelineId},
};
/// Default value to use for prefixing to all etcd keys with.
/// This way allows isolating safekeeper/pageserver groups in the same etcd cluster.
pub const DEFAULT_NEON_BROKER_ETCD_PREFIX: &str = "neon";
/// A way to control the data retrieval from a certain subscription.
pub struct BrokerSubscription<V> {
/// An unbounded channel to fetch the relevant etcd updates from.
pub value_updates: mpsc::UnboundedReceiver<BrokerUpdate<V>>,
key: SubscriptionKey,
/// A subscription task handle, to allow waiting on it for the task to complete.
/// Both the updates channel and the handle require `&mut`, so it's better to keep
/// both `pub` to allow using both in the same structures without borrow checker complaining.
pub watcher_handle: JoinHandle<Result<(), BrokerError>>,
watcher: Watcher,
#[derive(Debug, Deserialize, Serialize)]
struct SafekeeperTimeline {
safekeeper_id: ZNodeId,
info: SkTimelineInfo,
}
impl<V> BrokerSubscription<V> {
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
pub async fn cancel(mut self) -> Result<(), BrokerError> {
self.watcher.cancel().await.map_err(|e| {
BrokerError::EtcdClient(
e,
format!("Failed to cancel broker subscription, kind: {:?}", self.key),
)
})?;
match (&mut self.watcher_handle).await {
Ok(res) => res,
Err(e) => {
if e.is_cancelled() {
// don't error on the tasks that are cancelled already
Ok(())
} else {
Err(BrokerError::InternalError(format!(
"Panicked during broker subscription task, kind: {:?}, error: {e}",
self.key
)))
}
}
}
}
}
impl<V> Drop for BrokerSubscription<V> {
fn drop(&mut self) {
// we poll data from etcd into the channel in the same struct, so if the whole struct gets dropped,
// no more data is used by the receiver and it's safe to cancel and drop the whole etcd subscription task.
self.watcher_handle.abort();
}
}
/// An update from the etcd broker.
pub struct BrokerUpdate<V> {
/// Etcd generation version, the bigger the more actual the data is.
pub etcd_version: i64,
/// Etcd key for the corresponding value, parsed from the broker KV.
pub key: SubscriptionFullKey,
/// Current etcd value, parsed from the broker KV.
pub value: V,
/// Published data about safekeeper's timeline. Fields made optional for easy migrations.
#[serde_as]
#[derive(Debug, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper offloaded WAL to s3.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub s3_wal_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
#[serde(default)]
pub safekeeper_connection_string: Option<String>,
}
#[derive(Debug, thiserror::Error)]
pub enum BrokerError {
#[error("Etcd client error: {0}. Context: {1}")]
EtcdClient(etcd_client::Error, String),
#[error("Error during parsing etcd key: {0}")]
KeyNotParsed(String),
#[error("Error during parsing etcd data: {0}")]
ParsingError(String),
#[error("Internal error: {0}")]
InternalError(String),
}
/// A way to control the data retrieval from a certain subscription.
pub struct SkTimelineSubscription {
safekeeper_timeline_updates:
mpsc::UnboundedReceiver<HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>>>,
kind: SkTimelineSubscriptionKind,
watcher_handle: JoinHandle<Result<(), BrokerError>>,
watcher: Watcher,
}
impl SkTimelineSubscription {
/// Asynchronously polls for more data from the subscription, suspending the current future if there's no data sent yet.
pub async fn fetch_data(
&mut self,
) -> Option<HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>>> {
self.safekeeper_timeline_updates.recv().await
}
/// Cancels the subscription, stopping the data poller and waiting for it to shut down.
pub async fn cancel(mut self) -> Result<(), BrokerError> {
self.watcher.cancel().await.map_err(|e| {
BrokerError::EtcdClient(
e,
format!(
"Failed to cancel timeline subscription, kind: {:?}",
self.kind
),
)
})?;
self.watcher_handle.await.map_err(|e| {
BrokerError::InternalError(format!(
"Failed to join the timeline updates task, kind: {:?}, error: {e}",
self.kind
))
})?
}
}
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SkTimelineSubscriptionKind {
broker_etcd_prefix: String,
kind: SubscriptionKind,
}
impl SkTimelineSubscriptionKind {
pub fn all(broker_etcd_prefix: String) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::All,
}
}
pub fn tenant(broker_etcd_prefix: String, tenant: ZTenantId) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::Tenant(tenant),
}
}
pub fn timeline(broker_etcd_prefix: String, timeline: ZTenantTimelineId) -> Self {
Self {
broker_etcd_prefix,
kind: SubscriptionKind::Timeline(timeline),
}
}
fn watch_regex(&self) -> Regex {
match self.kind {
SubscriptionKind::All => Regex::new(&format!(
r"^{}/([[:xdigit:]]+)/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'everything' subscription"),
SubscriptionKind::Tenant(tenant_id) => Regex::new(&format!(
r"^{}/{tenant_id}/([[:xdigit:]]+)/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'tenant' subscription"),
SubscriptionKind::Timeline(ZTenantTimelineId {
tenant_id,
timeline_id,
}) => Regex::new(&format!(
r"^{}/{tenant_id}/{timeline_id}/safekeeper/([[:digit:]])$",
self.broker_etcd_prefix
))
.expect("wrong regex for 'timeline' subscription"),
}
}
/// Etcd key to use for watching a certain timeline updates from safekeepers.
pub fn watch_key(&self) -> String {
match self.kind {
SubscriptionKind::All => self.broker_etcd_prefix.to_string(),
SubscriptionKind::Tenant(tenant_id) => {
format!("{}/{tenant_id}/safekeeper", self.broker_etcd_prefix)
}
SubscriptionKind::Timeline(ZTenantTimelineId {
tenant_id,
timeline_id,
}) => format!(
"{}/{tenant_id}/{timeline_id}/safekeeper",
self.broker_etcd_prefix
),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum SubscriptionKind {
/// Get every timeline update.
All,
/// Get certain tenant timelines' updates.
Tenant(ZTenantId),
/// Get certain timeline updates.
Timeline(ZTenantTimelineId),
}
/// Creates a background task to poll etcd for timeline updates from safekeepers.
/// Stops and returns `Err` on any error during etcd communication.
/// Watches the key changes until either the watcher is cancelled via etcd or the subscription cancellation handle,
/// exiting normally in such cases.
/// Etcd values are parsed as json fukes into a type, specified in the generic patameter.
pub async fn subscribe_for_json_values<V>(
pub async fn subscribe_to_safekeeper_timeline_updates(
client: &mut Client,
key: SubscriptionKey,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: DeserializeOwned + Send + 'static,
{
subscribe_for_values(client, key, |_, value_str| {
match serde_json::from_str::<V>(value_str) {
Ok(value) => Some(value),
Err(e) => {
error!("Failed to parse value str '{value_str}': {e}");
None
}
}
})
.await
}
/// Same as [`subscribe_for_json_values`], but allows to specify a custom parser of a etcd value string.
pub async fn subscribe_for_values<P, V>(
client: &mut Client,
key: SubscriptionKey,
value_parser: P,
) -> Result<BrokerSubscription<V>, BrokerError>
where
V: Send + 'static,
P: Fn(SubscriptionFullKey, &str) -> Option<V> + Send + 'static,
{
info!("Subscribing to broker value updates, key: {key:?}");
let subscription_key = key.clone();
subscription: SkTimelineSubscriptionKind,
) -> Result<SkTimelineSubscription, BrokerError> {
info!("Subscribing to timeline updates, subscription kind: {subscription:?}");
let (watcher, mut stream) = client
.watch(key.watch_key(), Some(WatchOptions::new().with_prefix()))
.watch(
subscription.watch_key(),
Some(WatchOptions::new().with_prefix()),
)
.await
.map_err(|e| {
BrokerError::EtcdClient(
e,
format!("Failed to init the watch for subscription {key:?}"),
format!("Failed to init the watch for subscription {subscription:?}"),
)
})?;
let (value_updates_sender, value_updates_receiver) = mpsc::unbounded_channel();
let (timeline_updates_sender, safekeeper_timeline_updates) = mpsc::unbounded_channel();
let subscription_kind = subscription.kind;
let regex = subscription.watch_regex();
let watcher_handle = tokio::spawn(async move {
while let Some(resp) = stream.message().await.map_err(|e| BrokerError::InternalError(format!(
"Failed to get messages from the subscription stream, kind: {:?}, error: {e}", key.kind
"Failed to get messages from the subscription stream, kind: {subscription_kind:?}, error: {e}"
)))? {
if resp.canceled() {
info!("Watch for timeline updates subscription was canceled, exiting");
break;
}
let mut timeline_updates: HashMap<ZTenantTimelineId, HashMap<ZNodeId, SkTimelineInfo>> = HashMap::new();
// Keep track that the timeline data updates from etcd arrive in the right order.
// https://etcd.io/docs/v3.5/learning/api_guarantees/#isolation-level-and-consistency-of-replicas
// > etcd does not ensure linearizability for watch operations. Users are expected to verify the revision of watch responses to ensure correct ordering.
let mut timeline_etcd_versions: HashMap<ZTenantTimelineId, i64> = HashMap::new();
let events = resp.events();
debug!("Processing {} events", events.len());
for event in events {
if EventType::Put == event.event_type() {
if let Some(new_etcd_kv) = event.kv() {
match parse_etcd_kv(new_etcd_kv, &value_parser, &key.cluster_prefix) {
Ok(Some((key, value))) => if let Err(e) = value_updates_sender.send(BrokerUpdate {
etcd_version: new_etcd_kv.version(),
key,
value,
}) {
info!("Broker value updates for key {key:?} sender got dropped, exiting: {e}");
break;
},
Ok(None) => debug!("Ignoring key {key:?} : no value was returned by the parser"),
Err(BrokerError::KeyNotParsed(e)) => debug!("Unexpected key {key:?} for timeline update: {e}"),
Err(e) => error!("Failed to represent etcd KV {new_etcd_kv:?}: {e}"),
let new_kv_version = new_etcd_kv.version();
match parse_etcd_key_value(subscription_kind, &regex, new_etcd_kv) {
Ok(Some((zttid, timeline))) => {
match timeline_updates
.entry(zttid)
.or_default()
.entry(timeline.safekeeper_id)
{
hash_map::Entry::Occupied(mut o) => {
let old_etcd_kv_version = timeline_etcd_versions.get(&zttid).copied().unwrap_or(i64::MIN);
if old_etcd_kv_version < new_kv_version {
o.insert(timeline.info);
timeline_etcd_versions.insert(zttid,new_kv_version);
}
}
hash_map::Entry::Vacant(v) => {
v.insert(timeline.info);
timeline_etcd_versions.insert(zttid,new_kv_version);
}
}
}
Ok(None) => {}
Err(e) => error!("Failed to parse timeline update: {e}"),
};
}
}
}
if let Err(e) = timeline_updates_sender.send(timeline_updates) {
info!("Timeline updates sender got dropped, exiting: {e}");
break;
}
}
Ok(())
}.instrument(info_span!("etcd_broker")));
});
Ok(BrokerSubscription {
key: subscription_key,
value_updates: value_updates_receiver,
Ok(SkTimelineSubscription {
kind: subscription,
safekeeper_timeline_updates,
watcher_handle,
watcher,
})
}
fn parse_etcd_kv<P, V>(
fn parse_etcd_key_value(
subscription_kind: SubscriptionKind,
regex: &Regex,
kv: &KeyValue,
value_parser: &P,
cluster_prefix: &str,
) -> Result<Option<(SubscriptionFullKey, V)>, BrokerError>
where
P: Fn(SubscriptionFullKey, &str) -> Option<V>,
{
let key_str = kv.key_str().map_err(|e| {
BrokerError::EtcdClient(e, "Failed to extract key str out of etcd KV".to_string())
})?;
let value_str = kv.value_str().map_err(|e| {
BrokerError::EtcdClient(e, "Failed to extract value str out of etcd KV".to_string())
})?;
) -> Result<Option<(ZTenantTimelineId, SafekeeperTimeline)>, BrokerError> {
let caps = if let Some(caps) = regex.captures(kv.key_str().map_err(|e| {
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as key str"))
})?) {
caps
} else {
return Ok(None);
};
if !key_str.starts_with(cluster_prefix) {
return Err(BrokerError::KeyNotParsed(format!(
"KV has unexpected key '{key_str}' that does not start with cluster prefix {cluster_prefix}"
)));
}
let (zttid, safekeeper_id) = match subscription_kind {
SubscriptionKind::All => (
ZTenantTimelineId::new(
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?,
),
ZNodeId(parse_capture(&caps, 3).map_err(BrokerError::ParsingError)?),
),
SubscriptionKind::Tenant(tenant_id) => (
ZTenantTimelineId::new(
tenant_id,
parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?,
),
ZNodeId(parse_capture(&caps, 2).map_err(BrokerError::ParsingError)?),
),
SubscriptionKind::Timeline(zttid) => (
zttid,
ZNodeId(parse_capture(&caps, 1).map_err(BrokerError::ParsingError)?),
),
};
let key = SubscriptionFullKey::from_str(&key_str[cluster_prefix.len()..]).map_err(|e| {
BrokerError::KeyNotParsed(format!("Failed to parse KV key '{key_str}': {e}"))
let info_str = kv.value_str().map_err(|e| {
BrokerError::EtcdClient(e, format!("Failed to represent kv {kv:?} as value str"))
})?;
Ok(value_parser(key, value_str).map(|value| (key, value)))
Ok(Some((
zttid,
SafekeeperTimeline {
safekeeper_id,
info: serde_json::from_str(info_str).map_err(|e| {
BrokerError::ParsingError(format!(
"Failed to parse '{info_str}' as safekeeper timeline info: {e}"
))
})?,
},
)))
}
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
where
T: FromStr,
<T as FromStr>::Err: Display,
{
let capture_match = caps
.get(index)
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
.as_str();
capture_match.parse().map_err(|e| {
format!(
"Failed to parse {} from {capture_match}: {e}",
std::any::type_name::<T>()
)
})
}

View File

@@ -1,310 +0,0 @@
//! Etcd broker keys, used in the project and shared between instances.
//! The keys are split into two categories:
//!
//! * [`SubscriptionFullKey`] full key format: `<cluster_prefix>/<tenant>/<timeline>/<node_kind>/<operation>/<node_id>`
//! Always returned from etcd in this form, always start with the user key provided.
//!
//! * [`SubscriptionKey`] user input key format: always partial, since it's unknown which `node_id`'s are available.
//! Full key always starts with the user input one, due to etcd subscription properties.
use std::{fmt::Display, str::FromStr};
use once_cell::sync::Lazy;
use regex::{Captures, Regex};
use utils::zid::{NodeId, ZTenantId, ZTenantTimelineId};
/// The subscription kind to the timeline updates from safekeeper.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SubscriptionKey {
/// Generic cluster prefix, allowing to use the same etcd instance by multiple logic groups.
pub cluster_prefix: String,
/// The subscription kind.
pub kind: SubscriptionKind,
}
/// All currently possible key kinds of a etcd broker subscription.
/// Etcd works so, that every key that starts with the subbscription key given is considered matching and
/// returned as part of the subscrption.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SubscriptionKind {
/// Get every update in etcd.
All,
/// Get etcd updates for any timeiline of a certain tenant, affected by any operation from any node kind.
TenantTimelines(ZTenantId),
/// Get etcd updates for a certain timeline of a tenant, affected by any operation from any node kind.
Timeline(ZTenantTimelineId),
/// Get etcd timeline updates, specific to a certain node kind.
Node(ZTenantTimelineId, NodeKind),
/// Get etcd timeline updates for a certain operation on specific nodes.
Operation(ZTenantTimelineId, NodeKind, OperationKind),
}
/// All kinds of nodes, able to write into etcd.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
Safekeeper,
Pageserver,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OperationKind {
Safekeeper(SkOperationKind),
}
/// Current operations, running inside the safekeeper node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SkOperationKind {
TimelineInfo,
WalBackup,
}
static SUBSCRIPTION_FULL_KEY_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new("/([[:xdigit:]]+)/([[:xdigit:]]+)/([^/]+)/([^/]+)/([[:digit:]]+)$")
.expect("wrong subscription full etcd key regex")
});
/// Full key, received from etcd during any of the component's work.
/// No other etcd keys are considered during system's work.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct SubscriptionFullKey {
pub id: ZTenantTimelineId,
pub node_kind: NodeKind,
pub operation: OperationKind,
pub node_id: NodeId,
}
impl SubscriptionKey {
/// Subscribes for all etcd updates.
pub fn all(cluster_prefix: String) -> Self {
SubscriptionKey {
cluster_prefix,
kind: SubscriptionKind::All,
}
}
/// Subscribes to a given timeline info updates from safekeepers.
pub fn sk_timeline_info(cluster_prefix: String, timeline: ZTenantTimelineId) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(
timeline,
NodeKind::Safekeeper,
OperationKind::Safekeeper(SkOperationKind::TimelineInfo),
),
}
}
/// Subscribes to all timeine updates during specific operations, running on the corresponding nodes.
pub fn operation(
cluster_prefix: String,
timeline: ZTenantTimelineId,
node_kind: NodeKind,
operation: OperationKind,
) -> Self {
Self {
cluster_prefix,
kind: SubscriptionKind::Operation(timeline, node_kind, operation),
}
}
/// Etcd key to use for watching a certain timeline updates from safekeepers.
pub fn watch_key(&self) -> String {
let cluster_prefix = &self.cluster_prefix;
match self.kind {
SubscriptionKind::All => cluster_prefix.to_string(),
SubscriptionKind::TenantTimelines(tenant_id) => {
format!("{cluster_prefix}/{tenant_id}")
}
SubscriptionKind::Timeline(id) => {
format!("{cluster_prefix}/{id}")
}
SubscriptionKind::Node(id, node_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}")
}
SubscriptionKind::Operation(id, node_kind, operation_kind) => {
format!("{cluster_prefix}/{id}/{node_kind}/{operation_kind}")
}
}
}
}
impl Display for OperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
OperationKind::Safekeeper(o) => o.fmt(f),
}
}
}
impl FromStr for OperationKind {
type Err = String;
fn from_str(operation_kind_str: &str) -> Result<Self, Self::Err> {
match operation_kind_str {
"timeline_info" => Ok(OperationKind::Safekeeper(SkOperationKind::TimelineInfo)),
"wal_backup" => Ok(OperationKind::Safekeeper(SkOperationKind::WalBackup)),
_ => Err(format!("Unknown operation kind: {operation_kind_str}")),
}
}
}
impl Display for SubscriptionFullKey {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self {
id,
node_kind,
operation,
node_id,
} = self;
write!(f, "{id}/{node_kind}/{operation}/{node_id}")
}
}
impl FromStr for SubscriptionFullKey {
type Err = String;
fn from_str(subscription_kind_str: &str) -> Result<Self, Self::Err> {
let key_captures = match SUBSCRIPTION_FULL_KEY_REGEX.captures(subscription_kind_str) {
Some(captures) => captures,
None => {
return Err(format!(
"Subscription kind str does not match a subscription full key regex {}",
SUBSCRIPTION_FULL_KEY_REGEX.as_str()
));
}
};
Ok(Self {
id: ZTenantTimelineId::new(
parse_capture(&key_captures, 1)?,
parse_capture(&key_captures, 2)?,
),
node_kind: parse_capture(&key_captures, 3)?,
operation: parse_capture(&key_captures, 4)?,
node_id: NodeId(parse_capture(&key_captures, 5)?),
})
}
}
fn parse_capture<T>(caps: &Captures, index: usize) -> Result<T, String>
where
T: FromStr,
<T as FromStr>::Err: Display,
{
let capture_match = caps
.get(index)
.ok_or_else(|| format!("Failed to get capture match at index {index}"))?
.as_str();
capture_match.parse().map_err(|e| {
format!(
"Failed to parse {} from {capture_match}: {e}",
std::any::type_name::<T>()
)
})
}
impl Display for NodeKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Safekeeper => write!(f, "safekeeper"),
Self::Pageserver => write!(f, "pageserver"),
}
}
}
impl FromStr for NodeKind {
type Err = String;
fn from_str(node_kind_str: &str) -> Result<Self, Self::Err> {
match node_kind_str {
"safekeeper" => Ok(Self::Safekeeper),
"pageserver" => Ok(Self::Pageserver),
_ => Err(format!("Invalid node kind: {node_kind_str}")),
}
}
}
impl Display for SkOperationKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::TimelineInfo => write!(f, "timeline_info"),
Self::WalBackup => write!(f, "wal_backup"),
}
}
}
impl FromStr for SkOperationKind {
type Err = String;
fn from_str(operation_str: &str) -> Result<Self, Self::Err> {
match operation_str {
"timeline_info" => Ok(Self::TimelineInfo),
"wal_backup" => Ok(Self::WalBackup),
_ => Err(format!("Invalid operation: {operation_str}")),
}
}
}
#[cfg(test)]
mod tests {
use utils::zid::ZTimelineId;
use super::*;
#[test]
fn full_cluster_key_parsing() {
let prefix = "neon";
let node_kind = NodeKind::Safekeeper;
let operation_kind = OperationKind::Safekeeper(SkOperationKind::WalBackup);
let tenant_id = ZTenantId::generate();
let timeline_id = ZTimelineId::generate();
let id = ZTenantTimelineId::new(tenant_id, timeline_id);
let node_id = NodeId(1);
let timeline_subscription_keys = [
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::All,
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::TenantTimelines(tenant_id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Timeline(id),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Node(id, node_kind),
},
SubscriptionKey {
cluster_prefix: prefix.to_string(),
kind: SubscriptionKind::Operation(id, node_kind, operation_kind),
},
];
let full_key_string = format!(
"{}/{node_id}",
timeline_subscription_keys.last().unwrap().watch_key()
);
for key in timeline_subscription_keys {
assert!(full_key_string.starts_with(&key.watch_key()), "Full key '{full_key_string}' should start with any of the keys, keys, but {key:?} did not match");
}
let full_key = SubscriptionFullKey::from_str(&full_key_string).unwrap_or_else(|e| {
panic!("Failed to parse {full_key_string} as a subscription full key: {e}")
});
assert_eq!(
full_key,
SubscriptionFullKey {
id,
node_kind,
operation: operation_kind,
node_id
}
)
}
}

View File

@@ -1,35 +0,0 @@
//! Module for the values to put into etcd.
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::lsn::Lsn;
/// Data about safekeeper's timeline. Fields made optional for easy migrations.
#[serde_as]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SkTimelineInfo {
/// Term of the last entry.
pub last_log_term: Option<u64>,
/// LSN of the last record.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub flush_lsn: Option<Lsn>,
/// Up to which LSN safekeeper regards its WAL as committed.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub commit_lsn: Option<Lsn>,
/// LSN up to which safekeeper has backed WAL.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub backup_lsn: Option<Lsn>,
/// LSN of last checkpoint uploaded by pageserver.
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub remote_consistent_lsn: Option<Lsn>,
#[serde_as(as = "Option<DisplayFromStr>")]
#[serde(default)]
pub peer_horizon_lsn: Option<Lsn>,
/// A connection string to use for WAL receiving.
#[serde(default)]
pub safekeeper_connstr: Option<String>,
}

View File

@@ -3,7 +3,6 @@
//! Otherwise, we might not see all metrics registered via
//! a default registry.
use lazy_static::lazy_static;
pub use prometheus::{core, default_registry, proto};
pub use prometheus::{exponential_buckets, linear_buckets};
pub use prometheus::{register_gauge, Gauge};
pub use prometheus::{register_gauge_vec, GaugeVec};

View File

@@ -23,7 +23,7 @@ workspace_hack = { version = "0.1", path = "../../workspace_hack" }
[dev-dependencies]
env_logger = "0.9"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
wal_craft = { path = "wal_craft" }
wal_generate = { path = "wal_generate" }
[build-dependencies]
bindgen = "0.59.1"

View File

@@ -2,7 +2,6 @@ extern crate bindgen;
use std::env;
use std::path::PathBuf;
use std::process::Command;
use bindgen::callbacks::ParseCallbacks;
@@ -46,43 +45,6 @@ fn main() {
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=pg_control_ffi.h");
// Finding the location of C headers for the Postgres server:
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
// - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/tmp_install/include/postgresql/server`
let mut pg_install_dir: PathBuf;
if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
pg_install_dir = postgres_install_dir.into();
} else {
pg_install_dir = PathBuf::from("tmp_install")
}
if pg_install_dir.is_relative() {
let cwd = env::current_dir().unwrap();
pg_install_dir = cwd.join("..").join("..").join(pg_install_dir);
}
let pg_config_bin = pg_install_dir.join("bin").join("pg_config");
let inc_server_path: String = if pg_config_bin.exists() {
let output = Command::new(pg_config_bin)
.arg("--includedir-server")
.output()
.expect("failed to execute `pg_config --includedir-server`");
if !output.status.success() {
panic!("`pg_config --includedir-server` failed")
}
String::from_utf8(output.stdout).unwrap().trim_end().into()
} else {
pg_install_dir
.join("include")
.join("postgresql")
.join("server")
.into_os_string()
.into_string()
.unwrap()
};
// The bindgen::Builder is the main entry point
// to bindgen, and lets you build up options for
// the resulting bindings.
@@ -119,7 +81,15 @@ fn main() {
// explicit padding fields.
.explicit_padding(true)
//
.clang_arg(format!("-I{inc_server_path}"))
// Path the server include dir. It is in tmp_install/include/server, if you did
// "configure --prefix=<path to tmp_install>". But if you used "configure --prefix=/",
// and used DESTDIR to move it into tmp_install, then it's in
// tmp_install/include/postgres/server
// 'pg_config --includedir-server' would perhaps be the more proper way to find it,
// but this will do for now.
//
.clang_arg("-I../../tmp_install/include/server")
.clang_arg("-I../../tmp_install/include/postgresql/server")
//
// Finish the builder and generate the bindings.
//

View File

@@ -73,7 +73,7 @@ impl WalStreamDecoder {
/// Returns one of the following:
/// Ok((Lsn, Bytes)): a tuple containing the LSN of next record, and the record itself
/// Ok(None): there is not enough data in the input buffer. Feed more by calling the `feed_bytes` function
/// Err(WalDecodeError): an error occurred while decoding, meaning the input was invalid.
/// Err(WalDecodeError): an error occured while decoding, meaning the input was invalid.
///
pub fn poll_decode(&mut self) -> Result<Option<(Lsn, Bytes)>, WalDecodeError> {
let recordbuf;
@@ -82,17 +82,7 @@ impl WalStreamDecoder {
// that cross page boundaries.
loop {
// parse and verify page boundaries as we go
if self.padlen > 0 {
// We should first skip padding, as we may have to skip some page headers if we're processing the XLOG_SWITCH record.
if self.inputbuf.remaining() < self.padlen as usize {
return Ok(None);
}
// skip padding
self.inputbuf.advance(self.padlen as usize);
self.lsn += self.padlen as u64;
self.padlen = 0;
} else if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
// parse long header
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
@@ -138,6 +128,15 @@ impl WalStreamDecoder {
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
continue;
} else if self.padlen > 0 {
if self.inputbuf.remaining() < self.padlen as usize {
return Ok(None);
}
// skip padding
self.inputbuf.advance(self.padlen as usize);
self.lsn += self.padlen as u64;
self.padlen = 0;
} else if self.contlen == 0 {
assert!(self.recordbuf.is_empty());
@@ -227,10 +226,10 @@ impl WalStreamDecoder {
self.padlen = self.lsn.calc_padding(8u32) as u32;
}
// We should return LSN of the next record, not the last byte of this record or
// the byte immediately after. Note that this handles both XLOG_SWITCH and usual
// records, the former "spans" until the next WAL segment (see test_xlog_switch).
let result = (self.lsn + self.padlen as u64, recordbuf);
// Always align resulting LSN on 0x8 boundary -- that is important for getPage()
// and WalReceiver integration. Since this code is used both for WalReceiver and
// initial WAL import let's force alignment right here.
let result = (self.lsn.align(), recordbuf);
Ok(Some(result))
}
}

View File

@@ -531,7 +531,7 @@ impl CheckPoint {
///
/// Returns 'true' if the XID was updated.
pub fn update_next_xid(&mut self, xid: u32) -> bool {
// nextXid should nw greater than any XID in WAL, so increment provided XID and check for wraparround.
// nextXid should nw greate than any XID in WAL, so increment provided XID and check for wraparround.
let mut new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
// To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.
// XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
@@ -597,18 +597,19 @@ mod tests {
fn init_logging() {
let _ = env_logger::Builder::from_env(
env_logger::Env::default()
.default_filter_or("wal_craft=info,postgres_ffi::xlog_utils=trace"),
.default_filter_or("wal_generate=info,postgres_ffi::xlog_utils=trace"),
)
.is_test(true)
.try_init();
}
fn test_end_of_wal<C: wal_craft::Crafter>(
fn test_end_of_wal(
test_name: &str,
generate_wal: impl Fn(&mut postgres::Client) -> anyhow::Result<postgres::types::PgLsn>,
expected_end_of_wal_non_partial: Lsn,
last_segment: &str,
) {
use wal_craft::*;
use wal_generate::*;
// 1. Generate some WAL
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("..")
@@ -621,9 +622,9 @@ mod tests {
fs::remove_dir_all(&cfg.datadir).unwrap();
}
cfg.initdb().unwrap();
let srv = cfg.start_server().unwrap();
let mut srv = cfg.start_server().unwrap();
let expected_wal_end: Lsn =
u64::from(C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
u64::from(generate_wal(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
srv.kill();
// 2. Pick WAL generated by initdb
@@ -680,8 +681,9 @@ mod tests {
#[test]
pub fn test_find_end_of_wal_simple() {
init_logging();
test_end_of_wal::<wal_craft::Simple>(
test_end_of_wal(
"test_find_end_of_wal_simple",
wal_generate::generate_simple,
"0/2000000".parse::<Lsn>().unwrap(),
"000000010000000000000001",
);
@@ -690,8 +692,9 @@ mod tests {
#[test]
pub fn test_find_end_of_wal_crossing_segment_followed_by_small_one() {
init_logging();
test_end_of_wal::<wal_craft::WalRecordCrossingSegmentFollowedBySmallOne>(
test_end_of_wal(
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
wal_generate::generate_wal_record_crossing_segment_followed_by_small_one,
"0/3000000".parse::<Lsn>().unwrap(),
"000000010000000000000002",
);
@@ -701,8 +704,9 @@ mod tests {
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
pub fn test_find_end_of_wal_last_crossing_segment() {
init_logging();
test_end_of_wal::<wal_craft::LastWalRecordCrossingSegment>(
test_end_of_wal(
"test_find_end_of_wal_last_crossing_segment",
wal_generate::generate_last_wal_record_crossing_segment,
"0/3000000".parse::<Lsn>().unwrap(),
"000000010000000000000002",
);

View File

@@ -1,100 +0,0 @@
use anyhow::*;
use clap::{App, Arg, ArgMatches};
use std::str::FromStr;
use wal_craft::*;
fn main() -> Result<()> {
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("wal_craft=info"))
.init();
let type_arg = &Arg::new("type")
.takes_value(true)
.help("Type of WAL to craft")
.possible_values([
Simple::NAME,
LastWalRecordXlogSwitch::NAME,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
LastWalRecordCrossingSegment::NAME,
])
.required(true);
let arg_matches = App::new("Postgres WAL crafter")
.about("Crafts Postgres databases with specific WAL properties")
.subcommand(
App::new("print-postgres-config")
.about("Print the configuration required for PostgreSQL server before running this script")
)
.subcommand(
App::new("with-initdb")
.about("Craft WAL in a new data directory first initialized with initdb")
.arg(type_arg)
.arg(
Arg::new("datadir")
.takes_value(true)
.help("Data directory for the Postgres server")
.required(true)
)
.arg(
Arg::new("pg-distrib-dir")
.long("pg-distrib-dir")
.takes_value(true)
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
.default_value("/usr/local")
)
)
.subcommand(
App::new("in-existing")
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
.arg(type_arg)
.arg(
Arg::new("connection")
.takes_value(true)
.help("Connection string to the Postgres database to populate")
.required(true)
)
)
.get_matches();
let wal_craft = |arg_matches: &ArgMatches, client| {
let lsn = match arg_matches.value_of("type").unwrap() {
Simple::NAME => Simple::craft(client)?,
LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
LastWalRecordXlogSwitchEndsOnPageBoundary::craft(client)?
}
WalRecordCrossingSegmentFollowedBySmallOne::NAME => {
WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?
}
LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
a => panic!("Unknown --type argument: {}", a),
};
println!("end_of_wal = {}", lsn);
Ok(())
};
match arg_matches.subcommand() {
None => panic!("No subcommand provided"),
Some(("print-postgres-config", _)) => {
for cfg in REQUIRED_POSTGRES_CONFIG.iter() {
println!("{}", cfg);
}
Ok(())
}
Some(("with-initdb", arg_matches)) => {
let cfg = Conf {
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
datadir: arg_matches.value_of("datadir").unwrap().into(),
};
cfg.initdb()?;
let srv = cfg.start_server()?;
wal_craft(arg_matches, &mut srv.connect_with_timeout()?)?;
srv.kill();
Ok(())
}
Some(("in-existing", arg_matches)) => wal_craft(
arg_matches,
&mut postgres::Config::from_str(arg_matches.value_of("connection").unwrap())?
.connect(postgres::NoTls)?,
),
Some(_) => panic!("Unknown subcommand"),
}
}

View File

@@ -1,5 +1,5 @@
[package]
name = "wal_craft"
name = "wal_generate"
version = "0.1.0"
edition = "2021"
@@ -10,7 +10,5 @@ anyhow = "1.0"
clap = "3.0"
env_logger = "0.9"
log = "0.4"
once_cell = "1.8.0"
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
postgres_ffi = { path = "../" }
tempfile = "3.2"

View File

@@ -0,0 +1,58 @@
use anyhow::*;
use clap::{App, Arg};
use wal_generate::*;
fn main() -> Result<()> {
env_logger::Builder::from_env(
env_logger::Env::default().default_filter_or("wal_generate=info"),
)
.init();
let arg_matches = App::new("Postgres WAL generator")
.about("Generates Postgres databases with specific WAL properties")
.arg(
Arg::new("datadir")
.short('D')
.long("datadir")
.takes_value(true)
.help("Data directory for the Postgres server")
.required(true)
)
.arg(
Arg::new("pg-distrib-dir")
.long("pg-distrib-dir")
.takes_value(true)
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
.default_value("/usr/local")
)
.arg(
Arg::new("type")
.long("type")
.takes_value(true)
.help("Type of WAL to generate")
.possible_values(["simple", "last_wal_record_crossing_segment", "wal_record_crossing_segment_followed_by_small_one"])
.required(true)
)
.get_matches();
let cfg = Conf {
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
datadir: arg_matches.value_of("datadir").unwrap().into(),
};
cfg.initdb()?;
let mut srv = cfg.start_server()?;
let lsn = match arg_matches.value_of("type").unwrap() {
"simple" => generate_simple(&mut srv.connect_with_timeout()?)?,
"last_wal_record_crossing_segment" => {
generate_last_wal_record_crossing_segment(&mut srv.connect_with_timeout()?)?
}
"wal_record_crossing_segment_followed_by_small_one" => {
generate_wal_record_crossing_segment_followed_by_small_one(
&mut srv.connect_with_timeout()?,
)?
}
a => panic!("Unknown --type argument: {}", a),
};
println!("end_of_wal = {}", lsn);
srv.kill();
Ok(())
}

View File

@@ -1,14 +1,9 @@
use anyhow::*;
use core::time::Duration;
use log::*;
use once_cell::sync::Lazy;
use postgres::types::PgLsn;
use postgres::Client;
use postgres_ffi::xlog_utils::{
XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
};
use std::cmp::Ordering;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::Instant;
@@ -26,16 +21,6 @@ pub struct PostgresServer {
client_config: postgres::Config,
}
pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
vec![
"wal_keep_size=50MB", // Ensure old WAL is not removed
"shared_preload_libraries=neon", // can only be loaded at startup
// Disable background processes as much as possible
"wal_writer_delay=10s",
"autovacuum=off",
]
});
impl Conf {
fn pg_bin_dir(&self) -> PathBuf {
self.pg_distrib_dir.join("bin")
@@ -84,12 +69,6 @@ impl Conf {
pub fn start_server(&self) -> Result<PostgresServer> {
info!("Starting Postgres server in {:?}", self.datadir);
let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| {
format!(
"Failed to create pg.log file in directory {}",
self.datadir.display()
)
})?;
let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
let unix_socket_dir_path = unix_socket_dir.path().to_owned();
let server_process = self
@@ -99,9 +78,13 @@ impl Conf {
.arg(unix_socket_dir_path.as_os_str())
.arg("-D")
.arg(self.datadir.as_os_str())
.args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
.args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
.args(REQUIRED_POSTGRES_CONFIG.iter().flat_map(|cfg| ["-c", cfg]))
.stderr(Stdio::from(log_file))
.args(&["-c", "shared_preload_libraries=zenith"]) // can only be loaded at startup
// Disable background processes as much as possible
.args(&["-c", "wal_writer_delay=10s"])
.args(&["-c", "autovacuum=off"])
.stderr(Stdio::null())
.spawn()?;
let server = PostgresServer {
process: server_process,
@@ -154,7 +137,7 @@ impl PostgresServer {
bail!("Connection timed out");
}
pub fn kill(mut self) {
pub fn kill(&mut self) {
self.process.kill().unwrap();
self.process.wait().unwrap();
}
@@ -191,15 +174,11 @@ pub trait PostgresClientExt: postgres::GenericClient {
impl<C: postgres::GenericClient> PostgresClientExt for C {}
pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result<()> {
client.execute("create extension if not exists neon_test_utils", &[])?;
let wal_keep_size: String = client.query_one("SHOW wal_keep_size", &[])?.get(0);
ensure!(wal_keep_size == "50MB");
let wal_writer_delay: String = client.query_one("SHOW wal_writer_delay", &[])?.get(0);
ensure!(wal_writer_delay == "10s");
let autovacuum: String = client.query_one("SHOW autovacuum", &[])?.get(0);
ensure!(autovacuum == "off");
fn generate_internal<C: postgres::GenericClient>(
client: &mut C,
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
) -> Result<PgLsn> {
client.execute("create extension if not exists zenith_test_utils", &[])?;
let wal_segment_size = client.query_one(
"select cast(setting as bigint) as setting, unit \
@@ -215,29 +194,13 @@ pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result
"Unexpected wal_segment_size in bytes"
);
Ok(())
}
pub trait Crafter {
const NAME: &'static str;
/// Generates WAL using the client `client`. Returns the expected end-of-wal LSN.
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn>;
}
fn craft_internal<C: postgres::GenericClient>(
client: &mut C,
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
) -> Result<PgLsn> {
ensure_server_config(client)?;
let initial_lsn = client.pg_current_wal_insert_lsn()?;
info!("LSN initial = {}", initial_lsn);
let last_lsn = match f(client, initial_lsn)? {
None => client.pg_current_wal_insert_lsn()?,
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
Ordering::Less => bail!("Some records were inserted after the crafted WAL"),
Ordering::Less => bail!("Some records were inserted after the generated WAL"),
Ordering::Equal => last_lsn,
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
},
@@ -246,116 +209,25 @@ fn craft_internal<C: postgres::GenericClient>(
// Some records may be not flushed, e.g. non-transactional logical messages.
client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
Ordering::Less => bail!("Some records were flushed after the crafted WAL"),
Ordering::Less => bail!("Some records were flushed after the generated WAL"),
Ordering::Equal => {}
Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
}
Ok(last_lsn)
}
pub struct Simple;
impl Crafter for Simple {
const NAME: &'static str = "simple";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
craft_internal(client, |client, _| {
client.execute("CREATE table t(x int)", &[])?;
Ok(None)
})
}
}
pub struct LastWalRecordXlogSwitch;
impl Crafter for LastWalRecordXlogSwitch {
const NAME: &'static str = "last_wal_record_xlog_switch";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
// Do not use generate_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
pub fn generate_simple(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
generate_internal(client, |client, _| {
client.execute("CREATE table t(x int)", &[])?;
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let next_segment = PgLsn::from(0x0200_0000);
ensure!(
after_xlog_switch <= next_segment,
"XLOG_SWITCH message ended after the expected segment boundary: {} > {}",
after_xlog_switch,
next_segment
);
Ok(next_segment)
}
Ok(None)
})
}
pub struct LastWalRecordXlogSwitchEndsOnPageBoundary;
impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
const NAME: &'static str = "last_wal_record_xlog_switch_ends_on_page_boundary";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
// Do not use generate_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
client.execute("CREATE table t(x int)", &[])?;
// Add padding so the XLOG_SWITCH record ends exactly on XLOG_BLCKSZ boundary.
// We will use logical message as the padding. We start with detecting how much WAL
// it takes for one logical message, considering all alignments and headers.
let base_wal_advance = {
let before_lsn = client.pg_current_wal_insert_lsn()?;
// Small non-empty message bigger than few bytes is more likely than an empty
// message to have the same format as the big padding message.
client.execute(
"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', 10))",
&[],
)?;
// The XLOG_SWITCH record has no data => its size is exactly XLOG_SIZE_OF_XLOG_RECORD.
(u64::from(client.pg_current_wal_insert_lsn()?) - u64::from(before_lsn)) as usize
+ XLOG_SIZE_OF_XLOG_RECORD
};
let mut remaining_lsn =
XLOG_BLCKSZ - u64::from(client.pg_current_wal_insert_lsn()?) as usize % XLOG_BLCKSZ;
if remaining_lsn < base_wal_advance {
remaining_lsn += XLOG_BLCKSZ;
}
let repeats = 10 + remaining_lsn - base_wal_advance;
info!(
"current_wal_insert_lsn={}, remaining_lsn={}, base_wal_advance={}, repeats={}",
client.pg_current_wal_insert_lsn()?,
remaining_lsn,
base_wal_advance,
repeats
);
client.execute(
"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', $1))",
&[&(repeats as i32)],
)?;
info!(
"current_wal_insert_lsn={}, XLOG_SIZE_OF_XLOG_RECORD={}",
client.pg_current_wal_insert_lsn()?,
XLOG_SIZE_OF_XLOG_RECORD
);
// Emit the XLOG_SWITCH
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
let next_segment = PgLsn::from(0x0200_0000);
ensure!(
after_xlog_switch < next_segment,
"XLOG_SWITCH message ended on or after the expected segment boundary: {} > {}",
after_xlog_switch,
next_segment
);
ensure!(
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ == XLOG_SIZE_OF_XLOG_SHORT_PHD,
"XLOG_SWITCH message ended not on page boundary: {}",
after_xlog_switch
);
Ok(next_segment)
}
}
fn craft_single_logical_message(
fn generate_single_logical_message(
client: &mut impl postgres::GenericClient,
transactional: bool,
) -> Result<PgLsn> {
craft_internal(client, |client, initial_lsn| {
generate_internal(client, |client, initial_lsn| {
ensure!(
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
"Initial LSN is too far in the future"
@@ -393,18 +265,14 @@ fn craft_single_logical_message(
})
}
pub struct WalRecordCrossingSegmentFollowedBySmallOne;
impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
const NAME: &'static str = "wal_record_crossing_segment_followed_by_small_one";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
craft_single_logical_message(client, true)
}
pub fn generate_wal_record_crossing_segment_followed_by_small_one(
client: &mut impl postgres::GenericClient,
) -> Result<PgLsn> {
generate_single_logical_message(client, true)
}
pub struct LastWalRecordCrossingSegment;
impl Crafter for LastWalRecordCrossingSegment {
const NAME: &'static str = "last_wal_record_crossing_segment";
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
craft_single_logical_message(client, false)
}
pub fn generate_last_wal_record_crossing_segment<C: postgres::GenericClient>(
client: &mut C,
) -> Result<PgLsn> {
generate_single_logical_message(client, false)
}

View File

@@ -5,17 +5,14 @@ edition = "2021"
[dependencies]
anyhow = { version = "1.0", features = ["backtrace"] }
async-trait = "0.1"
metrics = { version = "0.1", path = "../metrics" }
once_cell = "1.8.0"
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
tokio-util = { version = "0.7", features = ["io"] }
tracing = "0.1.27"
rusoto_core = "0.48"
rusoto_s3 = "0.48"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
tokio-util = { version = "0.7", features = ["io"] }
toml_edit = { version = "0.13", features = ["easy"] }
tracing = "0.1.27"
async-trait = "0.1"
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -12,16 +12,12 @@ use std::{
borrow::Cow,
collections::HashMap,
ffi::OsStr,
fmt::Debug,
num::{NonZeroU32, NonZeroUsize},
path::{Path, PathBuf},
pin::Pin,
};
use anyhow::{bail, Context};
use anyhow::Context;
use tokio::io;
use toml_edit::Item;
use tracing::info;
pub use self::{
@@ -42,19 +38,13 @@ pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
pub trait RemoteObjectName {
// Needed to retrieve last component for RemoteObjectId.
// In other words a file name
fn object_name(&self) -> Option<&str>;
}
/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
#[async_trait::async_trait]
pub trait RemoteStorage: Send + Sync {
/// A way to uniquely reference a file in the remote storage.
type RemoteObjectId: RemoteObjectName;
type RemoteObjectId;
/// Attempts to derive the storage path out of the local path, if the latter is correct.
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId>;
@@ -65,12 +55,6 @@ pub trait RemoteStorage: Send + Sync {
/// Lists all items the storage has right now.
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
/// Lists all top level subdirectories for a given prefix
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
/// Streams the local file contents into remote into the remote storage entry.
async fn upload(
&self,
@@ -84,7 +68,11 @@ pub trait RemoteStorage: Send + Sync {
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError>;
async fn download(
&self,
from: &Self::RemoteObjectId,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>>;
/// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
/// Returns the metadata, if any was stored with the file previously.
@@ -93,49 +81,12 @@ pub trait RemoteStorage: Send + Sync {
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError>;
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>>;
async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()>;
}
pub struct Download {
pub download_stream: Pin<Box<dyn io::AsyncRead + Unpin + Send>>,
/// Extra key-value data, associated with the current remote file.
pub metadata: Option<StorageMetadata>,
}
impl Debug for Download {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("Download")
.field("metadata", &self.metadata)
.finish()
}
}
#[derive(Debug)]
pub enum DownloadError {
/// Validation or other error happened due to user input.
BadInput(anyhow::Error),
/// The file was not found in the remote storage.
NotFound,
/// The file was found in the remote storage, but the download failed.
Other(anyhow::Error),
}
impl std::fmt::Display for DownloadError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DownloadError::BadInput(e) => {
write!(f, "Failed to download a remote file due to user input: {e}")
}
DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e}"),
}
}
}
impl std::error::Error for DownloadError {}
/// Every storage, currently supported.
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
pub enum GenericRemoteStorage {
@@ -227,7 +178,7 @@ pub struct S3Config {
pub concurrency_limit: NonZeroUsize,
}
impl Debug for S3Config {
impl std::fmt::Debug for S3Config {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("S3Config")
.field("bucket_name", &self.bucket_name)
@@ -252,90 +203,6 @@ pub fn path_with_suffix_extension(original_path: impl AsRef<Path>, suffix: &str)
.with_extension(new_extension.as_ref())
}
impl RemoteStorageConfig {
pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
let local_path = toml.get("local_path");
let bucket_name = toml.get("bucket_name");
let bucket_region = toml.get("bucket_region");
let max_concurrent_syncs = NonZeroUsize::new(
parse_optional_integer("max_concurrent_syncs", toml)?
.unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS),
)
.context("Failed to parse 'max_concurrent_syncs' as a positive integer")?;
let max_sync_errors = NonZeroU32::new(
parse_optional_integer("max_sync_errors", toml)?
.unwrap_or(DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS),
)
.context("Failed to parse 'max_sync_errors' as a positive integer")?;
let concurrency_limit = NonZeroUsize::new(
parse_optional_integer("concurrency_limit", toml)?
.unwrap_or(DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT),
)
.context("Failed to parse 'concurrency_limit' as a positive integer")?;
let storage = match (local_path, bucket_name, bucket_region) {
(None, None, None) => bail!("no 'local_path' nor 'bucket_name' option"),
(_, Some(_), None) => {
bail!("'bucket_region' option is mandatory if 'bucket_name' is given ")
}
(_, None, Some(_)) => {
bail!("'bucket_name' option is mandatory if 'bucket_region' is given ")
}
(None, Some(bucket_name), Some(bucket_region)) => RemoteStorageKind::AwsS3(S3Config {
bucket_name: parse_toml_string("bucket_name", bucket_name)?,
bucket_region: parse_toml_string("bucket_region", bucket_region)?,
prefix_in_bucket: toml
.get("prefix_in_bucket")
.map(|prefix_in_bucket| parse_toml_string("prefix_in_bucket", prefix_in_bucket))
.transpose()?,
endpoint: toml
.get("endpoint")
.map(|endpoint| parse_toml_string("endpoint", endpoint))
.transpose()?,
concurrency_limit,
}),
(Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
parse_toml_string("local_path", local_path)?,
)),
(Some(_), Some(_), _) => bail!("local_path and bucket_name are mutually exclusive"),
};
Ok(RemoteStorageConfig {
max_concurrent_syncs,
max_sync_errors,
storage,
})
}
}
// Helper functions to parse a toml Item
fn parse_optional_integer<I, E>(name: &str, item: &toml_edit::Item) -> anyhow::Result<Option<I>>
where
I: TryFrom<i64, Error = E>,
E: std::error::Error + Send + Sync + 'static,
{
let toml_integer = match item.get(name) {
Some(item) => item
.as_integer()
.with_context(|| format!("configure option {name} is not an integer"))?,
None => return Ok(None),
};
I::try_from(toml_integer)
.map(Some)
.with_context(|| format!("configure option {name} is too large"))
}
fn parse_toml_string(name: &str, item: &Item) -> anyhow::Result<String> {
let s = item
.as_str()
.with_context(|| format!("configure option {name} is not a string"))?;
Ok(s.to_string())
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -5,7 +5,6 @@
//! volume is mounted to the local FS.
use std::{
borrow::Cow,
future::Future,
path::{Path, PathBuf},
pin::Pin,
@@ -18,16 +17,10 @@ use tokio::{
};
use tracing::*;
use crate::{path_with_suffix_extension, Download, DownloadError, RemoteObjectName};
use crate::path_with_suffix_extension;
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
impl RemoteObjectName for PathBuf {
fn object_name(&self) -> Option<&str> {
self.file_stem().and_then(|n| n.to_str())
}
}
pub struct LocalFs {
working_directory: PathBuf,
storage_root: PathBuf,
@@ -108,18 +101,7 @@ impl RemoteStorage for LocalFs {
}
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
get_all_files(&self.storage_root, true).await
}
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
let path = match prefix {
Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
None => Cow::Borrowed(&self.storage_root),
};
get_all_files(path.as_ref(), false).await
get_all_files(&self.storage_root).await
}
async fn upload(
@@ -210,56 +192,14 @@ impl RemoteStorage for LocalFs {
Ok(())
}
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError> {
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
let source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&file_path)
.await
.with_context(|| {
format!(
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})
.map_err(DownloadError::Other)?,
);
let metadata = self
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
Ok(Download {
metadata,
download_stream: Box::pin(source),
})
} else {
Err(DownloadError::NotFound)
}
}
async fn download_byte_range(
async fn download(
&self,
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError> {
if let Some(end_exclusive) = end_exclusive {
if end_exclusive <= start_inclusive {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) is not less than end_exclusive ({end_exclusive:?})")));
};
if start_inclusive == end_exclusive.saturating_sub(1) {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
}
}
let file_path = self
.resolve_in_storage(from)
.map_err(DownloadError::BadInput)?;
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
let file_path = self.resolve_in_storage(from)?;
if file_path.exists() && file_path.is_file() {
let mut source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
@@ -270,31 +210,81 @@ impl RemoteStorage for LocalFs {
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})
.map_err(DownloadError::Other)?,
})?,
);
io::copy(&mut source, to).await.with_context(|| {
format!(
"Failed to download file '{}' from the local storage",
file_path.display()
)
})?;
source.flush().await?;
self.read_storage_metadata(&file_path).await
} else {
bail!(
"File '{}' either does not exist or is not a file",
file_path.display()
)
}
}
async fn download_byte_range(
&self,
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
if let Some(end_exclusive) = end_exclusive {
ensure!(
end_exclusive > start_inclusive,
"Invalid range, start ({}) is bigger then end ({:?})",
start_inclusive,
end_exclusive
);
if start_inclusive == end_exclusive.saturating_sub(1) {
return Ok(None);
}
}
let file_path = self.resolve_in_storage(from)?;
if file_path.exists() && file_path.is_file() {
let mut source = io::BufReader::new(
fs::OpenOptions::new()
.read(true)
.open(&file_path)
.await
.with_context(|| {
format!(
"Failed to open source file '{}' to use in the download",
file_path.display()
)
})?,
);
source
.seek(io::SeekFrom::Start(start_inclusive))
.await
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
let metadata = self
.read_storage_metadata(&file_path)
.await
.map_err(DownloadError::Other)?;
.context("Failed to seek to the range start in a local storage file")?;
match end_exclusive {
Some(end_exclusive) => {
io::copy(&mut source.take(end_exclusive - start_inclusive), to).await
}
None => io::copy(&mut source, to).await,
}
.with_context(|| {
format!(
"Failed to download file '{}' range from the local storage",
file_path.display()
)
})?;
Ok(match end_exclusive {
Some(end_exclusive) => Download {
metadata,
download_stream: Box::pin(source.take(end_exclusive - start_inclusive)),
},
None => Download {
metadata,
download_stream: Box::pin(source),
},
})
self.read_storage_metadata(&file_path).await
} else {
Err(DownloadError::NotFound)
bail!(
"File '{}' either does not exist or is not a file",
file_path.display()
)
}
}
@@ -317,7 +307,6 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
fn get_all_files<'a, P>(
directory_path: P,
recursive: bool,
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
where
P: AsRef<Path> + Send + Sync + 'a,
@@ -334,11 +323,7 @@ where
if file_type.is_symlink() {
debug!("{:?} us a symlink, skipping", entry_path)
} else if file_type.is_dir() {
if recursive {
paths.extend(get_all_files(entry_path, true).await?.into_iter())
} else {
paths.push(dir_entry.path())
}
paths.extend(get_all_files(entry_path).await?.into_iter())
} else {
paths.push(dir_entry.path());
}
@@ -367,19 +352,6 @@ async fn create_target_directory(target_file_path: &Path) -> anyhow::Result<()>
Ok(())
}
fn file_exists(file_path: &Path) -> anyhow::Result<bool> {
if file_path.exists() {
ensure!(
file_path.is_file(),
"file path '{}' is not a file",
file_path.display()
);
Ok(true)
} else {
Ok(false)
}
}
#[cfg(test)]
mod pure_tests {
use tempfile::tempdir;
@@ -546,31 +518,6 @@ mod fs_tests {
use std::{collections::HashMap, io::Write};
use tempfile::tempdir;
async fn read_and_assert_remote_file_contents(
storage: &LocalFs,
#[allow(clippy::ptr_arg)]
// have to use &PathBuf due to `storage.local_path` parameter requirements
remote_storage_path: &PathBuf,
expected_metadata: Option<&StorageMetadata>,
) -> anyhow::Result<String> {
let mut download = storage
.download(remote_storage_path)
.await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!(
download.metadata.as_ref() == expected_metadata,
"Unexpected metadata returned for the downloaded file"
);
let mut contents = String::new();
download
.download_stream
.read_to_string(&mut contents)
.await
.context("Failed to read remote file contents into string")?;
Ok(contents)
}
#[tokio::test]
async fn upload_file() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();
@@ -621,7 +568,15 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let contents = read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
let mut content_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage.download(&upload_target, &mut content_bytes).await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
content_bytes.flush().await?;
let contents = String::from_utf8(content_bytes.into_inner().into_inner())?;
assert_eq!(
dummy_contents(upload_name),
contents,
@@ -629,9 +584,13 @@ mod fs_tests {
);
let non_existing_path = PathBuf::from("somewhere").join("else");
match storage.download(&non_existing_path).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
match storage.download(&non_existing_path, &mut io::sink()).await {
Ok(_) => panic!("Should not allow downloading non-existing storage files"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("does not exist"));
assert!(error_string.contains(&non_existing_path.display().to_string()));
}
}
Ok(())
}
@@ -644,31 +603,58 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
let mut full_range_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
.download_byte_range(&upload_target, 0, None, &mut full_range_bytes)
.await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
full_range_bytes.flush().await?;
assert_eq!(
dummy_contents(upload_name),
full_range_download_contents,
String::from_utf8(full_range_bytes.into_inner().into_inner())?,
"Download full range should return the whole upload"
);
let mut zero_range_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let same_byte = 1_000_000_000;
let metadata = storage
.download_byte_range(
&upload_target,
same_byte,
Some(same_byte + 1), // exclusive end
&mut zero_range_bytes,
)
.await?;
assert!(
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
zero_range_bytes.flush().await?;
assert!(
zero_range_bytes.into_inner().into_inner().is_empty(),
"Zero byte range should not download any part of the file"
);
let uploaded_bytes = dummy_contents(upload_name).into_bytes();
let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
let mut first_part_download = storage
.download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
.download_byte_range(
&upload_target,
0,
Some(first_part_local.len() as u64),
&mut first_part_remote,
)
.await?;
assert!(
first_part_download.metadata.is_none(),
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut first_part_download.download_stream,
&mut first_part_remote,
)
.await?;
first_part_remote.flush().await?;
let first_part_remote = first_part_remote.into_inner().into_inner();
assert_eq!(
@@ -677,24 +663,20 @@ mod fs_tests {
"First part bytes should be returned when requested"
);
let mut second_part_download = storage
let mut second_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let metadata = storage
.download_byte_range(
&upload_target,
first_part_local.len() as u64,
Some((first_part_local.len() + second_part_local.len()) as u64),
&mut second_part_remote,
)
.await?;
assert!(
second_part_download.metadata.is_none(),
metadata.is_none(),
"No metadata should be returned for no metadata upload"
);
let mut second_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut second_part_download.download_stream,
&mut second_part_remote,
)
.await?;
second_part_remote.flush().await?;
let second_part_remote = second_part_remote.into_inner().into_inner();
assert_eq!(
@@ -714,30 +696,11 @@ mod fs_tests {
let upload_name = "upload_1";
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
let start = 1_000_000_000;
let end = start + 1;
match storage
.download_byte_range(
&upload_target,
start,
Some(end), // exclusive end
)
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("zero bytes"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
let start = 10000;
let end = 234;
assert!(start > end, "Should test an incorrect range");
match storage
.download_byte_range(&upload_target, start, Some(end))
.download_byte_range(&upload_target, start, Some(end), &mut io::sink())
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
@@ -749,6 +712,18 @@ mod fs_tests {
}
}
let non_existing_path = PathBuf::from("somewhere").join("else");
match storage
.download_byte_range(&non_existing_path, 1, Some(3), &mut io::sink())
.await
{
Ok(_) => panic!("Should not allow downloading non-existing storage file ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("does not exist"));
assert!(error_string.contains(&non_existing_path.display().to_string()));
}
}
Ok(())
}
@@ -787,26 +762,35 @@ mod fs_tests {
let upload_target =
upload_dummy_file(&workdir, &storage, upload_name, Some(metadata.clone())).await?;
let full_range_download_contents =
read_and_assert_remote_file_contents(&storage, &upload_target, Some(&metadata)).await?;
let mut content_bytes = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
let full_download_metadata = storage.download(&upload_target, &mut content_bytes).await?;
content_bytes.flush().await?;
let contents = String::from_utf8(content_bytes.into_inner().into_inner())?;
assert_eq!(
dummy_contents(upload_name),
full_range_download_contents,
contents,
"We should upload and download the same contents"
);
assert_eq!(
full_download_metadata.as_ref(),
Some(&metadata),
"We should get the same metadata back for full download"
);
let uploaded_bytes = dummy_contents(upload_name).into_bytes();
let (first_part_local, _) = uploaded_bytes.split_at(3);
let mut partial_download_with_metadata = storage
.download_byte_range(&upload_target, 0, Some(first_part_local.len() as u64))
.await?;
let mut first_part_remote = io::BufWriter::new(std::io::Cursor::new(Vec::new()));
io::copy(
&mut partial_download_with_metadata.download_stream,
&mut first_part_remote,
)
.await?;
let partial_download_metadata = storage
.download_byte_range(
&upload_target,
0,
Some(first_part_local.len() as u64),
&mut first_part_remote,
)
.await?;
first_part_remote.flush().await?;
let first_part_remote = first_part_remote.into_inner().into_inner();
assert_eq!(
@@ -816,8 +800,8 @@ mod fs_tests {
);
assert_eq!(
partial_download_with_metadata.metadata,
Some(metadata),
partial_download_metadata.as_ref(),
Some(&metadata),
"We should get the same metadata back for partial download"
);
@@ -859,7 +843,7 @@ mod fs_tests {
}
fn dummy_contents(name: &str) -> String {
format!("contents for {name}")
format!("contents for {}", name)
}
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<PathBuf>> {

View File

@@ -9,87 +9,20 @@ use std::path::{Path, PathBuf};
use anyhow::Context;
use rusoto_core::{
credential::{InstanceMetadataProvider, StaticProvider},
HttpClient, Region, RusotoError,
HttpClient, Region,
};
use rusoto_s3::{
DeleteObjectRequest, GetObjectError, GetObjectRequest, ListObjectsV2Request, PutObjectRequest,
S3Client, StreamingBody, S3,
DeleteObjectRequest, GetObjectRequest, ListObjectsV2Request, PutObjectRequest, S3Client,
StreamingBody, S3,
};
use tokio::{io, sync::Semaphore};
use tokio_util::io::ReaderStream;
use tracing::debug;
use crate::{
strip_path_prefix, Download, DownloadError, RemoteObjectName, RemoteStorage, S3Config,
};
use crate::{strip_path_prefix, RemoteStorage, S3Config};
use super::StorageMetadata;
pub(super) mod metrics {
use metrics::{register_int_counter_vec, IntCounterVec};
use once_cell::sync::Lazy;
static S3_REQUESTS_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"remote_storage_s3_requests_count",
"Number of s3 requests of particular type",
&["request_type"],
)
.expect("failed to define a metric")
});
static S3_REQUESTS_FAIL_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"remote_storage_s3_failures_count",
"Number of failed s3 requests of particular type",
&["request_type"],
)
.expect("failed to define a metric")
});
pub fn inc_get_object() {
S3_REQUESTS_COUNT.with_label_values(&["get_object"]).inc();
}
pub fn inc_get_object_fail() {
S3_REQUESTS_FAIL_COUNT
.with_label_values(&["get_object"])
.inc();
}
pub fn inc_put_object() {
S3_REQUESTS_COUNT.with_label_values(&["put_object"]).inc();
}
pub fn inc_put_object_fail() {
S3_REQUESTS_FAIL_COUNT
.with_label_values(&["put_object"])
.inc();
}
pub fn inc_delete_object() {
S3_REQUESTS_COUNT
.with_label_values(&["delete_object"])
.inc();
}
pub fn inc_delete_object_fail() {
S3_REQUESTS_FAIL_COUNT
.with_label_values(&["delete_object"])
.inc();
}
pub fn inc_list_objects() {
S3_REQUESTS_COUNT.with_label_values(&["list_objects"]).inc();
}
pub fn inc_list_objects_fail() {
S3_REQUESTS_FAIL_COUNT
.with_label_values(&["list_objects"])
.inc();
}
}
const S3_PREFIX_SEPARATOR: char = '/';
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
@@ -119,25 +52,6 @@ impl S3ObjectKey {
}
}
impl RemoteObjectName for S3ObjectKey {
/// Turn a/b/c or a/b/c/ into c
fn object_name(&self) -> Option<&str> {
// corner case, char::to_string is not const, thats why this is more verbose than it needs to be
// see https://github.com/rust-lang/rust/issues/88674
if self.0.len() == 1 && self.0.chars().next().unwrap() == S3_PREFIX_SEPARATOR {
return None;
}
if self.0.ends_with(S3_PREFIX_SEPARATOR) {
self.0.rsplit(S3_PREFIX_SEPARATOR).nth(1)
} else {
self.0
.rsplit_once(S3_PREFIX_SEPARATOR)
.map(|(_, last)| last)
}
}
}
/// AWS S3 storage.
pub struct S3Bucket {
workdir: PathBuf,
@@ -208,39 +122,6 @@ impl S3Bucket {
concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
})
}
async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 download")
.map_err(DownloadError::Other)?;
metrics::inc_get_object();
match self.client.get_object(request).await {
Ok(object_output) => match object_output.body {
None => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
"Got no body for the S3 object given"
)))
}
Some(body) => Ok(Download {
metadata: object_output.metadata.map(StorageMetadata),
download_stream: Box::pin(io::BufReader::new(body.into_async_read())),
}),
},
Err(RusotoError::Service(GetObjectError::NoSuchKey(_))) => Err(DownloadError::NotFound),
Err(e) => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(
"Failed to download S3 object: {e}"
)))
}
}
}
}
#[async_trait::async_trait]
@@ -271,9 +152,6 @@ impl RemoteStorage for S3Bucket {
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 list")?;
metrics::inc_list_objects();
let fetch_response = self
.client
.list_objects_v2(ListObjectsV2Request {
@@ -282,11 +160,7 @@ impl RemoteStorage for S3Bucket {
continuation_token,
..ListObjectsV2Request::default()
})
.await
.map_err(|e| {
metrics::inc_list_objects_fail();
e
})?;
.await?;
document_keys.extend(
fetch_response
.contents
@@ -304,77 +178,6 @@ impl RemoteStorage for S3Bucket {
Ok(document_keys)
}
/// Note: it wont include empty "directories"
async fn list_prefixes(
&self,
prefix: Option<Self::RemoteObjectId>,
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
let list_prefix = match prefix {
Some(prefix) => {
let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
// if there is no trailing / in default prefix and
// supplied prefix does not start with "/" insert it
if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
|| prefix.0.starts_with(S3_PREFIX_SEPARATOR))
{
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
prefix_in_bucket.push_str(&prefix.0);
// required to end with a separator
// otherwise request will return only the entry of a prefix
if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
}
Some(prefix_in_bucket)
}
None => self.prefix_in_bucket.clone(),
};
let mut document_keys = Vec::new();
let mut continuation_token = None;
loop {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 list")?;
metrics::inc_list_objects();
let fetch_response = self
.client
.list_objects_v2(ListObjectsV2Request {
bucket: self.bucket_name.clone(),
prefix: list_prefix.clone(),
continuation_token,
delimiter: Some(S3_PREFIX_SEPARATOR.to_string()),
..ListObjectsV2Request::default()
})
.await
.map_err(|e| {
metrics::inc_list_objects_fail();
e
})?;
document_keys.extend(
fetch_response
.common_prefixes
.unwrap_or_default()
.into_iter()
.filter_map(|o| Some(S3ObjectKey(o.prefix?))),
);
match fetch_response.continuation_token {
Some(new_token) => continuation_token = Some(new_token),
None => break,
}
}
Ok(document_keys)
}
async fn upload(
&self,
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
@@ -387,8 +190,6 @@ impl RemoteStorage for S3Bucket {
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 upload")?;
metrics::inc_put_object();
self.client
.put_object(PutObjectRequest {
body: Some(StreamingBody::new_with_size(
@@ -400,21 +201,35 @@ impl RemoteStorage for S3Bucket {
metadata: metadata.map(|m| m.0),
..PutObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_put_object_fail();
e
})?;
.await?;
Ok(())
}
async fn download(&self, from: &Self::RemoteObjectId) -> Result<Download, DownloadError> {
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
..GetObjectRequest::default()
})
.await
async fn download(
&self,
from: &Self::RemoteObjectId,
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 download")?;
let object_output = self
.client
.get_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
..GetObjectRequest::default()
})
.await?;
if let Some(body) = object_output.body {
let mut from = io::BufReader::new(body.into_async_read());
io::copy(&mut from, to).await?;
}
Ok(object_output.metadata.map(StorageMetadata))
}
async fn download_byte_range(
@@ -422,7 +237,8 @@ impl RemoteStorage for S3Bucket {
from: &Self::RemoteObjectId,
start_inclusive: u64,
end_exclusive: Option<u64>,
) -> Result<Download, DownloadError> {
to: &mut (impl io::AsyncWrite + Unpin + Send + Sync),
) -> anyhow::Result<Option<StorageMetadata>> {
// S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
// and needs both ends to be exclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
@@ -430,14 +246,27 @@ impl RemoteStorage for S3Bucket {
Some(end_inclusive) => format!("bytes={}-{}", start_inclusive, end_inclusive),
None => format!("bytes={}-", start_inclusive),
});
let _guard = self
.concurrency_limiter
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 range download")?;
let object_output = self
.client
.get_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
range,
..GetObjectRequest::default()
})
.await?;
self.download_object(GetObjectRequest {
bucket: self.bucket_name.clone(),
key: from.key().to_owned(),
range,
..GetObjectRequest::default()
})
.await
if let Some(body) = object_output.body {
let mut from = io::BufReader::new(body.into_async_read());
io::copy(&mut from, to).await?;
}
Ok(object_output.metadata.map(StorageMetadata))
}
async fn delete(&self, path: &Self::RemoteObjectId) -> anyhow::Result<()> {
@@ -446,20 +275,13 @@ impl RemoteStorage for S3Bucket {
.acquire()
.await
.context("Concurrency limiter semaphore got closed during S3 delete")?;
metrics::inc_delete_object();
self.client
.delete_object(DeleteObjectRequest {
bucket: self.bucket_name.clone(),
key: path.key().to_owned(),
..DeleteObjectRequest::default()
})
.await
.map_err(|e| {
metrics::inc_delete_object_fail();
e
})?;
.await?;
Ok(())
}
}
@@ -470,25 +292,6 @@ mod tests {
use super::*;
#[test]
fn object_name() {
let k = S3ObjectKey("a/b/c".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = S3ObjectKey("a/b/c/".to_owned());
assert_eq!(k.object_name(), Some("c"));
let k = S3ObjectKey("a/".to_owned());
assert_eq!(k.object_name(), Some("a"));
// XXX is it impossible to have an empty key?
let k = S3ObjectKey("".to_owned());
assert_eq!(k.object_name(), None);
let k = S3ObjectKey("/".to_owned());
assert_eq!(k.object_name(), None);
}
#[test]
fn download_destination() -> anyhow::Result<()> {
let workdir = tempdir()?.path().to_owned();

View File

@@ -5,7 +5,7 @@ DATA_DIR=$3
PORT=$4
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
rm -fr $DATA_DIR
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
echo port=$PORT >> $DATA_DIR/postgresql.conf
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
declare -i WAL_SIZE=$REDO_POS+114

View File

@@ -5,7 +5,7 @@ PORT=$4
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
rm -fr $DATA_DIR /tmp/pg_wals
mkdir /tmp/pg_wals
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U zenith_admin -D $DATA_DIR --sysid=$SYSID
echo port=$PORT >> $DATA_DIR/postgresql.conf
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
declare -i WAL_SIZE=$REDO_POS+114

View File

@@ -71,7 +71,7 @@ impl From<bincode::Error> for SerializeError {
/// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
///
/// Does not allow trailing bytes in deserialization. If this is desired, you
/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.
/// may set [`Options::allow_trailing_bytes`] to explicitly accomodate this.
pub fn be_coder() -> impl Options {
bincode::DefaultOptions::new()
.with_big_endian()
@@ -85,7 +85,7 @@ pub fn be_coder() -> impl Options {
/// - Fixed integer encoding (i.e. 1u32 is 00000001 not 01)
///
/// Does not allow trailing bytes in deserialization. If this is desired, you
/// may set [`Options::allow_trailing_bytes`] to explicitly accommodate this.
/// may set [`Options::allow_trailing_bytes`] to explicitly accomodate this.
pub fn le_coder() -> impl Options {
bincode::DefaultOptions::new()
.with_little_endian()

View File

@@ -64,7 +64,7 @@ pub mod signals;
/// One thing to note is that .git is not available in docker (and it is bad to include it there).
/// So everything becides docker build is covered by git_version crate, and docker uses a `GIT_VERSION` argument to get the value required.
/// It takes variable from build process env and puts it to the rustc env. And then we can retrieve it here by using env! macro.
/// Git version received from environment variable used as a fallback in git_version invocation.
/// Git version received from environment variable used as a fallback in git_version invokation.
/// And to avoid running buildscript every recompilation, we use rerun-if-env-changed option.
/// So the build script will be run only when GIT_VERSION envvar has changed.
///

View File

@@ -26,9 +26,6 @@ impl Lsn {
/// Maximum possible value for an LSN
pub const MAX: Lsn = Lsn(u64::MAX);
/// Invalid value for InvalidXLogRecPtr, as defined in xlogdefs.h
pub const INVALID: Lsn = Lsn(0);
/// Subtract a number, returning None on overflow.
pub fn checked_sub<T: Into<u64>>(self, other: T) -> Option<Lsn> {
let other: u64 = other.into();
@@ -106,12 +103,6 @@ impl Lsn {
pub fn is_aligned(&self) -> bool {
*self == self.align()
}
/// Return if the LSN is valid
/// mimics postgres XLogRecPtrIsInvalid macro
pub fn is_valid(self) -> bool {
self != Lsn::INVALID
}
}
impl From<u64> for Lsn {

View File

@@ -13,10 +13,13 @@ use std::fmt;
use std::io::{self, Write};
use std::net::{Shutdown, SocketAddr, TcpStream};
use std::str::FromStr;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tracing::*;
static PGBACKEND_SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false);
pub trait Handler {
/// Handle single query.
/// postgres_backend will issue ReadyForQuery after calling this (this
@@ -42,10 +45,6 @@ pub trait Handler {
fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
bail!("JWT auth failed")
}
fn is_shutdown_requested(&self) -> bool {
false
}
}
/// PostgresBackend protocol state.
@@ -275,7 +274,7 @@ impl PostgresBackend {
let mut unnamed_query_string = Bytes::new();
while !handler.is_shutdown_requested() {
while !PGBACKEND_SHUTDOWN_REQUESTED.load(Ordering::Relaxed) {
match self.read_message() {
Ok(message) => {
if let Some(msg) = message {
@@ -337,11 +336,11 @@ impl PostgresBackend {
let have_tls = self.tls_config.is_some();
match msg {
FeMessage::StartupPacket(m) => {
trace!("got startup message {m:?}");
trace!("got startup message {:?}", m);
match m {
FeStartupPacket::SslRequest => {
debug!("SSL requested");
info!("SSL requested");
self.write_message(&BeMessage::EncryptionResponse(have_tls))?;
if have_tls {
@@ -350,7 +349,7 @@ impl PostgresBackend {
}
}
FeStartupPacket::GssEncRequest => {
debug!("GSS requested");
info!("GSS requested");
self.write_message(&BeMessage::EncryptionResponse(false))?;
}
FeStartupPacket::StartupMessage { .. } => {
@@ -434,7 +433,12 @@ impl PostgresBackend {
// full cause of the error, not just the top-level context + its trace.
// We don't want to send that in the ErrorResponse though,
// because it's not relevant to the compute node logs.
error!("query handler for '{}' failed: {:?}", query_string, e);
if query_string.starts_with("callmemaybe") {
// FIXME avoid printing a backtrace for tenant x not found errors until this is properly fixed
error!("query handler for '{}' failed: {}", query_string, e);
} else {
error!("query handler for '{}' failed: {:?}", query_string, e);
}
self.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?;
// TODO: untangle convoluted control flow
if e.to_string().contains("failed to run") {
@@ -471,7 +475,7 @@ impl PostgresBackend {
self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
}
// NOTE there is no ReadyForQuery message. This handler is used
// for basebackup and it uses CopyOut which doesn't require
// for basebackup and it uses CopyOut which doesnt require
// ReadyForQuery message and backend just switches back to
// processing mode after sending CopyDone or ErrorResponse.
}
@@ -494,3 +498,8 @@ impl PostgresBackend {
Ok(ProcessMsgResult::Continue)
}
}
// Set the flag to inform connections to cancel
pub fn set_pgbackend_shutdown_requested() {
PGBACKEND_SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed);
}

View File

@@ -269,14 +269,7 @@ impl FeStartupPacket {
.next()
.context("expected even number of params in StartupMessage")?;
if name == "options" {
// parsing options arguments "...&options=<var0>%3D<val0>+<var1>=<var1>..."
// '%3D' is '=' and '+' is ' '
// Note: we allow users that don't have SNI capabilities,
// to pass a special keyword argument 'project'
// to be used to determine the cluster name by the proxy.
//TODO: write unit test for this and refactor in its own function.
// deprecated way of passing params as cmd line args
for cmdopt in value.split(' ') {
let nameval: Vec<&str> = cmdopt.split('=').collect();
if nameval.len() == 2 {
@@ -471,7 +464,7 @@ impl BeParameterStatusMessage<'static> {
}
}
// One row description in RowDescription packet.
// One row desciption in RowDescription packet.
#[derive(Debug)]
pub struct RowDescriptor<'a> {
pub name: &'a [u8],
@@ -620,7 +613,7 @@ fn cstr_to_str(b: &Bytes) -> Result<&str> {
impl<'a> BeMessage<'a> {
/// Write message to the given buf.
// Unlike the reading side, we use BytesMut
// here as msg len precedes its body and it is handy to write it down first
// here as msg len preceeds its body and it is handy to write it down first
// and then fill the length. With Write we would have to either calc it
// manually or have one more buffer.
pub fn write(buf: &mut BytesMut, message: &BeMessage) -> io::Result<()> {
@@ -926,10 +919,10 @@ impl<'a> BeMessage<'a> {
}
}
// Neon extension of postgres replication protocol
// See NEON_STATUS_UPDATE_TAG_BYTE
// Zenith extension of postgres replication protocol
// See ZENITH_STATUS_UPDATE_TAG_BYTE
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct ReplicationFeedback {
pub struct ZenithFeedback {
// Last known size of the timeline. Used to enforce timeline size limit.
pub current_timeline_size: u64,
// Parts of StandbyStatusUpdate we resend to compute via safekeeper
@@ -939,13 +932,13 @@ pub struct ReplicationFeedback {
pub ps_replytime: SystemTime,
}
// NOTE: Do not forget to increment this number when adding new fields to ReplicationFeedback.
// NOTE: Do not forget to increment this number when adding new fields to ZenithFeedback.
// Do not remove previously available fields because this might be backwards incompatible.
pub const REPLICATION_FEEDBACK_FIELDS_NUMBER: u8 = 5;
pub const ZENITH_FEEDBACK_FIELDS_NUMBER: u8 = 5;
impl ReplicationFeedback {
pub fn empty() -> ReplicationFeedback {
ReplicationFeedback {
impl ZenithFeedback {
pub fn empty() -> ZenithFeedback {
ZenithFeedback {
current_timeline_size: 0,
ps_writelsn: 0,
ps_applylsn: 0,
@@ -954,7 +947,7 @@ impl ReplicationFeedback {
}
}
// Serialize ReplicationFeedback using custom format
// Serialize ZenithFeedback using custom format
// to support protocol extensibility.
//
// Following layout is used:
@@ -965,7 +958,7 @@ impl ReplicationFeedback {
// uint32 - value length in bytes
// value itself
pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> {
buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
buf.put_u8(ZENITH_FEEDBACK_FIELDS_NUMBER); // # of keys
write_cstr(&Bytes::from("current_timeline_size"), buf)?;
buf.put_i32(8);
buf.put_u64(self.current_timeline_size);
@@ -992,9 +985,9 @@ impl ReplicationFeedback {
Ok(())
}
// Deserialize ReplicationFeedback message
pub fn parse(mut buf: Bytes) -> ReplicationFeedback {
let mut zf = ReplicationFeedback::empty();
// Deserialize ZenithFeedback message
pub fn parse(mut buf: Bytes) -> ZenithFeedback {
let mut zf = ZenithFeedback::empty();
let nfields = buf.get_u8();
let mut i = 0;
while i < nfields {
@@ -1035,14 +1028,14 @@ impl ReplicationFeedback {
_ => {
let len = buf.get_i32();
warn!(
"ReplicationFeedback parse. unknown key {} of len {}. Skip it.",
"ZenithFeedback parse. unknown key {} of len {}. Skip it.",
key, len
);
buf.advance(len as usize);
}
}
}
trace!("ReplicationFeedback parsed is {:?}", zf);
trace!("ZenithFeedback parsed is {:?}", zf);
zf
}
}
@@ -1052,9 +1045,9 @@ mod tests {
use super::*;
#[test]
fn test_replication_feedback_serialization() {
let mut zf = ReplicationFeedback::empty();
// Fill zf with some values
fn test_zenithfeedback_serialization() {
let mut zf = ZenithFeedback::empty();
// Fill zf wih some values
zf.current_timeline_size = 12345678;
// Set rounded time to be able to compare it with deserialized value,
// because it is rounded up to microseconds during serialization.
@@ -1062,14 +1055,14 @@ mod tests {
let mut data = BytesMut::new();
zf.serialize(&mut data).unwrap();
let zf_parsed = ReplicationFeedback::parse(data.freeze());
let zf_parsed = ZenithFeedback::parse(data.freeze());
assert_eq!(zf, zf_parsed);
}
#[test]
fn test_replication_feedback_unknown_key() {
let mut zf = ReplicationFeedback::empty();
// Fill zf with some values
fn test_zenithfeedback_unknown_key() {
let mut zf = ZenithFeedback::empty();
// Fill zf wih some values
zf.current_timeline_size = 12345678;
// Set rounded time to be able to compare it with deserialized value,
// because it is rounded up to microseconds during serialization.
@@ -1079,7 +1072,7 @@ mod tests {
// Add an extra field to the buffer and adjust number of keys
if let Some(first) = data.first_mut() {
*first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1;
*first = ZENITH_FEEDBACK_FIELDS_NUMBER + 1;
}
write_cstr(&Bytes::from("new_field_one"), &mut data).unwrap();
@@ -1087,7 +1080,7 @@ mod tests {
data.put_u64(42);
// Parse serialized data and check that new field is not parsed
let zf_parsed = ReplicationFeedback::parse(data.freeze());
let zf_parsed = ZenithFeedback::parse(data.freeze());
assert_eq!(zf, zf_parsed);
}

View File

@@ -193,7 +193,7 @@ pub struct ZTenantId(ZId);
zid_newtype!(ZTenantId);
// A pair uniquely identifying Zenith instance.
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct ZTenantTimelineId {
pub tenant_id: ZTenantId,
pub timeline_id: ZTimelineId,
@@ -218,7 +218,7 @@ impl ZTenantTimelineId {
impl fmt::Display for ZTenantTimelineId {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}/{}", self.tenant_id, self.timeline_id)
write!(f, "{}-{}", self.tenant_id, self.timeline_id)
}
}
@@ -226,9 +226,9 @@ impl fmt::Display for ZTenantTimelineId {
// by the console.
#[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash, Debug, Serialize, Deserialize)]
#[serde(transparent)]
pub struct NodeId(pub u64);
pub struct ZNodeId(pub u64);
impl fmt::Display for NodeId {
impl fmt::Display for ZNodeId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}

View File

@@ -14,7 +14,7 @@ use safekeeper::defaults::{
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
};
use std::collections::{BTreeSet, HashMap};
use std::path::{Path, PathBuf};
use std::path::Path;
use std::process::exit;
use std::str::FromStr;
use utils::{
@@ -22,14 +22,14 @@ use utils::{
lsn::Lsn,
postgres_backend::AuthType,
project_git_version,
zid::{NodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
zid::{ZNodeId, ZTenantId, ZTenantTimelineId, ZTimelineId},
};
use pageserver::timelines::TimelineInfo;
// Default id of a safekeeper node, if not specified on the command line.
const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
const DEFAULT_SAFEKEEPER_ID: ZNodeId = ZNodeId(1);
const DEFAULT_PAGESERVER_ID: ZNodeId = ZNodeId(1);
const DEFAULT_BRANCH_NAME: &str = "main";
project_git_version!(GIT_VERSION);
@@ -159,20 +159,6 @@ fn main() -> Result<()> {
.about("Create a new blank timeline")
.arg(tenant_id_arg.clone())
.arg(branch_name_arg.clone()))
.subcommand(App::new("import")
.about("Import timeline from basebackup directory")
.arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone())
.arg(Arg::new("node-name").long("node-name").takes_value(true)
.help("Name to assign to the imported timeline"))
.arg(Arg::new("base-tarfile").long("base-tarfile").takes_value(true)
.help("Basebackup tarfile to import"))
.arg(Arg::new("base-lsn").long("base-lsn").takes_value(true)
.help("Lsn the basebackup starts at"))
.arg(Arg::new("wal-tarfile").long("wal-tarfile").takes_value(true)
.help("Wal to add after base"))
.arg(Arg::new("end-lsn").long("end-lsn").takes_value(true)
.help("Lsn the basebackup ends at")))
).subcommand(
App::new("tenant")
.setting(AppSettings::ArgRequiredElseHelp)
@@ -537,13 +523,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
match tenant_match.subcommand() {
Some(("list", _)) => {
for t in pageserver.tenant_list()? {
println!(
"{} {}",
t.id,
t.state
.map(|s| s.to_string())
.unwrap_or_else(|| String::from(""))
);
println!("{} {}", t.id, t.state);
}
}
Some(("create", create_match)) => {
@@ -633,43 +613,6 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
timeline.timeline_id, last_record_lsn, tenant_id,
);
}
Some(("import", import_match)) => {
let tenant_id = get_tenant_id(import_match, env)?;
let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
let name = import_match
.value_of("node-name")
.ok_or_else(|| anyhow!("No node name provided"))?;
// Parse base inputs
let base_tarfile = import_match
.value_of("base-tarfile")
.map(|s| PathBuf::from_str(s).unwrap())
.ok_or_else(|| anyhow!("No base-tarfile provided"))?;
let base_lsn = Lsn::from_str(
import_match
.value_of("base-lsn")
.ok_or_else(|| anyhow!("No base-lsn provided"))?,
)?;
let base = (base_lsn, base_tarfile);
// Parse pg_wal inputs
let wal_tarfile = import_match
.value_of("wal-tarfile")
.map(|s| PathBuf::from_str(s).unwrap());
let end_lsn = import_match
.value_of("end-lsn")
.map(|s| Lsn::from_str(s).unwrap());
// TODO validate both or none are provided
let pg_wal = end_lsn.zip(wal_tarfile);
let mut cplane = ComputeControlPlane::load(env.clone())?;
println!("Importing timeline into pageserver ...");
pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal)?;
println!("Creating node for imported timeline ...");
env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
cplane.new_node(tenant_id, name, timeline_id, None, None)?;
println!("Done");
}
Some(("branch", branch_match)) => {
let tenant_id = get_tenant_id(branch_match, env)?;
let new_branch_name = branch_match
@@ -917,7 +860,7 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
Ok(())
}
fn get_safekeeper(env: &local_env::LocalEnv, id: NodeId) -> Result<SafekeeperNode> {
fn get_safekeeper(env: &local_env::LocalEnv, id: ZNodeId) -> Result<SafekeeperNode> {
if let Some(node) = env.safekeepers.iter().find(|node| node.id == id) {
Ok(SafekeeperNode::from_env(env, node))
} else {
@@ -933,7 +876,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
// All the commands take an optional safekeeper name argument
let sk_id = if let Some(id_str) = sub_args.value_of("id") {
NodeId(id_str.parse().context("while parsing safekeeper id")?)
ZNodeId(id_str.parse().context("while parsing safekeeper id")?)
} else {
DEFAULT_SAFEKEEPER_ID
};

View File

@@ -5,7 +5,7 @@ edition = "2021"
[features]
# It is simpler infra-wise to have failpoints enabled by default
# It shouldn't affect performance in any way because failpoints
# It shouldn't affect perf in any way because failpoints
# are not placed in hot code paths
default = ["failpoints"]
profiling = ["pprof"]
@@ -60,8 +60,6 @@ metrics = { path = "../libs/metrics" }
utils = { path = "../libs/utils" }
remote_storage = { path = "../libs/remote_storage" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
close_fds = "0.3.2"
walkdir = "2.3.2"
[dev-dependencies]
hex-literal = "0.3"

View File

@@ -69,7 +69,7 @@ Repository
The repository stores all the page versions, or WAL records needed to
reconstruct them. Each tenant has a separate Repository, which is
stored in the .neon/tenants/<tenantid> directory.
stored in the .zenith/tenants/<tenantid> directory.
Repository is an abstract trait, defined in `repository.rs`. It is
implemented by the LayeredRepository object in
@@ -92,7 +92,7 @@ Each repository also has a WAL redo manager associated with it, see
records, whenever we need to reconstruct a page version from WAL to
satisfy a GetPage@LSN request, or to avoid accumulating too much WAL
for a page. The WAL redo manager uses a Postgres process running in
special Neon wal-redo mode to do the actual WAL redo, and
special zenith wal-redo mode to do the actual WAL redo, and
communicates with the process using a pipe.

Some files were not shown because too many files have changed in this diff Show More