mirror of
https://github.com/neondatabase/neon.git
synced 2026-02-27 14:30:38 +00:00
Compare commits
5 Commits
relsize_ca
...
projects-m
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f7efbb2d42 | ||
|
|
0aff7c9ee9 | ||
|
|
4a2a55d9b2 | ||
|
|
99a0a5a19b | ||
|
|
263a3ea5e3 |
@@ -1,7 +1,6 @@
|
|||||||
[pageservers]
|
[pageservers]
|
||||||
#zenith-1-ps-1 console_region_id=1
|
#zenith-1-ps-1 console_region_id=1
|
||||||
zenith-1-ps-2 console_region_id=1
|
zenith-1-ps-2 console_region_id=1
|
||||||
zenith-1-ps-3 console_region_id=1
|
|
||||||
|
|
||||||
[safekeepers]
|
[safekeepers]
|
||||||
zenith-1-sk-1 console_region_id=1
|
zenith-1-sk-1 console_region_id=1
|
||||||
@@ -5,10 +5,10 @@ executors:
|
|||||||
resource_class: xlarge
|
resource_class: xlarge
|
||||||
docker:
|
docker:
|
||||||
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
|
# NB: when changed, do not forget to update rust image tag in all Dockerfiles
|
||||||
- image: neondatabase/rust:1.58
|
- image: zimg/rust:1.58
|
||||||
neon-executor:
|
neon-executor:
|
||||||
docker:
|
docker:
|
||||||
- image: neondatabase/rust:1.58
|
- image: zimg/rust:1.58
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# A job to build postgres
|
# A job to build postgres
|
||||||
@@ -37,7 +37,7 @@ jobs:
|
|||||||
name: Restore postgres cache
|
name: Restore postgres cache
|
||||||
keys:
|
keys:
|
||||||
# Restore ONLY if the rev key matches exactly
|
# Restore ONLY if the rev key matches exactly
|
||||||
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
||||||
|
|
||||||
# Build postgres if the restore_cache didn't find a build.
|
# Build postgres if the restore_cache didn't find a build.
|
||||||
# `make` can't figure out whether the cache is valid, since
|
# `make` can't figure out whether the cache is valid, since
|
||||||
@@ -54,7 +54,7 @@ jobs:
|
|||||||
|
|
||||||
- save_cache:
|
- save_cache:
|
||||||
name: Save postgres cache
|
name: Save postgres cache
|
||||||
key: v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
key: v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
||||||
paths:
|
paths:
|
||||||
- tmp_install
|
- tmp_install
|
||||||
|
|
||||||
@@ -85,7 +85,7 @@ jobs:
|
|||||||
name: Restore postgres cache
|
name: Restore postgres cache
|
||||||
keys:
|
keys:
|
||||||
# Restore ONLY if the rev key matches exactly
|
# Restore ONLY if the rev key matches exactly
|
||||||
- v05-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
- v04-postgres-cache-<< parameters.build_type >>-{{ checksum "/tmp/cache-key-postgres" }}
|
||||||
|
|
||||||
- restore_cache:
|
- restore_cache:
|
||||||
name: Restore rust cache
|
name: Restore rust cache
|
||||||
@@ -93,7 +93,7 @@ jobs:
|
|||||||
# Require an exact match. While an out of date cache might speed up the build,
|
# Require an exact match. While an out of date cache might speed up the build,
|
||||||
# there's no way to clean out old packages, so the cache grows every time something
|
# there's no way to clean out old packages, so the cache grows every time something
|
||||||
# changes.
|
# changes.
|
||||||
- v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
- v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||||
|
|
||||||
# Build the rust code, including test binaries
|
# Build the rust code, including test binaries
|
||||||
- run:
|
- run:
|
||||||
@@ -107,7 +107,7 @@ jobs:
|
|||||||
|
|
||||||
export CARGO_INCREMENTAL=0
|
export CARGO_INCREMENTAL=0
|
||||||
export CACHEPOT_BUCKET=zenith-rust-cachepot
|
export CACHEPOT_BUCKET=zenith-rust-cachepot
|
||||||
export RUSTC_WRAPPER=""
|
export RUSTC_WRAPPER=cachepot
|
||||||
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
|
export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
|
||||||
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
|
export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
|
||||||
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
||||||
@@ -115,7 +115,7 @@ jobs:
|
|||||||
|
|
||||||
- save_cache:
|
- save_cache:
|
||||||
name: Save rust cache
|
name: Save rust cache
|
||||||
key: v05-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }}
|
||||||
paths:
|
paths:
|
||||||
- ~/.cargo/registry
|
- ~/.cargo/registry
|
||||||
- ~/.cargo/git
|
- ~/.cargo/git
|
||||||
@@ -142,6 +142,11 @@ jobs:
|
|||||||
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
test_exe_paths=$(
|
||||||
|
cargo test --message-format=json --no-run |
|
||||||
|
jq -r '.executable | select(. != null)'
|
||||||
|
)
|
||||||
|
|
||||||
mkdir -p /tmp/zenith/bin
|
mkdir -p /tmp/zenith/bin
|
||||||
mkdir -p /tmp/zenith/test_bin
|
mkdir -p /tmp/zenith/test_bin
|
||||||
mkdir -p /tmp/zenith/etc
|
mkdir -p /tmp/zenith/etc
|
||||||
@@ -325,6 +330,274 @@ jobs:
|
|||||||
paths:
|
paths:
|
||||||
- "*"
|
- "*"
|
||||||
|
|
||||||
|
# Build neondatabase/neon:latest image and push it to Docker hub
|
||||||
|
docker-image:
|
||||||
|
docker:
|
||||||
|
- image: cimg/base:2021.04
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- setup_remote_docker:
|
||||||
|
docker_layer_caching: true
|
||||||
|
- run:
|
||||||
|
name: Init postgres submodule
|
||||||
|
command: git submodule update --init --depth 1
|
||||||
|
- run:
|
||||||
|
name: Build and push Docker image
|
||||||
|
command: |
|
||||||
|
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||||
|
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||||
|
docker build \
|
||||||
|
--pull \
|
||||||
|
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
|
||||||
|
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||||
|
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||||
|
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:latest .
|
||||||
|
docker push neondatabase/neon:${DOCKER_TAG}
|
||||||
|
docker push neondatabase/neon:latest
|
||||||
|
|
||||||
|
# Build neondatabase/compute-node:latest image and push it to Docker hub
|
||||||
|
docker-image-compute:
|
||||||
|
docker:
|
||||||
|
- image: cimg/base:2021.04
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- setup_remote_docker:
|
||||||
|
docker_layer_caching: true
|
||||||
|
- run:
|
||||||
|
name: Build and push compute-tools Docker image
|
||||||
|
command: |
|
||||||
|
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||||
|
docker build \
|
||||||
|
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||||
|
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||||
|
--tag neondatabase/compute-tools:local \
|
||||||
|
--tag neondatabase/compute-tools:latest \
|
||||||
|
-f Dockerfile.compute-tools .
|
||||||
|
# Only push :latest image
|
||||||
|
docker push neondatabase/compute-tools:latest
|
||||||
|
- run:
|
||||||
|
name: Init postgres submodule
|
||||||
|
command: git submodule update --init --depth 1
|
||||||
|
- run:
|
||||||
|
name: Build and push compute-node Docker image
|
||||||
|
command: |
|
||||||
|
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||||
|
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||||
|
docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
|
||||||
|
--tag neondatabase/compute-node:latest vendor/postgres \
|
||||||
|
--build-arg COMPUTE_TOOLS_TAG=local
|
||||||
|
docker push neondatabase/compute-node:${DOCKER_TAG}
|
||||||
|
docker push neondatabase/compute-node:latest
|
||||||
|
|
||||||
|
# Build production neondatabase/neon:release image and push it to Docker hub
|
||||||
|
docker-image-release:
|
||||||
|
docker:
|
||||||
|
- image: cimg/base:2021.04
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- setup_remote_docker:
|
||||||
|
docker_layer_caching: true
|
||||||
|
- run:
|
||||||
|
name: Init postgres submodule
|
||||||
|
command: git submodule update --init --depth 1
|
||||||
|
- run:
|
||||||
|
name: Build and push Docker image
|
||||||
|
command: |
|
||||||
|
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||||
|
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||||
|
docker build \
|
||||||
|
--pull \
|
||||||
|
--build-arg GIT_VERSION=${CIRCLE_SHA1} \
|
||||||
|
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||||
|
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||||
|
--tag neondatabase/neon:${DOCKER_TAG} --tag neondatabase/neon:release .
|
||||||
|
docker push neondatabase/neon:${DOCKER_TAG}
|
||||||
|
docker push neondatabase/neon:release
|
||||||
|
|
||||||
|
# Build production neondatabase/compute-node:release image and push it to Docker hub
|
||||||
|
docker-image-compute-release:
|
||||||
|
docker:
|
||||||
|
- image: cimg/base:2021.04
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- setup_remote_docker:
|
||||||
|
docker_layer_caching: true
|
||||||
|
- run:
|
||||||
|
name: Build and push compute-tools Docker image
|
||||||
|
command: |
|
||||||
|
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||||
|
docker build \
|
||||||
|
--build-arg AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" \
|
||||||
|
--build-arg AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" \
|
||||||
|
--tag neondatabase/compute-tools:release \
|
||||||
|
--tag neondatabase/compute-tools:local \
|
||||||
|
-f Dockerfile.compute-tools .
|
||||||
|
# Only push :release image
|
||||||
|
docker push neondatabase/compute-tools:release
|
||||||
|
- run:
|
||||||
|
name: Init postgres submodule
|
||||||
|
command: git submodule update --init --depth 1
|
||||||
|
- run:
|
||||||
|
name: Build and push compute-node Docker image
|
||||||
|
command: |
|
||||||
|
echo $NEON_DOCKER_PWD | docker login -u $NEON_DOCKER_LOGIN --password-stdin
|
||||||
|
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||||
|
docker build --tag neondatabase/compute-node:${DOCKER_TAG} \
|
||||||
|
--tag neondatabase/compute-node:release vendor/postgres \
|
||||||
|
--build-arg COMPUTE_TOOLS_TAG=local
|
||||||
|
docker push neondatabase/compute-node:${DOCKER_TAG}
|
||||||
|
docker push neondatabase/compute-node:release
|
||||||
|
|
||||||
|
deploy-staging:
|
||||||
|
docker:
|
||||||
|
- image: cimg/python:3.10
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- setup_remote_docker
|
||||||
|
- run:
|
||||||
|
name: Setup ansible
|
||||||
|
command: |
|
||||||
|
pip install --progress-bar off --user ansible boto3
|
||||||
|
- run:
|
||||||
|
name: Redeploy
|
||||||
|
command: |
|
||||||
|
cd "$(pwd)/.circleci/ansible"
|
||||||
|
|
||||||
|
./get_binaries.sh
|
||||||
|
|
||||||
|
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
|
||||||
|
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
|
||||||
|
chmod 0600 ssh-key
|
||||||
|
ssh-add ssh-key
|
||||||
|
rm -f ssh-key ssh-key-cert.pub
|
||||||
|
|
||||||
|
ansible-playbook deploy.yaml -i staging.hosts
|
||||||
|
rm -f neon_install.tar.gz .neon_current_version
|
||||||
|
|
||||||
|
deploy-staging-proxy:
|
||||||
|
docker:
|
||||||
|
- image: cimg/base:2021.04
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: .kubeconfig
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- run:
|
||||||
|
name: Store kubeconfig file
|
||||||
|
command: |
|
||||||
|
echo "${STAGING_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
|
||||||
|
chmod 0600 ${KUBECONFIG}
|
||||||
|
- run:
|
||||||
|
name: Setup helm v3
|
||||||
|
command: |
|
||||||
|
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||||
|
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||||
|
- run:
|
||||||
|
name: Re-deploy proxy
|
||||||
|
command: |
|
||||||
|
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||||
|
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||||
|
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/staging.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||||
|
|
||||||
|
deploy-neon-stress:
|
||||||
|
docker:
|
||||||
|
- image: cimg/python:3.10
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- setup_remote_docker
|
||||||
|
- run:
|
||||||
|
name: Setup ansible
|
||||||
|
command: |
|
||||||
|
pip install --progress-bar off --user ansible boto3
|
||||||
|
- run:
|
||||||
|
name: Redeploy
|
||||||
|
command: |
|
||||||
|
cd "$(pwd)/.circleci/ansible"
|
||||||
|
|
||||||
|
./get_binaries.sh
|
||||||
|
|
||||||
|
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
|
||||||
|
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
|
||||||
|
chmod 0600 ssh-key
|
||||||
|
ssh-add ssh-key
|
||||||
|
rm -f ssh-key ssh-key-cert.pub
|
||||||
|
|
||||||
|
ansible-playbook deploy.yaml -i neon-stress.hosts
|
||||||
|
rm -f neon_install.tar.gz .neon_current_version
|
||||||
|
|
||||||
|
deploy-neon-stress-proxy:
|
||||||
|
docker:
|
||||||
|
- image: cimg/base:2021.04
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: .kubeconfig
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- run:
|
||||||
|
name: Store kubeconfig file
|
||||||
|
command: |
|
||||||
|
echo "${NEON_STRESS_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
|
||||||
|
chmod 0600 ${KUBECONFIG}
|
||||||
|
- run:
|
||||||
|
name: Setup helm v3
|
||||||
|
command: |
|
||||||
|
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||||
|
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||||
|
- run:
|
||||||
|
name: Re-deploy proxy
|
||||||
|
command: |
|
||||||
|
DOCKER_TAG=$(git log --oneline|wc -l)
|
||||||
|
helm upgrade neon-stress-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||||
|
helm upgrade neon-stress-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/neon-stress.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||||
|
|
||||||
|
deploy-release:
|
||||||
|
docker:
|
||||||
|
- image: cimg/python:3.10
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- setup_remote_docker
|
||||||
|
- run:
|
||||||
|
name: Setup ansible
|
||||||
|
command: |
|
||||||
|
pip install --progress-bar off --user ansible boto3
|
||||||
|
- run:
|
||||||
|
name: Redeploy
|
||||||
|
command: |
|
||||||
|
cd "$(pwd)/.circleci/ansible"
|
||||||
|
|
||||||
|
RELEASE=true ./get_binaries.sh
|
||||||
|
|
||||||
|
echo "${TELEPORT_SSH_KEY}" | tr -d '\n'| base64 --decode >ssh-key
|
||||||
|
echo "${TELEPORT_SSH_CERT}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
|
||||||
|
chmod 0600 ssh-key
|
||||||
|
ssh-add ssh-key
|
||||||
|
rm -f ssh-key ssh-key-cert.pub
|
||||||
|
|
||||||
|
ansible-playbook deploy.yaml -i production.hosts
|
||||||
|
rm -f neon_install.tar.gz .neon_current_version
|
||||||
|
|
||||||
|
deploy-release-proxy:
|
||||||
|
docker:
|
||||||
|
- image: cimg/base:2021.04
|
||||||
|
environment:
|
||||||
|
KUBECONFIG: .kubeconfig
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- run:
|
||||||
|
name: Store kubeconfig file
|
||||||
|
command: |
|
||||||
|
echo "${PRODUCTION_KUBECONFIG_DATA}" | base64 --decode > ${KUBECONFIG}
|
||||||
|
chmod 0600 ${KUBECONFIG}
|
||||||
|
- run:
|
||||||
|
name: Setup helm v3
|
||||||
|
command: |
|
||||||
|
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||||
|
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||||
|
- run:
|
||||||
|
name: Re-deploy proxy
|
||||||
|
command: |
|
||||||
|
DOCKER_TAG="release-$(git log --oneline|wc -l)"
|
||||||
|
helm upgrade neon-proxy neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||||
|
helm upgrade neon-proxy-scram neondatabase/neon-proxy --install -f .circleci/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait
|
||||||
|
|
||||||
workflows:
|
workflows:
|
||||||
build_and_test:
|
build_and_test:
|
||||||
jobs:
|
jobs:
|
||||||
@@ -367,3 +640,103 @@ workflows:
|
|||||||
save_perf_report: true
|
save_perf_report: true
|
||||||
requires:
|
requires:
|
||||||
- build-neon-release
|
- build-neon-release
|
||||||
|
- docker-image:
|
||||||
|
# Context gives an ability to login
|
||||||
|
context: Docker Hub
|
||||||
|
# Build image only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- main
|
||||||
|
requires:
|
||||||
|
- pg_regress-tests-release
|
||||||
|
- other-tests-release
|
||||||
|
- docker-image-compute:
|
||||||
|
# Context gives an ability to login
|
||||||
|
context: Docker Hub
|
||||||
|
# Build image only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- main
|
||||||
|
requires:
|
||||||
|
- pg_regress-tests-release
|
||||||
|
- other-tests-release
|
||||||
|
- deploy-staging:
|
||||||
|
# Context gives an ability to login
|
||||||
|
context: Docker Hub
|
||||||
|
# deploy only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- main
|
||||||
|
requires:
|
||||||
|
- docker-image
|
||||||
|
- deploy-staging-proxy:
|
||||||
|
# deploy only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- main
|
||||||
|
requires:
|
||||||
|
- docker-image
|
||||||
|
|
||||||
|
- deploy-neon-stress:
|
||||||
|
# Context gives an ability to login
|
||||||
|
context: Docker Hub
|
||||||
|
# deploy only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- main
|
||||||
|
requires:
|
||||||
|
- docker-image
|
||||||
|
- deploy-neon-stress-proxy:
|
||||||
|
# deploy only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- main
|
||||||
|
requires:
|
||||||
|
- docker-image
|
||||||
|
|
||||||
|
- docker-image-release:
|
||||||
|
# Context gives an ability to login
|
||||||
|
context: Docker Hub
|
||||||
|
# Build image only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- release
|
||||||
|
requires:
|
||||||
|
- pg_regress-tests-release
|
||||||
|
- other-tests-release
|
||||||
|
- docker-image-compute-release:
|
||||||
|
# Context gives an ability to login
|
||||||
|
context: Docker Hub
|
||||||
|
# Build image only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- release
|
||||||
|
requires:
|
||||||
|
- pg_regress-tests-release
|
||||||
|
- other-tests-release
|
||||||
|
- deploy-release:
|
||||||
|
# Context gives an ability to login
|
||||||
|
context: Docker Hub
|
||||||
|
# deploy only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- release
|
||||||
|
requires:
|
||||||
|
- docker-image-release
|
||||||
|
- deploy-release-proxy:
|
||||||
|
# deploy only for commits to main
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- release
|
||||||
|
requires:
|
||||||
|
- docker-image-release
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ runs:
|
|||||||
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
|
EXTRA_PARAMS="-n4 $EXTRA_PARAMS"
|
||||||
fi
|
fi
|
||||||
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
|
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
|
||||||
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
|
if [[ "$GITHUB_REF" == "main" ]]; then
|
||||||
mkdir -p "$PERF_REPORT_DIR"
|
mkdir -p "$PERF_REPORT_DIR"
|
||||||
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
|
EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
|
||||||
fi
|
fi
|
||||||
@@ -115,7 +115,7 @@ runs:
|
|||||||
-rA $TEST_SELECTION $EXTRA_PARAMS
|
-rA $TEST_SELECTION $EXTRA_PARAMS
|
||||||
|
|
||||||
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
|
if [[ "${{ inputs.save_perf_report }}" == "true" ]]; then
|
||||||
if [[ "$GITHUB_REF" == "refs/heads/main" ]]; then
|
if [[ "$GITHUB_REF" == "main" ]]; then
|
||||||
export REPORT_FROM="$PERF_REPORT_DIR"
|
export REPORT_FROM="$PERF_REPORT_DIR"
|
||||||
export REPORT_TO=local
|
export REPORT_TO=local
|
||||||
scripts/generate_and_push_perf_report.sh
|
scripts/generate_and_push_perf_report.sh
|
||||||
|
|||||||
8
.github/workflows/benchmarking.yml
vendored
8
.github/workflows/benchmarking.yml
vendored
@@ -26,11 +26,11 @@ jobs:
|
|||||||
runs-on: [self-hosted, zenith-benchmarker]
|
runs-on: [self-hosted, zenith-benchmarker]
|
||||||
|
|
||||||
env:
|
env:
|
||||||
POSTGRES_DISTRIB_DIR: "/usr/pgsql-14"
|
POSTGRES_DISTRIB_DIR: "/usr/pgsql-13"
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout zenith repo
|
- name: Checkout zenith repo
|
||||||
uses: actions/checkout@v3
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
# actions/setup-python@v2 is not working correctly on self-hosted runners
|
# actions/setup-python@v2 is not working correctly on self-hosted runners
|
||||||
# see https://github.com/actions/setup-python/issues/162
|
# see https://github.com/actions/setup-python/issues/162
|
||||||
@@ -88,7 +88,7 @@ jobs:
|
|||||||
# Plus time needed to initialize the test databases.
|
# Plus time needed to initialize the test databases.
|
||||||
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
|
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
|
||||||
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
|
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
|
||||||
PLATFORM: "neon-staging"
|
PLATFORM: "zenith-staging"
|
||||||
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
|
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
|
||||||
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
|
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
|
||||||
run: |
|
run: |
|
||||||
@@ -96,7 +96,7 @@ jobs:
|
|||||||
# since it might generate duplicates when calling ingest_perf_test_result.py
|
# since it might generate duplicates when calling ingest_perf_test_result.py
|
||||||
rm -rf perf-report-staging
|
rm -rf perf-report-staging
|
||||||
mkdir -p perf-report-staging
|
mkdir -p perf-report-staging
|
||||||
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600
|
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging
|
||||||
|
|
||||||
- name: Submit result
|
- name: Submit result
|
||||||
env:
|
env:
|
||||||
|
|||||||
253
.github/workflows/build_and_test.yml
vendored
253
.github/workflows/build_and_test.yml
vendored
@@ -271,9 +271,6 @@ jobs:
|
|||||||
test_selection: performance
|
test_selection: performance
|
||||||
run_in_parallel: false
|
run_in_parallel: false
|
||||||
save_perf_report: true
|
save_perf_report: true
|
||||||
env:
|
|
||||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
|
||||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
|
||||||
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
# XXX: no coverage data handling here, since benchmarks are run on release builds,
|
||||||
# while coverage is currently collected for the debug ones
|
# while coverage is currently collected for the debug ones
|
||||||
|
|
||||||
@@ -390,253 +387,3 @@ jobs:
|
|||||||
\"remote_repo\": \"${{ github.repository }}\"
|
\"remote_repo\": \"${{ github.repository }}\"
|
||||||
}
|
}
|
||||||
}"
|
}"
|
||||||
|
|
||||||
docker-image:
|
|
||||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
|
||||||
needs: [ pg_regress-tests, other-tests ]
|
|
||||||
if: |
|
|
||||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
|
||||||
github.event_name != 'workflow_dispatch'
|
|
||||||
outputs:
|
|
||||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Login to DockerHub
|
|
||||||
uses: docker/login-action@v1
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
|
||||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v1
|
|
||||||
with:
|
|
||||||
driver: docker
|
|
||||||
|
|
||||||
- name: Get build tag
|
|
||||||
run: |
|
|
||||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
|
||||||
echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
|
||||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
|
||||||
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
|
||||||
else
|
|
||||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
id: build-tag
|
|
||||||
|
|
||||||
- name: Get legacy build tag
|
|
||||||
run: |
|
|
||||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
|
||||||
echo "::set-output name=tag::latest
|
|
||||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
|
||||||
echo "::set-output name=tag::release
|
|
||||||
else
|
|
||||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
id: legacy-build-tag
|
|
||||||
|
|
||||||
- name: Build neon Docker image
|
|
||||||
uses: docker/build-push-action@v2
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
build-args: |
|
|
||||||
GIT_VERSION="${{github.sha}}"
|
|
||||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
|
||||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
|
||||||
pull: true
|
|
||||||
push: true
|
|
||||||
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
|
|
||||||
|
|
||||||
docker-image-compute:
|
|
||||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
|
||||||
needs: [ pg_regress-tests, other-tests ]
|
|
||||||
if: |
|
|
||||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
|
||||||
github.event_name != 'workflow_dispatch'
|
|
||||||
outputs:
|
|
||||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Login to DockerHub
|
|
||||||
uses: docker/login-action@v1
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
|
||||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v1
|
|
||||||
with:
|
|
||||||
driver: docker
|
|
||||||
|
|
||||||
- name: Get build tag
|
|
||||||
run: |
|
|
||||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
|
||||||
echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
|
||||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
|
||||||
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
|
||||||
else
|
|
||||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
id: build-tag
|
|
||||||
|
|
||||||
- name: Get legacy build tag
|
|
||||||
run: |
|
|
||||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
|
||||||
echo "::set-output name=tag::latest
|
|
||||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
|
||||||
echo "::set-output name=tag::release
|
|
||||||
else
|
|
||||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
id: legacy-build-tag
|
|
||||||
|
|
||||||
- name: Build compute-tools Docker image
|
|
||||||
uses: docker/build-push-action@v2
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
build-args: |
|
|
||||||
GIT_VERSION="${{github.sha}}"
|
|
||||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
|
||||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
|
||||||
push: false
|
|
||||||
file: Dockerfile.compute-tools
|
|
||||||
tags: neondatabase/compute-tools:local
|
|
||||||
|
|
||||||
- name: Push compute-tools Docker image
|
|
||||||
uses: docker/build-push-action@v2
|
|
||||||
with:
|
|
||||||
context: .
|
|
||||||
build-args: |
|
|
||||||
GIT_VERSION="${{github.sha}}"
|
|
||||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
|
||||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
|
||||||
push: true
|
|
||||||
file: Dockerfile.compute-tools
|
|
||||||
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
|
|
||||||
|
|
||||||
- name: Build compute-node Docker image
|
|
||||||
uses: docker/build-push-action@v2
|
|
||||||
with:
|
|
||||||
context: ./vendor/postgres/
|
|
||||||
build-args:
|
|
||||||
COMPUTE_TOOLS_TAG=local
|
|
||||||
push: true
|
|
||||||
tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}
|
|
||||||
|
|
||||||
calculate-deploy-targets:
|
|
||||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
|
||||||
if: |
|
|
||||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
|
||||||
github.event_name != 'workflow_dispatch'
|
|
||||||
outputs:
|
|
||||||
matrix-include: ${{ steps.set-matrix.outputs.include }}
|
|
||||||
steps:
|
|
||||||
- id: set-matrix
|
|
||||||
run: |
|
|
||||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
|
||||||
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA"}'
|
|
||||||
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA"}'
|
|
||||||
echo "::set-output name=include::[$STAGING, $NEON_STRESS]"
|
|
||||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
|
||||||
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA"}'
|
|
||||||
echo "::set-output name=include::[$PRODUCTION]"
|
|
||||||
else
|
|
||||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
deploy:
|
|
||||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
|
||||||
# We need both storage **and** compute images for deploy, because control plane
|
|
||||||
# picks the compute version based on the storage version. If it notices a fresh
|
|
||||||
# storage it may bump the compute version. And if compute image failed to build
|
|
||||||
# it may break things badly.
|
|
||||||
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
|
|
||||||
if: |
|
|
||||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
|
||||||
github.event_name != 'workflow_dispatch'
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Setup ansible
|
|
||||||
run: |
|
|
||||||
pip install --progress-bar off --user ansible boto3
|
|
||||||
|
|
||||||
- name: Redeploy
|
|
||||||
run: |
|
|
||||||
cd "$(pwd)/.github/ansible"
|
|
||||||
|
|
||||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
|
||||||
./get_binaries.sh
|
|
||||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
|
||||||
RELEASE=true ./get_binaries.sh
|
|
||||||
else
|
|
||||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
eval $(ssh-agent)
|
|
||||||
echo "${{ secrets.TELEPORT_SSH_KEY }}" | tr -d '\n'| base64 --decode >ssh-key
|
|
||||||
echo "${{ secrets.TELEPORT_SSH_CERT }}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
|
|
||||||
chmod 0600 ssh-key
|
|
||||||
ssh-add ssh-key
|
|
||||||
rm -f ssh-key ssh-key-cert.pub
|
|
||||||
|
|
||||||
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts
|
|
||||||
rm -f neon_install.tar.gz .neon_current_version
|
|
||||||
|
|
||||||
deploy-proxy:
|
|
||||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
|
||||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
|
|
||||||
# to run all deploy jobs consistently.
|
|
||||||
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
|
|
||||||
if: |
|
|
||||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
|
||||||
github.event_name != 'workflow_dispatch'
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
|
|
||||||
env:
|
|
||||||
KUBECONFIG: .kubeconfig
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
with:
|
|
||||||
submodules: true
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Store kubeconfig file
|
|
||||||
run: |
|
|
||||||
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
|
|
||||||
chmod 0600 ${KUBECONFIG}
|
|
||||||
|
|
||||||
- name: Setup helm v3
|
|
||||||
run: |
|
|
||||||
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
|
||||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
|
||||||
|
|
||||||
- name: Re-deploy proxy
|
|
||||||
run: |
|
|
||||||
DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
|
|
||||||
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
|
||||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
|
||||||
|
|||||||
3
.github/workflows/pg_clients.yml
vendored
3
.github/workflows/pg_clients.yml
vendored
@@ -48,6 +48,9 @@ jobs:
|
|||||||
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
|
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
|
||||||
TEST_OUTPUT: /tmp/test_output
|
TEST_OUTPUT: /tmp/test_output
|
||||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||||
|
# this variable will be embedded in perf test report
|
||||||
|
# and is needed to distinguish different environments
|
||||||
|
PLATFORM: github-actions-selfhosted
|
||||||
shell: bash -ex {0}
|
shell: bash -ex {0}
|
||||||
run: |
|
run: |
|
||||||
# Test framework expects we have psql binary;
|
# Test framework expects we have psql binary;
|
||||||
|
|||||||
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -2151,7 +2151,7 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"utils",
|
"utils",
|
||||||
"wal_craft",
|
"wal_generate",
|
||||||
"workspace_hack",
|
"workspace_hack",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -3753,16 +3753,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wal_craft"
|
name = "wal_generate"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap 3.0.14",
|
"clap 3.0.14",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"log",
|
"log",
|
||||||
"once_cell",
|
|
||||||
"postgres",
|
"postgres",
|
||||||
"postgres_ffi",
|
|
||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
39
Makefile
39
Makefile
@@ -1,8 +1,3 @@
|
|||||||
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
|
||||||
|
|
||||||
# Where to install Postgres, default is ./tmp_install, maybe useful for package managers
|
|
||||||
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/tmp_install
|
|
||||||
|
|
||||||
# Seccomp BPF is only available for Linux
|
# Seccomp BPF is only available for Linux
|
||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
ifeq ($(UNAME_S),Linux)
|
ifeq ($(UNAME_S),Linux)
|
||||||
@@ -60,55 +55,55 @@ zenith: postgres-headers
|
|||||||
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
|
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
|
||||||
|
|
||||||
### PostgreSQL parts
|
### PostgreSQL parts
|
||||||
$(POSTGRES_INSTALL_DIR)/build/config.status:
|
tmp_install/build/config.status:
|
||||||
+@echo "Configuring postgres build"
|
+@echo "Configuring postgres build"
|
||||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build
|
mkdir -p tmp_install/build
|
||||||
(cd $(POSTGRES_INSTALL_DIR)/build && \
|
(cd tmp_install/build && \
|
||||||
$(ROOT_PROJECT_DIR)/vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
|
../../vendor/postgres/configure CFLAGS='$(PG_CFLAGS)' \
|
||||||
$(PG_CONFIGURE_OPTS) \
|
$(PG_CONFIGURE_OPTS) \
|
||||||
$(SECCOMP) \
|
$(SECCOMP) \
|
||||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR)) > configure.log)
|
--prefix=$(abspath tmp_install) > configure.log)
|
||||||
|
|
||||||
# nicer alias for running 'configure'
|
# nicer alias for running 'configure'
|
||||||
.PHONY: postgres-configure
|
.PHONY: postgres-configure
|
||||||
postgres-configure: $(POSTGRES_INSTALL_DIR)/build/config.status
|
postgres-configure: tmp_install/build/config.status
|
||||||
|
|
||||||
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/include
|
# Install the PostgreSQL header files into tmp_install/include
|
||||||
.PHONY: postgres-headers
|
.PHONY: postgres-headers
|
||||||
postgres-headers: postgres-configure
|
postgres-headers: postgres-configure
|
||||||
+@echo "Installing PostgreSQL headers"
|
+@echo "Installing PostgreSQL headers"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/src/include MAKELEVEL=0 install
|
$(MAKE) -C tmp_install/build/src/include MAKELEVEL=0 install
|
||||||
|
|
||||||
# Compile and install PostgreSQL and contrib/neon
|
# Compile and install PostgreSQL and contrib/neon
|
||||||
.PHONY: postgres
|
.PHONY: postgres
|
||||||
postgres: postgres-configure \
|
postgres: postgres-configure \
|
||||||
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
|
postgres-headers # to prevent `make install` conflicts with zenith's `postgres-headers`
|
||||||
+@echo "Compiling PostgreSQL"
|
+@echo "Compiling PostgreSQL"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 install
|
$(MAKE) -C tmp_install/build MAKELEVEL=0 install
|
||||||
+@echo "Compiling contrib/neon"
|
+@echo "Compiling contrib/neon"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon install
|
$(MAKE) -C tmp_install/build/contrib/neon install
|
||||||
+@echo "Compiling contrib/neon_test_utils"
|
+@echo "Compiling contrib/neon_test_utils"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/neon_test_utils install
|
$(MAKE) -C tmp_install/build/contrib/neon_test_utils install
|
||||||
+@echo "Compiling pg_buffercache"
|
+@echo "Compiling pg_buffercache"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pg_buffercache install
|
$(MAKE) -C tmp_install/build/contrib/pg_buffercache install
|
||||||
+@echo "Compiling pageinspect"
|
+@echo "Compiling pageinspect"
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/contrib/pageinspect install
|
$(MAKE) -C tmp_install/build/contrib/pageinspect install
|
||||||
|
|
||||||
|
|
||||||
.PHONY: postgres-clean
|
.PHONY: postgres-clean
|
||||||
postgres-clean:
|
postgres-clean:
|
||||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build MAKELEVEL=0 clean
|
$(MAKE) -C tmp_install/build MAKELEVEL=0 clean
|
||||||
|
|
||||||
# This doesn't remove the effects of 'configure'.
|
# This doesn't remove the effects of 'configure'.
|
||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
cd $(POSTGRES_INSTALL_DIR)/build && $(MAKE) clean
|
cd tmp_install/build && $(MAKE) clean
|
||||||
$(CARGO_CMD_PREFIX) cargo clean
|
$(CARGO_CMD_PREFIX) cargo clean
|
||||||
|
|
||||||
# This removes everything
|
# This removes everything
|
||||||
.PHONY: distclean
|
.PHONY: distclean
|
||||||
distclean:
|
distclean:
|
||||||
rm -rf $(POSTGRES_INSTALL_DIR)
|
rm -rf tmp_install
|
||||||
$(CARGO_CMD_PREFIX) cargo clean
|
$(CARGO_CMD_PREFIX) cargo clean
|
||||||
|
|
||||||
.PHONY: fmt
|
.PHONY: fmt
|
||||||
@@ -117,4 +112,4 @@ fmt:
|
|||||||
|
|
||||||
.PHONY: setup-pre-commit-hook
|
.PHONY: setup-pre-commit-hook
|
||||||
setup-pre-commit-hook:
|
setup-pre-commit-hook:
|
||||||
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
|
ln -s -f ../../pre-commit.py .git/hooks/pre-commit
|
||||||
|
|||||||
@@ -295,7 +295,7 @@ impl ComputeNode {
|
|||||||
handle_roles(&self.spec, &mut client)?;
|
handle_roles(&self.spec, &mut client)?;
|
||||||
handle_databases(&self.spec, &mut client)?;
|
handle_databases(&self.spec, &mut client)?;
|
||||||
handle_role_deletions(self, &mut client)?;
|
handle_role_deletions(self, &mut client)?;
|
||||||
handle_grants(self, &mut client)?;
|
handle_grants(&self.spec, &mut client)?;
|
||||||
create_writablity_check_data(&mut client)?;
|
create_writablity_check_data(&mut client)?;
|
||||||
|
|
||||||
// 'Close' connection
|
// 'Close' connection
|
||||||
|
|||||||
@@ -248,20 +248,18 @@ pub fn wait_for_postgres(pg: &mut Child, port: &str, pgdata: &Path) -> Result<()
|
|||||||
bail!("Postgres exited unexpectedly with code {}", code);
|
bail!("Postgres exited unexpectedly with code {}", code);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that we can open pid file first.
|
if pid_path.exists() {
|
||||||
if let Ok(file) = File::open(&pid_path) {
|
let file = BufReader::new(File::open(&pid_path)?);
|
||||||
let file = BufReader::new(file);
|
let status = file
|
||||||
let last_line = file.lines().last();
|
.lines()
|
||||||
|
.last()
|
||||||
|
.unwrap()
|
||||||
|
.unwrap_or_else(|_| "unknown".to_string());
|
||||||
|
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
|
||||||
|
|
||||||
// Pid file could be there and we could read it, but it could be empty, for example.
|
// Now Postgres is ready to accept connections
|
||||||
if let Some(Ok(line)) = last_line {
|
if status.trim() == "ready" && can_connect {
|
||||||
let status = line.trim();
|
break;
|
||||||
let can_connect = TcpStream::connect_timeout(&addr, timeout).is_ok();
|
|
||||||
|
|
||||||
// Now Postgres is ready to accept connections
|
|
||||||
if status == "ready" && can_connect {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,7 @@
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::{anyhow, Result};
|
use anyhow::Result;
|
||||||
use log::{info, log_enabled, warn, Level};
|
use log::{info, log_enabled, warn, Level};
|
||||||
use postgres::error::SqlState;
|
|
||||||
use postgres::{Client, NoTls};
|
use postgres::{Client, NoTls};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
|
||||||
@@ -350,11 +349,9 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
|
// Grant CREATE ON DATABASE to the database owner
|
||||||
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
|
// to allow clients create trusted extensions.
|
||||||
pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
pub fn handle_grants(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||||
let spec = &node.spec;
|
|
||||||
|
|
||||||
info!("cluster spec grants:");
|
info!("cluster spec grants:");
|
||||||
|
|
||||||
// We now have a separate `web_access` role to connect to the database
|
// We now have a separate `web_access` role to connect to the database
|
||||||
@@ -383,33 +380,5 @@ pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
|
|||||||
client.execute(query.as_str(), &[])?;
|
client.execute(query.as_str(), &[])?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do some per-database access adjustments. We'd better do this at db creation time,
|
|
||||||
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
|
|
||||||
// atomically.
|
|
||||||
let mut db_connstr = node.connstr.clone();
|
|
||||||
for db in &node.spec.cluster.databases {
|
|
||||||
// database name is always the last and the only component of the path
|
|
||||||
db_connstr.set_path(&db.name);
|
|
||||||
|
|
||||||
let mut db_client = Client::connect(db_connstr.as_str(), NoTls)?;
|
|
||||||
|
|
||||||
// This will only change ownership on the schema itself, not the objects
|
|
||||||
// inside it. Without it owner of the `public` schema will be `cloud_admin`
|
|
||||||
// and database owner cannot do anything with it.
|
|
||||||
let alter_query = format!("ALTER SCHEMA public OWNER TO {}", db.owner.quote());
|
|
||||||
let res = db_client.simple_query(&alter_query);
|
|
||||||
|
|
||||||
if let Err(e) = res {
|
|
||||||
if e.code() == Some(&SqlState::INVALID_SCHEMA_NAME) {
|
|
||||||
// This is OK, db just don't have a `public` schema.
|
|
||||||
// Probably user dropped it manually.
|
|
||||||
info!("no 'public' schema found in the database {}", db.name);
|
|
||||||
} else {
|
|
||||||
// Something different happened, propagate the error
|
|
||||||
return Err(anyhow!(e));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
|||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
env_logger = "0.9"
|
env_logger = "0.9"
|
||||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||||
wal_craft = { path = "wal_craft" }
|
wal_generate = { path = "wal_generate" }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
bindgen = "0.59.1"
|
bindgen = "0.59.1"
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ extern crate bindgen;
|
|||||||
|
|
||||||
use std::env;
|
use std::env;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::process::Command;
|
|
||||||
|
|
||||||
use bindgen::callbacks::ParseCallbacks;
|
use bindgen::callbacks::ParseCallbacks;
|
||||||
|
|
||||||
@@ -46,43 +45,6 @@ fn main() {
|
|||||||
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
||||||
println!("cargo:rerun-if-changed=pg_control_ffi.h");
|
println!("cargo:rerun-if-changed=pg_control_ffi.h");
|
||||||
|
|
||||||
// Finding the location of C headers for the Postgres server:
|
|
||||||
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
|
|
||||||
// - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/tmp_install/include/postgresql/server`
|
|
||||||
let mut pg_install_dir: PathBuf;
|
|
||||||
if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
|
|
||||||
pg_install_dir = postgres_install_dir.into();
|
|
||||||
} else {
|
|
||||||
pg_install_dir = PathBuf::from("tmp_install")
|
|
||||||
}
|
|
||||||
|
|
||||||
if pg_install_dir.is_relative() {
|
|
||||||
let cwd = env::current_dir().unwrap();
|
|
||||||
pg_install_dir = cwd.join("..").join("..").join(pg_install_dir);
|
|
||||||
}
|
|
||||||
|
|
||||||
let pg_config_bin = pg_install_dir.join("bin").join("pg_config");
|
|
||||||
let inc_server_path: String = if pg_config_bin.exists() {
|
|
||||||
let output = Command::new(pg_config_bin)
|
|
||||||
.arg("--includedir-server")
|
|
||||||
.output()
|
|
||||||
.expect("failed to execute `pg_config --includedir-server`");
|
|
||||||
|
|
||||||
if !output.status.success() {
|
|
||||||
panic!("`pg_config --includedir-server` failed")
|
|
||||||
}
|
|
||||||
|
|
||||||
String::from_utf8(output.stdout).unwrap().trim_end().into()
|
|
||||||
} else {
|
|
||||||
pg_install_dir
|
|
||||||
.join("include")
|
|
||||||
.join("postgresql")
|
|
||||||
.join("server")
|
|
||||||
.into_os_string()
|
|
||||||
.into_string()
|
|
||||||
.unwrap()
|
|
||||||
};
|
|
||||||
|
|
||||||
// The bindgen::Builder is the main entry point
|
// The bindgen::Builder is the main entry point
|
||||||
// to bindgen, and lets you build up options for
|
// to bindgen, and lets you build up options for
|
||||||
// the resulting bindings.
|
// the resulting bindings.
|
||||||
@@ -119,7 +81,15 @@ fn main() {
|
|||||||
// explicit padding fields.
|
// explicit padding fields.
|
||||||
.explicit_padding(true)
|
.explicit_padding(true)
|
||||||
//
|
//
|
||||||
.clang_arg(format!("-I{inc_server_path}"))
|
// Path the server include dir. It is in tmp_install/include/server, if you did
|
||||||
|
// "configure --prefix=<path to tmp_install>". But if you used "configure --prefix=/",
|
||||||
|
// and used DESTDIR to move it into tmp_install, then it's in
|
||||||
|
// tmp_install/include/postgres/server
|
||||||
|
// 'pg_config --includedir-server' would perhaps be the more proper way to find it,
|
||||||
|
// but this will do for now.
|
||||||
|
//
|
||||||
|
.clang_arg("-I../../tmp_install/include/server")
|
||||||
|
.clang_arg("-I../../tmp_install/include/postgresql/server")
|
||||||
//
|
//
|
||||||
// Finish the builder and generate the bindings.
|
// Finish the builder and generate the bindings.
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -82,17 +82,7 @@ impl WalStreamDecoder {
|
|||||||
// that cross page boundaries.
|
// that cross page boundaries.
|
||||||
loop {
|
loop {
|
||||||
// parse and verify page boundaries as we go
|
// parse and verify page boundaries as we go
|
||||||
if self.padlen > 0 {
|
if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
|
||||||
// We should first skip padding, as we may have to skip some page headers if we're processing the XLOG_SWITCH record.
|
|
||||||
if self.inputbuf.remaining() < self.padlen as usize {
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
|
|
||||||
// skip padding
|
|
||||||
self.inputbuf.advance(self.padlen as usize);
|
|
||||||
self.lsn += self.padlen as u64;
|
|
||||||
self.padlen = 0;
|
|
||||||
} else if self.lsn.segment_offset(pg_constants::WAL_SEGMENT_SIZE) == 0 {
|
|
||||||
// parse long header
|
// parse long header
|
||||||
|
|
||||||
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
|
if self.inputbuf.remaining() < XLOG_SIZE_OF_XLOG_LONG_PHD {
|
||||||
@@ -138,6 +128,15 @@ impl WalStreamDecoder {
|
|||||||
|
|
||||||
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
|
self.lsn += XLOG_SIZE_OF_XLOG_SHORT_PHD as u64;
|
||||||
continue;
|
continue;
|
||||||
|
} else if self.padlen > 0 {
|
||||||
|
if self.inputbuf.remaining() < self.padlen as usize {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// skip padding
|
||||||
|
self.inputbuf.advance(self.padlen as usize);
|
||||||
|
self.lsn += self.padlen as u64;
|
||||||
|
self.padlen = 0;
|
||||||
} else if self.contlen == 0 {
|
} else if self.contlen == 0 {
|
||||||
assert!(self.recordbuf.is_empty());
|
assert!(self.recordbuf.is_empty());
|
||||||
|
|
||||||
@@ -227,10 +226,10 @@ impl WalStreamDecoder {
|
|||||||
self.padlen = self.lsn.calc_padding(8u32) as u32;
|
self.padlen = self.lsn.calc_padding(8u32) as u32;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should return LSN of the next record, not the last byte of this record or
|
// Always align resulting LSN on 0x8 boundary -- that is important for getPage()
|
||||||
// the byte immediately after. Note that this handles both XLOG_SWITCH and usual
|
// and WalReceiver integration. Since this code is used both for WalReceiver and
|
||||||
// records, the former "spans" until the next WAL segment (see test_xlog_switch).
|
// initial WAL import let's force alignment right here.
|
||||||
let result = (self.lsn + self.padlen as u64, recordbuf);
|
let result = (self.lsn.align(), recordbuf);
|
||||||
Ok(Some(result))
|
Ok(Some(result))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -597,18 +597,19 @@ mod tests {
|
|||||||
fn init_logging() {
|
fn init_logging() {
|
||||||
let _ = env_logger::Builder::from_env(
|
let _ = env_logger::Builder::from_env(
|
||||||
env_logger::Env::default()
|
env_logger::Env::default()
|
||||||
.default_filter_or("wal_craft=info,postgres_ffi::xlog_utils=trace"),
|
.default_filter_or("wal_generate=info,postgres_ffi::xlog_utils=trace"),
|
||||||
)
|
)
|
||||||
.is_test(true)
|
.is_test(true)
|
||||||
.try_init();
|
.try_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn test_end_of_wal<C: wal_craft::Crafter>(
|
fn test_end_of_wal(
|
||||||
test_name: &str,
|
test_name: &str,
|
||||||
|
generate_wal: impl Fn(&mut postgres::Client) -> anyhow::Result<postgres::types::PgLsn>,
|
||||||
expected_end_of_wal_non_partial: Lsn,
|
expected_end_of_wal_non_partial: Lsn,
|
||||||
last_segment: &str,
|
last_segment: &str,
|
||||||
) {
|
) {
|
||||||
use wal_craft::*;
|
use wal_generate::*;
|
||||||
// 1. Generate some WAL
|
// 1. Generate some WAL
|
||||||
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||||
.join("..")
|
.join("..")
|
||||||
@@ -621,9 +622,9 @@ mod tests {
|
|||||||
fs::remove_dir_all(&cfg.datadir).unwrap();
|
fs::remove_dir_all(&cfg.datadir).unwrap();
|
||||||
}
|
}
|
||||||
cfg.initdb().unwrap();
|
cfg.initdb().unwrap();
|
||||||
let srv = cfg.start_server().unwrap();
|
let mut srv = cfg.start_server().unwrap();
|
||||||
let expected_wal_end: Lsn =
|
let expected_wal_end: Lsn =
|
||||||
u64::from(C::craft(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
|
u64::from(generate_wal(&mut srv.connect_with_timeout().unwrap()).unwrap()).into();
|
||||||
srv.kill();
|
srv.kill();
|
||||||
|
|
||||||
// 2. Pick WAL generated by initdb
|
// 2. Pick WAL generated by initdb
|
||||||
@@ -680,8 +681,9 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
pub fn test_find_end_of_wal_simple() {
|
pub fn test_find_end_of_wal_simple() {
|
||||||
init_logging();
|
init_logging();
|
||||||
test_end_of_wal::<wal_craft::Simple>(
|
test_end_of_wal(
|
||||||
"test_find_end_of_wal_simple",
|
"test_find_end_of_wal_simple",
|
||||||
|
wal_generate::generate_simple,
|
||||||
"0/2000000".parse::<Lsn>().unwrap(),
|
"0/2000000".parse::<Lsn>().unwrap(),
|
||||||
"000000010000000000000001",
|
"000000010000000000000001",
|
||||||
);
|
);
|
||||||
@@ -690,8 +692,9 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
pub fn test_find_end_of_wal_crossing_segment_followed_by_small_one() {
|
pub fn test_find_end_of_wal_crossing_segment_followed_by_small_one() {
|
||||||
init_logging();
|
init_logging();
|
||||||
test_end_of_wal::<wal_craft::WalRecordCrossingSegmentFollowedBySmallOne>(
|
test_end_of_wal(
|
||||||
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
|
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
|
||||||
|
wal_generate::generate_wal_record_crossing_segment_followed_by_small_one,
|
||||||
"0/3000000".parse::<Lsn>().unwrap(),
|
"0/3000000".parse::<Lsn>().unwrap(),
|
||||||
"000000010000000000000002",
|
"000000010000000000000002",
|
||||||
);
|
);
|
||||||
@@ -701,8 +704,9 @@ mod tests {
|
|||||||
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
|
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
|
||||||
pub fn test_find_end_of_wal_last_crossing_segment() {
|
pub fn test_find_end_of_wal_last_crossing_segment() {
|
||||||
init_logging();
|
init_logging();
|
||||||
test_end_of_wal::<wal_craft::LastWalRecordCrossingSegment>(
|
test_end_of_wal(
|
||||||
"test_find_end_of_wal_last_crossing_segment",
|
"test_find_end_of_wal_last_crossing_segment",
|
||||||
|
wal_generate::generate_last_wal_record_crossing_segment,
|
||||||
"0/3000000".parse::<Lsn>().unwrap(),
|
"0/3000000".parse::<Lsn>().unwrap(),
|
||||||
"000000010000000000000002",
|
"000000010000000000000002",
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -1,100 +0,0 @@
|
|||||||
use anyhow::*;
|
|
||||||
use clap::{App, Arg, ArgMatches};
|
|
||||||
use std::str::FromStr;
|
|
||||||
use wal_craft::*;
|
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
|
||||||
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("wal_craft=info"))
|
|
||||||
.init();
|
|
||||||
let type_arg = &Arg::new("type")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Type of WAL to craft")
|
|
||||||
.possible_values([
|
|
||||||
Simple::NAME,
|
|
||||||
LastWalRecordXlogSwitch::NAME,
|
|
||||||
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME,
|
|
||||||
WalRecordCrossingSegmentFollowedBySmallOne::NAME,
|
|
||||||
LastWalRecordCrossingSegment::NAME,
|
|
||||||
])
|
|
||||||
.required(true);
|
|
||||||
let arg_matches = App::new("Postgres WAL crafter")
|
|
||||||
.about("Crafts Postgres databases with specific WAL properties")
|
|
||||||
.subcommand(
|
|
||||||
App::new("print-postgres-config")
|
|
||||||
.about("Print the configuration required for PostgreSQL server before running this script")
|
|
||||||
)
|
|
||||||
.subcommand(
|
|
||||||
App::new("with-initdb")
|
|
||||||
.about("Craft WAL in a new data directory first initialized with initdb")
|
|
||||||
.arg(type_arg)
|
|
||||||
.arg(
|
|
||||||
Arg::new("datadir")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Data directory for the Postgres server")
|
|
||||||
.required(true)
|
|
||||||
)
|
|
||||||
.arg(
|
|
||||||
Arg::new("pg-distrib-dir")
|
|
||||||
.long("pg-distrib-dir")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
|
|
||||||
.default_value("/usr/local")
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.subcommand(
|
|
||||||
App::new("in-existing")
|
|
||||||
.about("Craft WAL at an existing recently created Postgres database. Note that server may append new WAL entries on shutdown.")
|
|
||||||
.arg(type_arg)
|
|
||||||
.arg(
|
|
||||||
Arg::new("connection")
|
|
||||||
.takes_value(true)
|
|
||||||
.help("Connection string to the Postgres database to populate")
|
|
||||||
.required(true)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.get_matches();
|
|
||||||
|
|
||||||
let wal_craft = |arg_matches: &ArgMatches, client| {
|
|
||||||
let lsn = match arg_matches.value_of("type").unwrap() {
|
|
||||||
Simple::NAME => Simple::craft(client)?,
|
|
||||||
LastWalRecordXlogSwitch::NAME => LastWalRecordXlogSwitch::craft(client)?,
|
|
||||||
LastWalRecordXlogSwitchEndsOnPageBoundary::NAME => {
|
|
||||||
LastWalRecordXlogSwitchEndsOnPageBoundary::craft(client)?
|
|
||||||
}
|
|
||||||
WalRecordCrossingSegmentFollowedBySmallOne::NAME => {
|
|
||||||
WalRecordCrossingSegmentFollowedBySmallOne::craft(client)?
|
|
||||||
}
|
|
||||||
LastWalRecordCrossingSegment::NAME => LastWalRecordCrossingSegment::craft(client)?,
|
|
||||||
a => panic!("Unknown --type argument: {}", a),
|
|
||||||
};
|
|
||||||
println!("end_of_wal = {}", lsn);
|
|
||||||
Ok(())
|
|
||||||
};
|
|
||||||
|
|
||||||
match arg_matches.subcommand() {
|
|
||||||
None => panic!("No subcommand provided"),
|
|
||||||
Some(("print-postgres-config", _)) => {
|
|
||||||
for cfg in REQUIRED_POSTGRES_CONFIG.iter() {
|
|
||||||
println!("{}", cfg);
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
Some(("with-initdb", arg_matches)) => {
|
|
||||||
let cfg = Conf {
|
|
||||||
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
|
|
||||||
datadir: arg_matches.value_of("datadir").unwrap().into(),
|
|
||||||
};
|
|
||||||
cfg.initdb()?;
|
|
||||||
let srv = cfg.start_server()?;
|
|
||||||
wal_craft(arg_matches, &mut srv.connect_with_timeout()?)?;
|
|
||||||
srv.kill();
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
Some(("in-existing", arg_matches)) => wal_craft(
|
|
||||||
arg_matches,
|
|
||||||
&mut postgres::Config::from_str(arg_matches.value_of("connection").unwrap())?
|
|
||||||
.connect(postgres::NoTls)?,
|
|
||||||
),
|
|
||||||
Some(_) => panic!("Unknown subcommand"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "wal_craft"
|
name = "wal_generate"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
@@ -10,7 +10,5 @@ anyhow = "1.0"
|
|||||||
clap = "3.0"
|
clap = "3.0"
|
||||||
env_logger = "0.9"
|
env_logger = "0.9"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
once_cell = "1.8.0"
|
|
||||||
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
postgres = { git = "https://github.com/zenithdb/rust-postgres.git", rev="d052ee8b86fff9897c77b0fe89ea9daba0e1fa38" }
|
||||||
postgres_ffi = { path = "../" }
|
|
||||||
tempfile = "3.2"
|
tempfile = "3.2"
|
||||||
58
libs/postgres_ffi/wal_generate/src/bin/wal_generate.rs
Normal file
58
libs/postgres_ffi/wal_generate/src/bin/wal_generate.rs
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
use anyhow::*;
|
||||||
|
use clap::{App, Arg};
|
||||||
|
use wal_generate::*;
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
env_logger::Builder::from_env(
|
||||||
|
env_logger::Env::default().default_filter_or("wal_generate=info"),
|
||||||
|
)
|
||||||
|
.init();
|
||||||
|
let arg_matches = App::new("Postgres WAL generator")
|
||||||
|
.about("Generates Postgres databases with specific WAL properties")
|
||||||
|
.arg(
|
||||||
|
Arg::new("datadir")
|
||||||
|
.short('D')
|
||||||
|
.long("datadir")
|
||||||
|
.takes_value(true)
|
||||||
|
.help("Data directory for the Postgres server")
|
||||||
|
.required(true)
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new("pg-distrib-dir")
|
||||||
|
.long("pg-distrib-dir")
|
||||||
|
.takes_value(true)
|
||||||
|
.help("Directory with Postgres distribution (bin and lib directories, e.g. tmp_install)")
|
||||||
|
.default_value("/usr/local")
|
||||||
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::new("type")
|
||||||
|
.long("type")
|
||||||
|
.takes_value(true)
|
||||||
|
.help("Type of WAL to generate")
|
||||||
|
.possible_values(["simple", "last_wal_record_crossing_segment", "wal_record_crossing_segment_followed_by_small_one"])
|
||||||
|
.required(true)
|
||||||
|
)
|
||||||
|
.get_matches();
|
||||||
|
|
||||||
|
let cfg = Conf {
|
||||||
|
pg_distrib_dir: arg_matches.value_of("pg-distrib-dir").unwrap().into(),
|
||||||
|
datadir: arg_matches.value_of("datadir").unwrap().into(),
|
||||||
|
};
|
||||||
|
cfg.initdb()?;
|
||||||
|
let mut srv = cfg.start_server()?;
|
||||||
|
let lsn = match arg_matches.value_of("type").unwrap() {
|
||||||
|
"simple" => generate_simple(&mut srv.connect_with_timeout()?)?,
|
||||||
|
"last_wal_record_crossing_segment" => {
|
||||||
|
generate_last_wal_record_crossing_segment(&mut srv.connect_with_timeout()?)?
|
||||||
|
}
|
||||||
|
"wal_record_crossing_segment_followed_by_small_one" => {
|
||||||
|
generate_wal_record_crossing_segment_followed_by_small_one(
|
||||||
|
&mut srv.connect_with_timeout()?,
|
||||||
|
)?
|
||||||
|
}
|
||||||
|
a => panic!("Unknown --type argument: {}", a),
|
||||||
|
};
|
||||||
|
println!("end_of_wal = {}", lsn);
|
||||||
|
srv.kill();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
@@ -1,12 +1,8 @@
|
|||||||
use anyhow::*;
|
use anyhow::*;
|
||||||
use core::time::Duration;
|
use core::time::Duration;
|
||||||
use log::*;
|
use log::*;
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
use postgres::types::PgLsn;
|
use postgres::types::PgLsn;
|
||||||
use postgres::Client;
|
use postgres::Client;
|
||||||
use postgres_ffi::xlog_utils::{
|
|
||||||
XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
|
|
||||||
};
|
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
@@ -26,16 +22,6 @@ pub struct PostgresServer {
|
|||||||
client_config: postgres::Config,
|
client_config: postgres::Config,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
|
|
||||||
vec![
|
|
||||||
"wal_keep_size=50MB", // Ensure old WAL is not removed
|
|
||||||
"shared_preload_libraries=neon", // can only be loaded at startup
|
|
||||||
// Disable background processes as much as possible
|
|
||||||
"wal_writer_delay=10s",
|
|
||||||
"autovacuum=off",
|
|
||||||
]
|
|
||||||
});
|
|
||||||
|
|
||||||
impl Conf {
|
impl Conf {
|
||||||
fn pg_bin_dir(&self) -> PathBuf {
|
fn pg_bin_dir(&self) -> PathBuf {
|
||||||
self.pg_distrib_dir.join("bin")
|
self.pg_distrib_dir.join("bin")
|
||||||
@@ -99,8 +85,12 @@ impl Conf {
|
|||||||
.arg(unix_socket_dir_path.as_os_str())
|
.arg(unix_socket_dir_path.as_os_str())
|
||||||
.arg("-D")
|
.arg("-D")
|
||||||
.arg(self.datadir.as_os_str())
|
.arg(self.datadir.as_os_str())
|
||||||
|
.args(&["-c", "wal_keep_size=50MB"]) // Ensure old WAL is not removed
|
||||||
.args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
|
.args(&["-c", "logging_collector=on"]) // stderr will mess up with tests output
|
||||||
.args(REQUIRED_POSTGRES_CONFIG.iter().flat_map(|cfg| ["-c", cfg]))
|
.args(&["-c", "shared_preload_libraries=neon"]) // can only be loaded at startup
|
||||||
|
// Disable background processes as much as possible
|
||||||
|
.args(&["-c", "wal_writer_delay=10s"])
|
||||||
|
.args(&["-c", "autovacuum=off"])
|
||||||
.stderr(Stdio::from(log_file))
|
.stderr(Stdio::from(log_file))
|
||||||
.spawn()?;
|
.spawn()?;
|
||||||
let server = PostgresServer {
|
let server = PostgresServer {
|
||||||
@@ -154,7 +144,7 @@ impl PostgresServer {
|
|||||||
bail!("Connection timed out");
|
bail!("Connection timed out");
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn kill(mut self) {
|
pub fn kill(&mut self) {
|
||||||
self.process.kill().unwrap();
|
self.process.kill().unwrap();
|
||||||
self.process.wait().unwrap();
|
self.process.wait().unwrap();
|
||||||
}
|
}
|
||||||
@@ -191,16 +181,12 @@ pub trait PostgresClientExt: postgres::GenericClient {
|
|||||||
|
|
||||||
impl<C: postgres::GenericClient> PostgresClientExt for C {}
|
impl<C: postgres::GenericClient> PostgresClientExt for C {}
|
||||||
|
|
||||||
pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result<()> {
|
fn generate_internal<C: postgres::GenericClient>(
|
||||||
|
client: &mut C,
|
||||||
|
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
|
||||||
|
) -> Result<PgLsn> {
|
||||||
client.execute("create extension if not exists neon_test_utils", &[])?;
|
client.execute("create extension if not exists neon_test_utils", &[])?;
|
||||||
|
|
||||||
let wal_keep_size: String = client.query_one("SHOW wal_keep_size", &[])?.get(0);
|
|
||||||
ensure!(wal_keep_size == "50MB");
|
|
||||||
let wal_writer_delay: String = client.query_one("SHOW wal_writer_delay", &[])?.get(0);
|
|
||||||
ensure!(wal_writer_delay == "10s");
|
|
||||||
let autovacuum: String = client.query_one("SHOW autovacuum", &[])?.get(0);
|
|
||||||
ensure!(autovacuum == "off");
|
|
||||||
|
|
||||||
let wal_segment_size = client.query_one(
|
let wal_segment_size = client.query_one(
|
||||||
"select cast(setting as bigint) as setting, unit \
|
"select cast(setting as bigint) as setting, unit \
|
||||||
from pg_settings where name = 'wal_segment_size'",
|
from pg_settings where name = 'wal_segment_size'",
|
||||||
@@ -215,29 +201,13 @@ pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result
|
|||||||
"Unexpected wal_segment_size in bytes"
|
"Unexpected wal_segment_size in bytes"
|
||||||
);
|
);
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub trait Crafter {
|
|
||||||
const NAME: &'static str;
|
|
||||||
|
|
||||||
/// Generates WAL using the client `client`. Returns the expected end-of-wal LSN.
|
|
||||||
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn>;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn craft_internal<C: postgres::GenericClient>(
|
|
||||||
client: &mut C,
|
|
||||||
f: impl Fn(&mut C, PgLsn) -> Result<Option<PgLsn>>,
|
|
||||||
) -> Result<PgLsn> {
|
|
||||||
ensure_server_config(client)?;
|
|
||||||
|
|
||||||
let initial_lsn = client.pg_current_wal_insert_lsn()?;
|
let initial_lsn = client.pg_current_wal_insert_lsn()?;
|
||||||
info!("LSN initial = {}", initial_lsn);
|
info!("LSN initial = {}", initial_lsn);
|
||||||
|
|
||||||
let last_lsn = match f(client, initial_lsn)? {
|
let last_lsn = match f(client, initial_lsn)? {
|
||||||
None => client.pg_current_wal_insert_lsn()?,
|
None => client.pg_current_wal_insert_lsn()?,
|
||||||
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
|
Some(last_lsn) => match last_lsn.cmp(&client.pg_current_wal_insert_lsn()?) {
|
||||||
Ordering::Less => bail!("Some records were inserted after the crafted WAL"),
|
Ordering::Less => bail!("Some records were inserted after the generated WAL"),
|
||||||
Ordering::Equal => last_lsn,
|
Ordering::Equal => last_lsn,
|
||||||
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
|
Ordering::Greater => bail!("Reported LSN is greater than insert_lsn"),
|
||||||
},
|
},
|
||||||
@@ -246,116 +216,25 @@ fn craft_internal<C: postgres::GenericClient>(
|
|||||||
// Some records may be not flushed, e.g. non-transactional logical messages.
|
// Some records may be not flushed, e.g. non-transactional logical messages.
|
||||||
client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
|
client.execute("select neon_xlogflush(pg_current_wal_insert_lsn())", &[])?;
|
||||||
match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
|
match last_lsn.cmp(&client.pg_current_wal_flush_lsn()?) {
|
||||||
Ordering::Less => bail!("Some records were flushed after the crafted WAL"),
|
Ordering::Less => bail!("Some records were flushed after the generated WAL"),
|
||||||
Ordering::Equal => {}
|
Ordering::Equal => {}
|
||||||
Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
|
Ordering::Greater => bail!("Reported LSN is greater than flush_lsn"),
|
||||||
}
|
}
|
||||||
Ok(last_lsn)
|
Ok(last_lsn)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Simple;
|
pub fn generate_simple(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
|
||||||
impl Crafter for Simple {
|
generate_internal(client, |client, _| {
|
||||||
const NAME: &'static str = "simple";
|
|
||||||
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
|
|
||||||
craft_internal(client, |client, _| {
|
|
||||||
client.execute("CREATE table t(x int)", &[])?;
|
|
||||||
Ok(None)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct LastWalRecordXlogSwitch;
|
|
||||||
impl Crafter for LastWalRecordXlogSwitch {
|
|
||||||
const NAME: &'static str = "last_wal_record_xlog_switch";
|
|
||||||
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
|
|
||||||
// Do not use generate_internal because here we end up with flush_lsn exactly on
|
|
||||||
// the segment boundary and insert_lsn after the initial page header, which is unusual.
|
|
||||||
ensure_server_config(client)?;
|
|
||||||
|
|
||||||
client.execute("CREATE table t(x int)", &[])?;
|
client.execute("CREATE table t(x int)", &[])?;
|
||||||
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
|
Ok(None)
|
||||||
let next_segment = PgLsn::from(0x0200_0000);
|
})
|
||||||
ensure!(
|
|
||||||
after_xlog_switch <= next_segment,
|
|
||||||
"XLOG_SWITCH message ended after the expected segment boundary: {} > {}",
|
|
||||||
after_xlog_switch,
|
|
||||||
next_segment
|
|
||||||
);
|
|
||||||
Ok(next_segment)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct LastWalRecordXlogSwitchEndsOnPageBoundary;
|
fn generate_single_logical_message(
|
||||||
impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
|
|
||||||
const NAME: &'static str = "last_wal_record_xlog_switch_ends_on_page_boundary";
|
|
||||||
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
|
|
||||||
// Do not use generate_internal because here we end up with flush_lsn exactly on
|
|
||||||
// the segment boundary and insert_lsn after the initial page header, which is unusual.
|
|
||||||
ensure_server_config(client)?;
|
|
||||||
|
|
||||||
client.execute("CREATE table t(x int)", &[])?;
|
|
||||||
|
|
||||||
// Add padding so the XLOG_SWITCH record ends exactly on XLOG_BLCKSZ boundary.
|
|
||||||
// We will use logical message as the padding. We start with detecting how much WAL
|
|
||||||
// it takes for one logical message, considering all alignments and headers.
|
|
||||||
let base_wal_advance = {
|
|
||||||
let before_lsn = client.pg_current_wal_insert_lsn()?;
|
|
||||||
// Small non-empty message bigger than few bytes is more likely than an empty
|
|
||||||
// message to have the same format as the big padding message.
|
|
||||||
client.execute(
|
|
||||||
"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', 10))",
|
|
||||||
&[],
|
|
||||||
)?;
|
|
||||||
// The XLOG_SWITCH record has no data => its size is exactly XLOG_SIZE_OF_XLOG_RECORD.
|
|
||||||
(u64::from(client.pg_current_wal_insert_lsn()?) - u64::from(before_lsn)) as usize
|
|
||||||
+ XLOG_SIZE_OF_XLOG_RECORD
|
|
||||||
};
|
|
||||||
let mut remaining_lsn =
|
|
||||||
XLOG_BLCKSZ - u64::from(client.pg_current_wal_insert_lsn()?) as usize % XLOG_BLCKSZ;
|
|
||||||
if remaining_lsn < base_wal_advance {
|
|
||||||
remaining_lsn += XLOG_BLCKSZ;
|
|
||||||
}
|
|
||||||
let repeats = 10 + remaining_lsn - base_wal_advance;
|
|
||||||
info!(
|
|
||||||
"current_wal_insert_lsn={}, remaining_lsn={}, base_wal_advance={}, repeats={}",
|
|
||||||
client.pg_current_wal_insert_lsn()?,
|
|
||||||
remaining_lsn,
|
|
||||||
base_wal_advance,
|
|
||||||
repeats
|
|
||||||
);
|
|
||||||
client.execute(
|
|
||||||
"SELECT pg_logical_emit_message(false, 'swch', REPEAT('a', $1))",
|
|
||||||
&[&(repeats as i32)],
|
|
||||||
)?;
|
|
||||||
info!(
|
|
||||||
"current_wal_insert_lsn={}, XLOG_SIZE_OF_XLOG_RECORD={}",
|
|
||||||
client.pg_current_wal_insert_lsn()?,
|
|
||||||
XLOG_SIZE_OF_XLOG_RECORD
|
|
||||||
);
|
|
||||||
|
|
||||||
// Emit the XLOG_SWITCH
|
|
||||||
let after_xlog_switch: PgLsn = client.query_one("SELECT pg_switch_wal()", &[])?.get(0);
|
|
||||||
let next_segment = PgLsn::from(0x0200_0000);
|
|
||||||
ensure!(
|
|
||||||
after_xlog_switch < next_segment,
|
|
||||||
"XLOG_SWITCH message ended on or after the expected segment boundary: {} > {}",
|
|
||||||
after_xlog_switch,
|
|
||||||
next_segment
|
|
||||||
);
|
|
||||||
ensure!(
|
|
||||||
u64::from(after_xlog_switch) as usize % XLOG_BLCKSZ == XLOG_SIZE_OF_XLOG_SHORT_PHD,
|
|
||||||
"XLOG_SWITCH message ended not on page boundary: {}",
|
|
||||||
after_xlog_switch
|
|
||||||
);
|
|
||||||
Ok(next_segment)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn craft_single_logical_message(
|
|
||||||
client: &mut impl postgres::GenericClient,
|
client: &mut impl postgres::GenericClient,
|
||||||
transactional: bool,
|
transactional: bool,
|
||||||
) -> Result<PgLsn> {
|
) -> Result<PgLsn> {
|
||||||
craft_internal(client, |client, initial_lsn| {
|
generate_internal(client, |client, initial_lsn| {
|
||||||
ensure!(
|
ensure!(
|
||||||
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
|
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
|
||||||
"Initial LSN is too far in the future"
|
"Initial LSN is too far in the future"
|
||||||
@@ -393,18 +272,14 @@ fn craft_single_logical_message(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct WalRecordCrossingSegmentFollowedBySmallOne;
|
pub fn generate_wal_record_crossing_segment_followed_by_small_one(
|
||||||
impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
|
client: &mut impl postgres::GenericClient,
|
||||||
const NAME: &'static str = "wal_record_crossing_segment_followed_by_small_one";
|
) -> Result<PgLsn> {
|
||||||
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
|
generate_single_logical_message(client, true)
|
||||||
craft_single_logical_message(client, true)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct LastWalRecordCrossingSegment;
|
pub fn generate_last_wal_record_crossing_segment<C: postgres::GenericClient>(
|
||||||
impl Crafter for LastWalRecordCrossingSegment {
|
client: &mut C,
|
||||||
const NAME: &'static str = "last_wal_record_crossing_segment";
|
) -> Result<PgLsn> {
|
||||||
fn craft(client: &mut impl postgres::GenericClient) -> Result<PgLsn> {
|
generate_single_logical_message(client, false)
|
||||||
craft_single_logical_message(client, false)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
@@ -42,19 +42,13 @@ pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
|
|||||||
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
|
/// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
|
||||||
pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
|
pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
|
||||||
|
|
||||||
pub trait RemoteObjectName {
|
|
||||||
// Needed to retrieve last component for RemoteObjectId.
|
|
||||||
// In other words a file name
|
|
||||||
fn object_name(&self) -> Option<&str>;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Storage (potentially remote) API to manage its state.
|
/// Storage (potentially remote) API to manage its state.
|
||||||
/// This storage tries to be unaware of any layered repository context,
|
/// This storage tries to be unaware of any layered repository context,
|
||||||
/// providing basic CRUD operations for storage files.
|
/// providing basic CRUD operations for storage files.
|
||||||
#[async_trait::async_trait]
|
#[async_trait::async_trait]
|
||||||
pub trait RemoteStorage: Send + Sync {
|
pub trait RemoteStorage: Send + Sync {
|
||||||
/// A way to uniquely reference a file in the remote storage.
|
/// A way to uniquely reference a file in the remote storage.
|
||||||
type RemoteObjectId: RemoteObjectName;
|
type RemoteObjectId;
|
||||||
|
|
||||||
/// Attempts to derive the storage path out of the local path, if the latter is correct.
|
/// Attempts to derive the storage path out of the local path, if the latter is correct.
|
||||||
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId>;
|
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<Self::RemoteObjectId>;
|
||||||
@@ -65,12 +59,6 @@ pub trait RemoteStorage: Send + Sync {
|
|||||||
/// Lists all items the storage has right now.
|
/// Lists all items the storage has right now.
|
||||||
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
|
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
|
||||||
|
|
||||||
/// Lists all top level subdirectories for a given prefix
|
|
||||||
async fn list_prefixes(
|
|
||||||
&self,
|
|
||||||
prefix: Option<Self::RemoteObjectId>,
|
|
||||||
) -> anyhow::Result<Vec<Self::RemoteObjectId>>;
|
|
||||||
|
|
||||||
/// Streams the local file contents into remote into the remote storage entry.
|
/// Streams the local file contents into remote into the remote storage entry.
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
//! volume is mounted to the local FS.
|
//! volume is mounted to the local FS.
|
||||||
|
|
||||||
use std::{
|
use std::{
|
||||||
borrow::Cow,
|
|
||||||
future::Future,
|
future::Future,
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
pin::Pin,
|
pin::Pin,
|
||||||
@@ -18,16 +17,10 @@ use tokio::{
|
|||||||
};
|
};
|
||||||
use tracing::*;
|
use tracing::*;
|
||||||
|
|
||||||
use crate::{path_with_suffix_extension, Download, DownloadError, RemoteObjectName};
|
use crate::{path_with_suffix_extension, Download, DownloadError};
|
||||||
|
|
||||||
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
|
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
|
||||||
|
|
||||||
impl RemoteObjectName for PathBuf {
|
|
||||||
fn object_name(&self) -> Option<&str> {
|
|
||||||
self.file_stem().and_then(|n| n.to_str())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct LocalFs {
|
pub struct LocalFs {
|
||||||
working_directory: PathBuf,
|
working_directory: PathBuf,
|
||||||
storage_root: PathBuf,
|
storage_root: PathBuf,
|
||||||
@@ -108,18 +101,7 @@ impl RemoteStorage for LocalFs {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
|
async fn list(&self) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
|
||||||
get_all_files(&self.storage_root, true).await
|
get_all_files(&self.storage_root).await
|
||||||
}
|
|
||||||
|
|
||||||
async fn list_prefixes(
|
|
||||||
&self,
|
|
||||||
prefix: Option<Self::RemoteObjectId>,
|
|
||||||
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
|
|
||||||
let path = match prefix {
|
|
||||||
Some(prefix) => Cow::Owned(self.storage_root.join(prefix)),
|
|
||||||
None => Cow::Borrowed(&self.storage_root),
|
|
||||||
};
|
|
||||||
get_all_files(path.as_ref(), false).await
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn upload(
|
async fn upload(
|
||||||
@@ -317,7 +299,6 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
|
|||||||
|
|
||||||
fn get_all_files<'a, P>(
|
fn get_all_files<'a, P>(
|
||||||
directory_path: P,
|
directory_path: P,
|
||||||
recursive: bool,
|
|
||||||
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
|
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
|
||||||
where
|
where
|
||||||
P: AsRef<Path> + Send + Sync + 'a,
|
P: AsRef<Path> + Send + Sync + 'a,
|
||||||
@@ -334,11 +315,7 @@ where
|
|||||||
if file_type.is_symlink() {
|
if file_type.is_symlink() {
|
||||||
debug!("{:?} us a symlink, skipping", entry_path)
|
debug!("{:?} us a symlink, skipping", entry_path)
|
||||||
} else if file_type.is_dir() {
|
} else if file_type.is_dir() {
|
||||||
if recursive {
|
paths.extend(get_all_files(entry_path).await?.into_iter())
|
||||||
paths.extend(get_all_files(entry_path, true).await?.into_iter())
|
|
||||||
} else {
|
|
||||||
paths.push(dir_entry.path())
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
paths.push(dir_entry.path());
|
paths.push(dir_entry.path());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,9 +19,7 @@ use tokio::{io, sync::Semaphore};
|
|||||||
use tokio_util::io::ReaderStream;
|
use tokio_util::io::ReaderStream;
|
||||||
use tracing::debug;
|
use tracing::debug;
|
||||||
|
|
||||||
use crate::{
|
use crate::{strip_path_prefix, Download, DownloadError, RemoteStorage, S3Config};
|
||||||
strip_path_prefix, Download, DownloadError, RemoteObjectName, RemoteStorage, S3Config,
|
|
||||||
};
|
|
||||||
|
|
||||||
use super::StorageMetadata;
|
use super::StorageMetadata;
|
||||||
|
|
||||||
@@ -119,25 +117,6 @@ impl S3ObjectKey {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RemoteObjectName for S3ObjectKey {
|
|
||||||
/// Turn a/b/c or a/b/c/ into c
|
|
||||||
fn object_name(&self) -> Option<&str> {
|
|
||||||
// corner case, char::to_string is not const, thats why this is more verbose than it needs to be
|
|
||||||
// see https://github.com/rust-lang/rust/issues/88674
|
|
||||||
if self.0.len() == 1 && self.0.chars().next().unwrap() == S3_PREFIX_SEPARATOR {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.0.ends_with(S3_PREFIX_SEPARATOR) {
|
|
||||||
self.0.rsplit(S3_PREFIX_SEPARATOR).nth(1)
|
|
||||||
} else {
|
|
||||||
self.0
|
|
||||||
.rsplit_once(S3_PREFIX_SEPARATOR)
|
|
||||||
.map(|(_, last)| last)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// AWS S3 storage.
|
/// AWS S3 storage.
|
||||||
pub struct S3Bucket {
|
pub struct S3Bucket {
|
||||||
workdir: PathBuf,
|
workdir: PathBuf,
|
||||||
@@ -304,77 +283,6 @@ impl RemoteStorage for S3Bucket {
|
|||||||
Ok(document_keys)
|
Ok(document_keys)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Note: it wont include empty "directories"
|
|
||||||
async fn list_prefixes(
|
|
||||||
&self,
|
|
||||||
prefix: Option<Self::RemoteObjectId>,
|
|
||||||
) -> anyhow::Result<Vec<Self::RemoteObjectId>> {
|
|
||||||
let list_prefix = match prefix {
|
|
||||||
Some(prefix) => {
|
|
||||||
let mut prefix_in_bucket = self.prefix_in_bucket.clone().unwrap_or_default();
|
|
||||||
// if there is no trailing / in default prefix and
|
|
||||||
// supplied prefix does not start with "/" insert it
|
|
||||||
if !(prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR)
|
|
||||||
|| prefix.0.starts_with(S3_PREFIX_SEPARATOR))
|
|
||||||
{
|
|
||||||
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
|
|
||||||
}
|
|
||||||
|
|
||||||
prefix_in_bucket.push_str(&prefix.0);
|
|
||||||
// required to end with a separator
|
|
||||||
// otherwise request will return only the entry of a prefix
|
|
||||||
if !prefix_in_bucket.ends_with(S3_PREFIX_SEPARATOR) {
|
|
||||||
prefix_in_bucket.push(S3_PREFIX_SEPARATOR);
|
|
||||||
}
|
|
||||||
Some(prefix_in_bucket)
|
|
||||||
}
|
|
||||||
None => self.prefix_in_bucket.clone(),
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut document_keys = Vec::new();
|
|
||||||
|
|
||||||
let mut continuation_token = None;
|
|
||||||
loop {
|
|
||||||
let _guard = self
|
|
||||||
.concurrency_limiter
|
|
||||||
.acquire()
|
|
||||||
.await
|
|
||||||
.context("Concurrency limiter semaphore got closed during S3 list")?;
|
|
||||||
|
|
||||||
metrics::inc_list_objects();
|
|
||||||
|
|
||||||
let fetch_response = self
|
|
||||||
.client
|
|
||||||
.list_objects_v2(ListObjectsV2Request {
|
|
||||||
bucket: self.bucket_name.clone(),
|
|
||||||
prefix: list_prefix.clone(),
|
|
||||||
continuation_token,
|
|
||||||
delimiter: Some(S3_PREFIX_SEPARATOR.to_string()),
|
|
||||||
..ListObjectsV2Request::default()
|
|
||||||
})
|
|
||||||
.await
|
|
||||||
.map_err(|e| {
|
|
||||||
metrics::inc_list_objects_fail();
|
|
||||||
e
|
|
||||||
})?;
|
|
||||||
|
|
||||||
document_keys.extend(
|
|
||||||
fetch_response
|
|
||||||
.common_prefixes
|
|
||||||
.unwrap_or_default()
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|o| Some(S3ObjectKey(o.prefix?))),
|
|
||||||
);
|
|
||||||
|
|
||||||
match fetch_response.continuation_token {
|
|
||||||
Some(new_token) => continuation_token = Some(new_token),
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(document_keys)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn upload(
|
async fn upload(
|
||||||
&self,
|
&self,
|
||||||
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
|
||||||
@@ -470,25 +378,6 @@ mod tests {
|
|||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn object_name() {
|
|
||||||
let k = S3ObjectKey("a/b/c".to_owned());
|
|
||||||
assert_eq!(k.object_name(), Some("c"));
|
|
||||||
|
|
||||||
let k = S3ObjectKey("a/b/c/".to_owned());
|
|
||||||
assert_eq!(k.object_name(), Some("c"));
|
|
||||||
|
|
||||||
let k = S3ObjectKey("a/".to_owned());
|
|
||||||
assert_eq!(k.object_name(), Some("a"));
|
|
||||||
|
|
||||||
// XXX is it impossible to have an empty key?
|
|
||||||
let k = S3ObjectKey("".to_owned());
|
|
||||||
assert_eq!(k.object_name(), None);
|
|
||||||
|
|
||||||
let k = S3ObjectKey("/".to_owned());
|
|
||||||
assert_eq!(k.object_name(), None);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn download_destination() -> anyhow::Result<()> {
|
fn download_destination() -> anyhow::Result<()> {
|
||||||
let workdir = tempdir()?.path().to_owned();
|
let workdir = tempdir()?.path().to_owned();
|
||||||
|
|||||||
@@ -537,13 +537,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
|
|||||||
match tenant_match.subcommand() {
|
match tenant_match.subcommand() {
|
||||||
Some(("list", _)) => {
|
Some(("list", _)) => {
|
||||||
for t in pageserver.tenant_list()? {
|
for t in pageserver.tenant_list()? {
|
||||||
println!(
|
println!("{} {}", t.id, t.state);
|
||||||
"{} {}",
|
|
||||||
t.id,
|
|
||||||
t.state
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
.unwrap_or_else(|| String::from(""))
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(("create", create_match)) => {
|
Some(("create", create_match)) => {
|
||||||
|
|||||||
@@ -60,7 +60,6 @@ where
|
|||||||
write: W,
|
write: W,
|
||||||
timeline: &'a Arc<DatadirTimelineImpl>,
|
timeline: &'a Arc<DatadirTimelineImpl>,
|
||||||
req_lsn: Option<Lsn>,
|
req_lsn: Option<Lsn>,
|
||||||
prev_lsn: Option<Lsn>,
|
|
||||||
full_backup: bool,
|
full_backup: bool,
|
||||||
) -> Result<Basebackup<'a, W>> {
|
) -> Result<Basebackup<'a, W>> {
|
||||||
// Compute postgres doesn't have any previous WAL files, but the first
|
// Compute postgres doesn't have any previous WAL files, but the first
|
||||||
@@ -97,26 +96,16 @@ where
|
|||||||
(end_of_timeline.prev, end_of_timeline.last)
|
(end_of_timeline.prev, end_of_timeline.last)
|
||||||
};
|
};
|
||||||
|
|
||||||
// Consolidate the derived and the provided prev_lsn values
|
|
||||||
let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn {
|
|
||||||
if backup_prev != Lsn(0) {
|
|
||||||
ensure!(backup_prev == provided_prev_lsn)
|
|
||||||
}
|
|
||||||
provided_prev_lsn
|
|
||||||
} else {
|
|
||||||
backup_prev
|
|
||||||
};
|
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"taking basebackup lsn={}, prev_lsn={} (full_backup={})",
|
"taking basebackup lsn={}, prev_lsn={} (full_backup={})",
|
||||||
backup_lsn, prev_lsn, full_backup
|
backup_lsn, backup_prev, full_backup
|
||||||
);
|
);
|
||||||
|
|
||||||
Ok(Basebackup {
|
Ok(Basebackup {
|
||||||
ar: Builder::new(AbortableWrite::new(write)),
|
ar: Builder::new(AbortableWrite::new(write)),
|
||||||
timeline,
|
timeline,
|
||||||
lsn: backup_lsn,
|
lsn: backup_lsn,
|
||||||
prev_record_lsn: prev_lsn,
|
prev_record_lsn: backup_prev,
|
||||||
full_backup,
|
full_backup,
|
||||||
finished: false,
|
finished: false,
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -22,49 +22,6 @@ paths:
|
|||||||
properties:
|
properties:
|
||||||
id:
|
id:
|
||||||
type: integer
|
type: integer
|
||||||
|
|
||||||
/v1/tenant/{tenant_id}:
|
|
||||||
parameters:
|
|
||||||
- name: tenant_id
|
|
||||||
in: path
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
format: hex
|
|
||||||
get:
|
|
||||||
description: Get tenant status
|
|
||||||
responses:
|
|
||||||
"200":
|
|
||||||
description: Currently returns the flag whether the tenant has inprogress timeline downloads
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/TenantInfo"
|
|
||||||
"400":
|
|
||||||
description: Error when no tenant id found in path or no timeline id
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/Error"
|
|
||||||
"401":
|
|
||||||
description: Unauthorized Error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/UnauthorizedError"
|
|
||||||
"403":
|
|
||||||
description: Forbidden Error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/ForbiddenError"
|
|
||||||
"500":
|
|
||||||
description: Generic operation error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/Error"
|
|
||||||
|
|
||||||
/v1/tenant/{tenant_id}/timeline:
|
/v1/tenant/{tenant_id}/timeline:
|
||||||
parameters:
|
parameters:
|
||||||
- name: tenant_id
|
- name: tenant_id
|
||||||
@@ -113,7 +70,6 @@ paths:
|
|||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/Error"
|
$ref: "#/components/schemas/Error"
|
||||||
|
|
||||||
/v1/tenant/{tenant_id}/timeline/{timeline_id}:
|
/v1/tenant/{tenant_id}/timeline/{timeline_id}:
|
||||||
parameters:
|
parameters:
|
||||||
- name: tenant_id
|
- name: tenant_id
|
||||||
@@ -128,14 +84,13 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
format: hex
|
format: hex
|
||||||
|
- name: include-non-incremental-logical-size
|
||||||
|
in: query
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
description: Controls calculation of current_logical_size_non_incremental
|
||||||
get:
|
get:
|
||||||
description: Get info about the timeline
|
description: Get info about the timeline
|
||||||
parameters:
|
|
||||||
- name: include-non-incremental-logical-size
|
|
||||||
in: query
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
description: Controls calculation of current_logical_size_non_incremental
|
|
||||||
responses:
|
responses:
|
||||||
"200":
|
"200":
|
||||||
description: TimelineInfo
|
description: TimelineInfo
|
||||||
@@ -167,35 +122,6 @@ paths:
|
|||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/Error"
|
$ref: "#/components/schemas/Error"
|
||||||
delete:
|
|
||||||
description: "Attempts to delete specified timeline. On 500 errors should be retried"
|
|
||||||
responses:
|
|
||||||
"200":
|
|
||||||
description: Ok
|
|
||||||
"400":
|
|
||||||
description: Error when no tenant id found in path or no timeline id
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/Error"
|
|
||||||
"401":
|
|
||||||
description: Unauthorized Error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/UnauthorizedError"
|
|
||||||
"403":
|
|
||||||
description: Forbidden Error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/ForbiddenError"
|
|
||||||
"500":
|
|
||||||
description: Generic operation error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/Error"
|
|
||||||
|
|
||||||
/v1/tenant/{tenant_id}/timeline/{timeline_id}/wal_receiver:
|
/v1/tenant/{tenant_id}/timeline/{timeline_id}/wal_receiver:
|
||||||
parameters:
|
parameters:
|
||||||
@@ -245,7 +171,7 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/Error"
|
$ref: "#/components/schemas/Error"
|
||||||
|
|
||||||
/v1/tenant/{tenant_id}/attach:
|
/v1/tenant/{tenant_id}/timeline/{timeline_id}/attach:
|
||||||
parameters:
|
parameters:
|
||||||
- name: tenant_id
|
- name: tenant_id
|
||||||
in: path
|
in: path
|
||||||
@@ -253,13 +179,19 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
format: hex
|
format: hex
|
||||||
|
- name: timeline_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
format: hex
|
||||||
post:
|
post:
|
||||||
description: Schedules attach operation to happen in the background for given tenant
|
description: Attach remote timeline
|
||||||
responses:
|
responses:
|
||||||
"202":
|
"200":
|
||||||
description: Tenant attaching scheduled
|
description: Timeline attaching scheduled
|
||||||
"400":
|
"400":
|
||||||
description: Error when no tenant id found in path parameters
|
description: Error when no tenant id found in path or no timeline id
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
@@ -283,7 +215,7 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/NotFoundError"
|
$ref: "#/components/schemas/NotFoundError"
|
||||||
"409":
|
"409":
|
||||||
description: Tenant download is already in progress
|
description: Timeline download is already in progress
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
@@ -295,6 +227,7 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/Error"
|
$ref: "#/components/schemas/Error"
|
||||||
|
|
||||||
|
|
||||||
/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach:
|
/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach:
|
||||||
parameters:
|
parameters:
|
||||||
- name: tenant_id
|
- name: tenant_id
|
||||||
@@ -310,11 +243,10 @@ paths:
|
|||||||
type: string
|
type: string
|
||||||
format: hex
|
format: hex
|
||||||
post:
|
post:
|
||||||
description: Deprecated, use DELETE /v1/tenant/{tenant_id}/timeline/{timeline_id} instead
|
description: Detach local timeline
|
||||||
deprecated: true
|
|
||||||
responses:
|
responses:
|
||||||
"200":
|
"200":
|
||||||
description: Ok
|
description: Timeline detached
|
||||||
"400":
|
"400":
|
||||||
description: Error when no tenant id found in path or no timeline id
|
description: Error when no tenant id found in path or no timeline id
|
||||||
content:
|
content:
|
||||||
@@ -340,43 +272,6 @@ paths:
|
|||||||
schema:
|
schema:
|
||||||
$ref: "#/components/schemas/Error"
|
$ref: "#/components/schemas/Error"
|
||||||
|
|
||||||
/v1/tenant/{tenant_id}/detach:
|
|
||||||
parameters:
|
|
||||||
- name: tenant_id
|
|
||||||
in: path
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
format: hex
|
|
||||||
post:
|
|
||||||
description: Detach local tenant
|
|
||||||
responses:
|
|
||||||
"200":
|
|
||||||
description: Tenant detached
|
|
||||||
"400":
|
|
||||||
description: Error when no tenant id found in path parameters
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/Error"
|
|
||||||
"401":
|
|
||||||
description: Unauthorized Error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/UnauthorizedError"
|
|
||||||
"403":
|
|
||||||
description: Forbidden Error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/ForbiddenError"
|
|
||||||
"500":
|
|
||||||
description: Generic operation error
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: "#/components/schemas/Error"
|
|
||||||
|
|
||||||
/v1/tenant/{tenant_id}/timeline/:
|
/v1/tenant/{tenant_id}/timeline/:
|
||||||
parameters:
|
parameters:
|
||||||
@@ -572,13 +467,12 @@ components:
|
|||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- id
|
- id
|
||||||
|
- state
|
||||||
properties:
|
properties:
|
||||||
id:
|
id:
|
||||||
type: string
|
type: string
|
||||||
state:
|
state:
|
||||||
type: string
|
type: string
|
||||||
has_in_progress_downloads:
|
|
||||||
type: boolean
|
|
||||||
TenantCreateInfo:
|
TenantCreateInfo:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
@@ -673,7 +567,6 @@ components:
|
|||||||
type: integer
|
type: integer
|
||||||
current_logical_size_non_incremental:
|
current_logical_size_non_incremental:
|
||||||
type: integer
|
type: integer
|
||||||
|
|
||||||
WalReceiverEntry:
|
WalReceiverEntry:
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ use crate::repository::Repository;
|
|||||||
use crate::storage_sync;
|
use crate::storage_sync;
|
||||||
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
|
use crate::storage_sync::index::{RemoteIndex, RemoteTimeline};
|
||||||
use crate::tenant_config::TenantConfOpt;
|
use crate::tenant_config::TenantConfOpt;
|
||||||
use crate::tenant_mgr::TenantInfo;
|
|
||||||
use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
|
use crate::timelines::{LocalTimelineInfo, RemoteTimelineInfo, TimelineInfo};
|
||||||
use crate::{config::PageServerConf, tenant_mgr, timelines};
|
use crate::{config::PageServerConf, tenant_mgr, timelines};
|
||||||
use utils::{
|
use utils::{
|
||||||
@@ -210,9 +209,9 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
|||||||
.await;
|
.await;
|
||||||
|
|
||||||
if local_timeline_info.is_none() && remote_timeline_info.is_none() {
|
if local_timeline_info.is_none() && remote_timeline_info.is_none() {
|
||||||
return Err(ApiError::NotFound(format!(
|
return Err(ApiError::NotFound(
|
||||||
"Timeline {tenant_id}/{timeline_id} is not found neither locally nor remotely"
|
"Timeline is not found neither locally nor remotely".to_string(),
|
||||||
)));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let timeline_info = TimelineInfo {
|
let timeline_info = TimelineInfo {
|
||||||
@@ -242,157 +241,123 @@ async fn wal_receiver_get_handler(request: Request<Body>) -> Result<Response<Bod
|
|||||||
json_response(StatusCode::OK, &wal_receiver_entry)
|
json_response(StatusCode::OK, &wal_receiver_entry)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO makes sense to provide tenant config right away the same way as it handled in tenant_create
|
async fn timeline_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||||
async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
|
||||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||||
check_permission(&request, Some(tenant_id))?;
|
check_permission(&request, Some(tenant_id))?;
|
||||||
|
|
||||||
info!("Handling tenant attach {}", tenant_id,);
|
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||||
|
info!(
|
||||||
|
"Handling timeline {} attach for tenant: {}",
|
||||||
|
timeline_id, tenant_id,
|
||||||
|
);
|
||||||
|
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
if tenant_mgr::get_tenant_state(tenant_id).is_some() {
|
if tenant_mgr::get_local_timeline_with_load(tenant_id, timeline_id).is_ok() {
|
||||||
anyhow::bail!("Tenant is already present locally")
|
// TODO: maybe answer with 309 Not Modified here?
|
||||||
|
anyhow::bail!("Timeline is already present locally")
|
||||||
};
|
};
|
||||||
Ok(())
|
Ok(())
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(ApiError::from_err)??;
|
.map_err(ApiError::from_err)??;
|
||||||
|
|
||||||
|
let sync_id = ZTenantTimelineId {
|
||||||
|
tenant_id,
|
||||||
|
timeline_id,
|
||||||
|
};
|
||||||
let state = get_state(&request);
|
let state = get_state(&request);
|
||||||
let remote_index = &state.remote_index;
|
let remote_index = &state.remote_index;
|
||||||
|
|
||||||
let mut index_accessor = remote_index.write().await;
|
let mut index_accessor = remote_index.write().await;
|
||||||
if let Some(tenant_entry) = index_accessor.tenant_entry_mut(&tenant_id) {
|
if let Some(remote_timeline) = index_accessor.timeline_entry_mut(&sync_id) {
|
||||||
if tenant_entry.has_in_progress_downloads() {
|
if remote_timeline.awaits_download {
|
||||||
return Err(ApiError::Conflict(
|
return Err(ApiError::Conflict(
|
||||||
"Tenant download is already in progress".to_string(),
|
"Timeline download is already in progress".to_string(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
for (timeline_id, remote_timeline) in tenant_entry.iter_mut() {
|
|
||||||
storage_sync::schedule_layer_download(tenant_id, *timeline_id);
|
|
||||||
remote_timeline.awaits_download = true;
|
|
||||||
}
|
|
||||||
return json_response(StatusCode::ACCEPTED, ());
|
|
||||||
}
|
|
||||||
// no tenant in the index, release the lock to make the potentially lengthy download opetation
|
|
||||||
drop(index_accessor);
|
|
||||||
|
|
||||||
// download index parts for every tenant timeline
|
|
||||||
let remote_timelines = match gather_tenant_timelines_index_parts(state, tenant_id).await {
|
|
||||||
Ok(Some(remote_timelines)) => remote_timelines,
|
|
||||||
Ok(None) => return Err(ApiError::NotFound("Unknown remote tenant".to_string())),
|
|
||||||
Err(e) => {
|
|
||||||
error!("Failed to retrieve remote tenant data: {:?}", e);
|
|
||||||
return Err(ApiError::NotFound(
|
|
||||||
"Failed to retrieve remote tenant".to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// recheck that download is not in progress because
|
|
||||||
// we've released the lock to avoid holding it during the download
|
|
||||||
let mut index_accessor = remote_index.write().await;
|
|
||||||
let tenant_entry = match index_accessor.tenant_entry_mut(&tenant_id) {
|
|
||||||
Some(tenant_entry) => {
|
|
||||||
if tenant_entry.has_in_progress_downloads() {
|
|
||||||
return Err(ApiError::Conflict(
|
|
||||||
"Tenant download is already in progress".to_string(),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
tenant_entry
|
|
||||||
}
|
|
||||||
None => index_accessor.add_tenant_entry(tenant_id),
|
|
||||||
};
|
|
||||||
|
|
||||||
// populate remote index with the data from index part and create directories on the local filesystem
|
|
||||||
for (timeline_id, mut remote_timeline) in remote_timelines {
|
|
||||||
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
|
|
||||||
.await
|
|
||||||
.context("Failed to create new timeline directory")?;
|
|
||||||
|
|
||||||
remote_timeline.awaits_download = true;
|
remote_timeline.awaits_download = true;
|
||||||
tenant_entry.insert(timeline_id, remote_timeline);
|
|
||||||
// schedule actual download
|
|
||||||
storage_sync::schedule_layer_download(tenant_id, timeline_id);
|
storage_sync::schedule_layer_download(tenant_id, timeline_id);
|
||||||
|
return json_response(StatusCode::ACCEPTED, ());
|
||||||
|
} else {
|
||||||
|
// no timeline in the index, release the lock to make the potentially lengthy download opetation
|
||||||
|
drop(index_accessor);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let new_timeline = match try_download_index_part_data(state, sync_id).await {
|
||||||
|
Ok(Some(mut new_timeline)) => {
|
||||||
|
tokio::fs::create_dir_all(state.conf.timeline_path(&timeline_id, &tenant_id))
|
||||||
|
.await
|
||||||
|
.context("Failed to create new timeline directory")?;
|
||||||
|
new_timeline.awaits_download = true;
|
||||||
|
new_timeline
|
||||||
|
}
|
||||||
|
Ok(None) => return Err(ApiError::NotFound("Unknown remote timeline".to_string())),
|
||||||
|
Err(e) => {
|
||||||
|
error!("Failed to retrieve remote timeline data: {:?}", e);
|
||||||
|
return Err(ApiError::NotFound(
|
||||||
|
"Failed to retrieve remote timeline".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut index_accessor = remote_index.write().await;
|
||||||
|
match index_accessor.timeline_entry_mut(&sync_id) {
|
||||||
|
Some(remote_timeline) => {
|
||||||
|
if remote_timeline.awaits_download {
|
||||||
|
return Err(ApiError::Conflict(
|
||||||
|
"Timeline download is already in progress".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
remote_timeline.awaits_download = true;
|
||||||
|
}
|
||||||
|
None => index_accessor.add_timeline_entry(sync_id, new_timeline),
|
||||||
|
}
|
||||||
|
storage_sync::schedule_layer_download(tenant_id, timeline_id);
|
||||||
json_response(StatusCode::ACCEPTED, ())
|
json_response(StatusCode::ACCEPTED, ())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Note: is expensive from s3 access perspective,
|
async fn try_download_index_part_data(
|
||||||
/// for details see comment to `storage_sync::gather_tenant_timelines_index_parts`
|
|
||||||
async fn gather_tenant_timelines_index_parts(
|
|
||||||
state: &State,
|
state: &State,
|
||||||
tenant_id: ZTenantId,
|
sync_id: ZTenantTimelineId,
|
||||||
) -> anyhow::Result<Option<Vec<(ZTimelineId, RemoteTimeline)>>> {
|
) -> anyhow::Result<Option<RemoteTimeline>> {
|
||||||
let index_parts = match state.remote_storage.as_ref() {
|
let index_part = match state.remote_storage.as_ref() {
|
||||||
Some(GenericRemoteStorage::Local(local_storage)) => {
|
Some(GenericRemoteStorage::Local(local_storage)) => {
|
||||||
storage_sync::gather_tenant_timelines_index_parts(state.conf, local_storage, tenant_id)
|
storage_sync::download_index_part(state.conf, local_storage, sync_id).await
|
||||||
.await
|
|
||||||
}
|
}
|
||||||
// FIXME here s3 storage contains its own limits, that are separate from sync storage thread ones
|
|
||||||
// because it is a different instance. We can move this limit to some global static
|
|
||||||
// or use one instance everywhere.
|
|
||||||
Some(GenericRemoteStorage::S3(s3_storage)) => {
|
Some(GenericRemoteStorage::S3(s3_storage)) => {
|
||||||
storage_sync::gather_tenant_timelines_index_parts(state.conf, s3_storage, tenant_id)
|
storage_sync::download_index_part(state.conf, s3_storage, sync_id).await
|
||||||
.await
|
|
||||||
}
|
}
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
}
|
}
|
||||||
.with_context(|| format!("Failed to download index parts for tenant {tenant_id}"))?;
|
.with_context(|| format!("Failed to download index part for timeline {sync_id}"))?;
|
||||||
|
|
||||||
let mut remote_timelines = Vec::with_capacity(index_parts.len());
|
let timeline_path = state
|
||||||
for (timeline_id, index_part) in index_parts {
|
.conf
|
||||||
let timeline_path = state.conf.timeline_path(&timeline_id, &tenant_id);
|
.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
|
||||||
let remote_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
|
RemoteTimeline::from_index_part(&timeline_path, index_part)
|
||||||
.with_context(|| {
|
.map(Some)
|
||||||
format!("Failed to convert index part into remote timeline for timeline {tenant_id}/{timeline_id}")
|
.with_context(|| {
|
||||||
})?;
|
format!("Failed to convert index part into remote timeline for timeline {sync_id}")
|
||||||
remote_timelines.push((timeline_id, remote_timeline));
|
})
|
||||||
}
|
|
||||||
Ok(Some(remote_timelines))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn timeline_delete_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
async fn timeline_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
||||||
check_permission(&request, Some(tenant_id))?;
|
check_permission(&request, Some(tenant_id))?;
|
||||||
|
|
||||||
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
let timeline_id: ZTimelineId = parse_request_param(&request, "timeline_id")?;
|
||||||
|
|
||||||
let state = get_state(&request);
|
|
||||||
tokio::task::spawn_blocking(move || {
|
tokio::task::spawn_blocking(move || {
|
||||||
let _enter = info_span!("tenant_detach_handler", tenant = %tenant_id).entered();
|
let _enter =
|
||||||
tenant_mgr::delete_timeline(tenant_id, timeline_id)
|
info_span!("timeline_detach_handler", tenant = %tenant_id, timeline = %timeline_id)
|
||||||
|
.entered();
|
||||||
|
let state = get_state(&request);
|
||||||
|
tenant_mgr::detach_timeline(state.conf, tenant_id, timeline_id)
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(ApiError::from_err)??;
|
.map_err(ApiError::from_err)??;
|
||||||
|
|
||||||
let mut remote_index = state.remote_index.write().await;
|
|
||||||
remote_index.remove_timeline_entry(ZTenantTimelineId {
|
|
||||||
tenant_id,
|
|
||||||
timeline_id,
|
|
||||||
});
|
|
||||||
|
|
||||||
json_response(StatusCode::OK, ())
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
|
||||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
|
||||||
check_permission(&request, Some(tenant_id))?;
|
|
||||||
|
|
||||||
let state = get_state(&request);
|
|
||||||
let conf = state.conf;
|
|
||||||
tokio::task::spawn_blocking(move || {
|
|
||||||
let _enter = info_span!("tenant_detach_handler", tenant = %tenant_id).entered();
|
|
||||||
tenant_mgr::detach_tenant(conf, tenant_id)
|
|
||||||
})
|
|
||||||
.await
|
|
||||||
.map_err(ApiError::from_err)??;
|
|
||||||
|
|
||||||
let mut remote_index = state.remote_index.write().await;
|
|
||||||
remote_index.remove_tenant_entry(&tenant_id);
|
|
||||||
|
|
||||||
json_response(StatusCode::OK, ())
|
json_response(StatusCode::OK, ())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -400,13 +365,9 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
|||||||
// check for management permission
|
// check for management permission
|
||||||
check_permission(&request, None)?;
|
check_permission(&request, None)?;
|
||||||
|
|
||||||
let state = get_state(&request);
|
|
||||||
// clone to avoid holding the lock while awaiting for blocking task
|
|
||||||
let remote_index = state.remote_index.read().await.clone();
|
|
||||||
|
|
||||||
let response_data = tokio::task::spawn_blocking(move || {
|
let response_data = tokio::task::spawn_blocking(move || {
|
||||||
let _enter = info_span!("tenant_list").entered();
|
let _enter = info_span!("tenant_list").entered();
|
||||||
crate::tenant_mgr::list_tenants(&remote_index)
|
crate::tenant_mgr::list_tenants()
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(ApiError::from_err)?;
|
.map_err(ApiError::from_err)?;
|
||||||
@@ -414,34 +375,6 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
|
|||||||
json_response(StatusCode::OK, response_data)
|
json_response(StatusCode::OK, response_data)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
|
||||||
let tenant_id: ZTenantId = parse_request_param(&request, "tenant_id")?;
|
|
||||||
check_permission(&request, Some(tenant_id))?;
|
|
||||||
|
|
||||||
// if tenant is in progress of downloading it can be absent in global tenant map
|
|
||||||
let tenant_state = tokio::task::spawn_blocking(move || tenant_mgr::get_tenant_state(tenant_id))
|
|
||||||
.await
|
|
||||||
.map_err(ApiError::from_err)?;
|
|
||||||
|
|
||||||
let state = get_state(&request);
|
|
||||||
let remote_index = &state.remote_index;
|
|
||||||
|
|
||||||
let index_accessor = remote_index.read().await;
|
|
||||||
let has_in_progress_downloads = index_accessor
|
|
||||||
.tenant_entry(&tenant_id)
|
|
||||||
.ok_or_else(|| ApiError::NotFound("Tenant not found in remote index".to_string()))?
|
|
||||||
.has_in_progress_downloads();
|
|
||||||
|
|
||||||
json_response(
|
|
||||||
StatusCode::OK,
|
|
||||||
TenantInfo {
|
|
||||||
id: tenant_id,
|
|
||||||
state: tenant_state,
|
|
||||||
has_in_progress_downloads: Some(has_in_progress_downloads),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||||
// check for management permission
|
// check for management permission
|
||||||
check_permission(&request, None)?;
|
check_permission(&request, None)?;
|
||||||
@@ -587,28 +520,24 @@ pub fn make_router(
|
|||||||
.get("/v1/status", status_handler)
|
.get("/v1/status", status_handler)
|
||||||
.get("/v1/tenant", tenant_list_handler)
|
.get("/v1/tenant", tenant_list_handler)
|
||||||
.post("/v1/tenant", tenant_create_handler)
|
.post("/v1/tenant", tenant_create_handler)
|
||||||
.get("/v1/tenant/:tenant_id", tenant_status)
|
|
||||||
.put("/v1/tenant/config", tenant_config_handler)
|
.put("/v1/tenant/config", tenant_config_handler)
|
||||||
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
|
.get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
|
||||||
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
|
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
|
||||||
.post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
|
|
||||||
.post("/v1/tenant/:tenant_id/detach", tenant_detach_handler)
|
|
||||||
.get(
|
.get(
|
||||||
"/v1/tenant/:tenant_id/timeline/:timeline_id",
|
"/v1/tenant/:tenant_id/timeline/:timeline_id",
|
||||||
timeline_detail_handler,
|
timeline_detail_handler,
|
||||||
)
|
)
|
||||||
.delete(
|
|
||||||
"/v1/tenant/:tenant_id/timeline/:timeline_id",
|
|
||||||
timeline_delete_handler,
|
|
||||||
)
|
|
||||||
// for backward compatibility
|
|
||||||
.post(
|
|
||||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
|
|
||||||
timeline_delete_handler,
|
|
||||||
)
|
|
||||||
.get(
|
.get(
|
||||||
"/v1/tenant/:tenant_id/timeline/:timeline_id/wal_receiver",
|
"/v1/tenant/:tenant_id/timeline/:timeline_id/wal_receiver",
|
||||||
wal_receiver_get_handler,
|
wal_receiver_get_handler,
|
||||||
)
|
)
|
||||||
|
.post(
|
||||||
|
"/v1/tenant/:tenant_id/timeline/:timeline_id/attach",
|
||||||
|
timeline_attach_handler,
|
||||||
|
)
|
||||||
|
.post(
|
||||||
|
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach",
|
||||||
|
timeline_detach_handler,
|
||||||
|
)
|
||||||
.any(handler_404))
|
.any(handler_404))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -57,7 +57,6 @@ pub fn import_timeline_from_postgres_datadir<R: Repository>(
|
|||||||
if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
|
if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
|
||||||
pg_control = Some(control_file);
|
pg_control = Some(control_file);
|
||||||
}
|
}
|
||||||
modification.flush()?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -318,7 +317,6 @@ pub fn import_basebackup_from_tar<R: Repository, Reader: Read>(
|
|||||||
// We found the pg_control file.
|
// We found the pg_control file.
|
||||||
pg_control = Some(res);
|
pg_control = Some(res);
|
||||||
}
|
}
|
||||||
modification.flush()?;
|
|
||||||
}
|
}
|
||||||
tar::EntryType::Directory => {
|
tar::EntryType::Directory => {
|
||||||
debug!("directory {:?}", file_path);
|
debug!("directory {:?}", file_path);
|
||||||
@@ -518,23 +516,10 @@ pub fn import_file<R: Repository, Reader: Read>(
|
|||||||
// Parse zenith signal file to set correct previous LSN
|
// Parse zenith signal file to set correct previous LSN
|
||||||
let bytes = read_all_bytes(reader)?;
|
let bytes = read_all_bytes(reader)?;
|
||||||
// zenith.signal format is "PREV LSN: prev_lsn"
|
// zenith.signal format is "PREV LSN: prev_lsn"
|
||||||
// TODO write serialization and deserialization in the same place.
|
let zenith_signal = std::str::from_utf8(&bytes)?;
|
||||||
let zenith_signal = std::str::from_utf8(&bytes)?.trim();
|
let zenith_signal = zenith_signal.split(':').collect::<Vec<_>>();
|
||||||
let prev_lsn = match zenith_signal {
|
let prev_lsn = zenith_signal[1].trim().parse::<Lsn>()?;
|
||||||
"PREV LSN: none" => Lsn(0),
|
|
||||||
"PREV LSN: invalid" => Lsn(0),
|
|
||||||
other => {
|
|
||||||
let split = other.split(':').collect::<Vec<_>>();
|
|
||||||
split[1]
|
|
||||||
.trim()
|
|
||||||
.parse::<Lsn>()
|
|
||||||
.context("can't parse zenith.signal")?
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// zenith.signal is not necessarily the last file, that we handle
|
|
||||||
// but it is ok to call `finish_write()`, because final `modification.commit()`
|
|
||||||
// will update lsn once more to the final one.
|
|
||||||
let writer = modification.tline.tline.writer();
|
let writer = modification.tline.tline.writer();
|
||||||
writer.finish_write(prev_lsn);
|
writer.finish_write(prev_lsn);
|
||||||
|
|
||||||
|
|||||||
@@ -34,11 +34,13 @@ use std::time::{Duration, Instant, SystemTime};
|
|||||||
|
|
||||||
use self::metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME};
|
use self::metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME};
|
||||||
use crate::config::PageServerConf;
|
use crate::config::PageServerConf;
|
||||||
use crate::keyspace::{KeyPartitioning, KeySpace};
|
use crate::keyspace::KeySpace;
|
||||||
use crate::storage_sync::index::RemoteIndex;
|
use crate::storage_sync::index::RemoteIndex;
|
||||||
use crate::tenant_config::{TenantConf, TenantConfOpt};
|
use crate::tenant_config::{TenantConf, TenantConfOpt};
|
||||||
|
|
||||||
use crate::repository::{GcResult, Repository, RepositoryTimeline, Timeline, TimelineWriter};
|
use crate::repository::{
|
||||||
|
GcResult, Repository, RepositoryTimeline, Timeline, TimelineSyncStatusUpdate, TimelineWriter,
|
||||||
|
};
|
||||||
use crate::repository::{Key, Value};
|
use crate::repository::{Key, Value};
|
||||||
use crate::tenant_mgr;
|
use crate::tenant_mgr;
|
||||||
use crate::thread_mgr;
|
use crate::thread_mgr;
|
||||||
@@ -270,12 +272,7 @@ impl Repository for LayeredRepository {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Branch a timeline
|
/// Branch a timeline
|
||||||
fn branch_timeline(
|
fn branch_timeline(&self, src: ZTimelineId, dst: ZTimelineId, start_lsn: Lsn) -> Result<()> {
|
||||||
&self,
|
|
||||||
src: ZTimelineId,
|
|
||||||
dst: ZTimelineId,
|
|
||||||
start_lsn: Option<Lsn>,
|
|
||||||
) -> Result<()> {
|
|
||||||
// We need to hold this lock to prevent GC from starting at the same time. GC scans the directory to learn
|
// We need to hold this lock to prevent GC from starting at the same time. GC scans the directory to learn
|
||||||
// about timelines, so otherwise a race condition is possible, where we create new timeline and GC
|
// about timelines, so otherwise a race condition is possible, where we create new timeline and GC
|
||||||
// concurrently removes data that is needed by the new timeline.
|
// concurrently removes data that is needed by the new timeline.
|
||||||
@@ -288,14 +285,6 @@ impl Repository for LayeredRepository {
|
|||||||
.context("failed to load timeline for branching")?
|
.context("failed to load timeline for branching")?
|
||||||
.ok_or_else(|| anyhow::anyhow!("unknown timeline id: {}", &src))?;
|
.ok_or_else(|| anyhow::anyhow!("unknown timeline id: {}", &src))?;
|
||||||
let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn();
|
let latest_gc_cutoff_lsn = src_timeline.get_latest_gc_cutoff_lsn();
|
||||||
|
|
||||||
// If no start LSN is specified, we branch the new timeline from the source timeline's last record LSN
|
|
||||||
let start_lsn = start_lsn.unwrap_or_else(|| {
|
|
||||||
let lsn = src_timeline.get_last_record_lsn();
|
|
||||||
info!("branching timeline {dst} from timeline {src} at last record LSN: {lsn}");
|
|
||||||
lsn
|
|
||||||
});
|
|
||||||
|
|
||||||
src_timeline
|
src_timeline
|
||||||
.check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn)
|
.check_lsn_is_in_scope(start_lsn, &latest_gc_cutoff_lsn)
|
||||||
.context("invalid branch start lsn")?;
|
.context("invalid branch start lsn")?;
|
||||||
@@ -342,19 +331,19 @@ impl Repository for LayeredRepository {
|
|||||||
/// metrics collection.
|
/// metrics collection.
|
||||||
fn gc_iteration(
|
fn gc_iteration(
|
||||||
&self,
|
&self,
|
||||||
target_timeline_id: Option<ZTimelineId>,
|
target_timelineid: Option<ZTimelineId>,
|
||||||
horizon: u64,
|
horizon: u64,
|
||||||
pitr: Duration,
|
pitr: Duration,
|
||||||
checkpoint_before_gc: bool,
|
checkpoint_before_gc: bool,
|
||||||
) -> Result<GcResult> {
|
) -> Result<GcResult> {
|
||||||
let timeline_str = target_timeline_id
|
let timeline_str = target_timelineid
|
||||||
.map(|x| x.to_string())
|
.map(|x| x.to_string())
|
||||||
.unwrap_or_else(|| "-".to_string());
|
.unwrap_or_else(|| "-".to_string());
|
||||||
|
|
||||||
STORAGE_TIME
|
STORAGE_TIME
|
||||||
.with_label_values(&["gc", &self.tenant_id.to_string(), &timeline_str])
|
.with_label_values(&["gc", &self.tenant_id.to_string(), &timeline_str])
|
||||||
.observe_closure_duration(|| {
|
.observe_closure_duration(|| {
|
||||||
self.gc_iteration_internal(target_timeline_id, horizon, pitr, checkpoint_before_gc)
|
self.gc_iteration_internal(target_timelineid, horizon, pitr, checkpoint_before_gc)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -421,60 +410,50 @@ impl Repository for LayeredRepository {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn delete_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()> {
|
fn detach_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()> {
|
||||||
// in order to be retriable detach needs to be idempotent
|
|
||||||
// (or at least to a point that each time the detach is called it can make progress)
|
|
||||||
let mut timelines = self.timelines.lock().unwrap();
|
let mut timelines = self.timelines.lock().unwrap();
|
||||||
|
// check no child timelines, because detach will remove files, which will brake child branches
|
||||||
// Ensure that there are no child timelines **attached to that pageserver**,
|
// FIXME this can still be violated because we do not guarantee
|
||||||
// because detach removes files, which will break child branches
|
// that all ancestors are downloaded/attached to the same pageserver
|
||||||
let children_exist = timelines
|
let num_children = timelines
|
||||||
.iter()
|
.iter()
|
||||||
.any(|(_, entry)| entry.ancestor_timeline_id() == Some(timeline_id));
|
.filter(|(_, entry)| entry.ancestor_timeline_id() == Some(timeline_id))
|
||||||
|
.count();
|
||||||
|
|
||||||
ensure!(
|
ensure!(
|
||||||
!children_exist,
|
num_children == 0,
|
||||||
"Cannot detach timeline which has child timelines"
|
"Cannot detach timeline which has child timelines"
|
||||||
);
|
);
|
||||||
let timeline_entry = match timelines.entry(timeline_id) {
|
|
||||||
Entry::Occupied(e) => e,
|
|
||||||
Entry::Vacant(_) => bail!("timeline not found"),
|
|
||||||
};
|
|
||||||
|
|
||||||
// try to acquire gc and compaction locks to prevent errors from missing files
|
|
||||||
let _gc_guard = self
|
|
||||||
.gc_cs
|
|
||||||
.try_lock()
|
|
||||||
.map_err(|e| anyhow::anyhow!("cannot acquire gc lock {e}"))?;
|
|
||||||
|
|
||||||
let compaction_guard = timeline_entry.get().compaction_guard()?;
|
|
||||||
|
|
||||||
let local_timeline_directory = self.conf.timeline_path(&timeline_id, &self.tenant_id);
|
|
||||||
std::fs::remove_dir_all(&local_timeline_directory).with_context(|| {
|
|
||||||
format!(
|
|
||||||
"Failed to remove local timeline directory '{}'",
|
|
||||||
local_timeline_directory.display()
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
info!("detach removed files");
|
|
||||||
|
|
||||||
drop(compaction_guard);
|
|
||||||
timeline_entry.remove();
|
|
||||||
|
|
||||||
|
ensure!(
|
||||||
|
timelines.remove(&timeline_id).is_some(),
|
||||||
|
"Cannot detach timeline {timeline_id} that is not available locally"
|
||||||
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()> {
|
fn apply_timeline_remote_sync_status_update(
|
||||||
debug!("attach timeline_id: {}", timeline_id,);
|
&self,
|
||||||
match self.timelines.lock().unwrap().entry(timeline_id) {
|
timeline_id: ZTimelineId,
|
||||||
Entry::Occupied(_) => bail!("We completed a download for a timeline that already exists in repository. This is a bug."),
|
timeline_sync_status_update: TimelineSyncStatusUpdate,
|
||||||
Entry::Vacant(entry) => {
|
) -> Result<()> {
|
||||||
// we need to get metadata of a timeline, another option is to pass it along with Downloaded status
|
debug!(
|
||||||
let metadata = load_metadata(self.conf, timeline_id, self.tenant_id).context("failed to load local metadata")?;
|
"apply_timeline_remote_sync_status_update timeline_id: {} update: {:?}",
|
||||||
// finally we make newly downloaded timeline visible to repository
|
timeline_id, timeline_sync_status_update
|
||||||
entry.insert(LayeredTimelineEntry::Unloaded { id: timeline_id, metadata, })
|
);
|
||||||
},
|
match timeline_sync_status_update {
|
||||||
};
|
TimelineSyncStatusUpdate::Downloaded => {
|
||||||
|
match self.timelines.lock().unwrap().entry(timeline_id) {
|
||||||
|
Entry::Occupied(_) => bail!("We completed a download for a timeline that already exists in repository. This is a bug."),
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
// we need to get metadata of a timeline, another option is to pass it along with Downloaded status
|
||||||
|
let metadata = load_metadata(self.conf, timeline_id, self.tenant_id).context("failed to load local metadata")?;
|
||||||
|
// finally we make newly downloaded timeline visible to repository
|
||||||
|
entry.insert(LayeredTimelineEntry::Unloaded { id: timeline_id, metadata, })
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -524,18 +503,6 @@ impl LayeredTimelineEntry {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compaction_guard(&self) -> Result<Option<MutexGuard<()>>, anyhow::Error> {
|
|
||||||
match self {
|
|
||||||
LayeredTimelineEntry::Loaded(timeline) => timeline
|
|
||||||
.compaction_cs
|
|
||||||
.try_lock()
|
|
||||||
.map_err(|e| anyhow::anyhow!("cannot lock compaction critical section {e}"))
|
|
||||||
.map(Some),
|
|
||||||
|
|
||||||
LayeredTimelineEntry::Unloaded { .. } => Ok(None),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<LayeredTimelineEntry> for RepositoryTimeline<LayeredTimeline> {
|
impl From<LayeredTimelineEntry> for RepositoryTimeline<LayeredTimeline> {
|
||||||
@@ -872,13 +839,13 @@ impl LayeredRepository {
|
|||||||
// we do.
|
// we do.
|
||||||
fn gc_iteration_internal(
|
fn gc_iteration_internal(
|
||||||
&self,
|
&self,
|
||||||
target_timeline_id: Option<ZTimelineId>,
|
target_timelineid: Option<ZTimelineId>,
|
||||||
horizon: u64,
|
horizon: u64,
|
||||||
pitr: Duration,
|
pitr: Duration,
|
||||||
checkpoint_before_gc: bool,
|
checkpoint_before_gc: bool,
|
||||||
) -> Result<GcResult> {
|
) -> Result<GcResult> {
|
||||||
let _span_guard =
|
let _span_guard =
|
||||||
info_span!("gc iteration", tenant = %self.tenant_id, timeline = ?target_timeline_id)
|
info_span!("gc iteration", tenant = %self.tenant_id, timeline = ?target_timelineid)
|
||||||
.entered();
|
.entered();
|
||||||
let mut totals: GcResult = Default::default();
|
let mut totals: GcResult = Default::default();
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
@@ -892,12 +859,6 @@ impl LayeredRepository {
|
|||||||
let mut timeline_ids = Vec::new();
|
let mut timeline_ids = Vec::new();
|
||||||
let mut timelines = self.timelines.lock().unwrap();
|
let mut timelines = self.timelines.lock().unwrap();
|
||||||
|
|
||||||
if let Some(target_timeline_id) = target_timeline_id.as_ref() {
|
|
||||||
if timelines.get(target_timeline_id).is_none() {
|
|
||||||
bail!("gc target timeline does not exist")
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
for (timeline_id, timeline_entry) in timelines.iter() {
|
for (timeline_id, timeline_entry) in timelines.iter() {
|
||||||
timeline_ids.push(*timeline_id);
|
timeline_ids.push(*timeline_id);
|
||||||
|
|
||||||
@@ -906,7 +867,7 @@ impl LayeredRepository {
|
|||||||
// Somewhat related: https://github.com/zenithdb/zenith/issues/999
|
// Somewhat related: https://github.com/zenithdb/zenith/issues/999
|
||||||
if let Some(ancestor_timeline_id) = &timeline_entry.ancestor_timeline_id() {
|
if let Some(ancestor_timeline_id) = &timeline_entry.ancestor_timeline_id() {
|
||||||
// If target_timeline is specified, we only need to know branchpoints of its children
|
// If target_timeline is specified, we only need to know branchpoints of its children
|
||||||
if let Some(timelineid) = target_timeline_id {
|
if let Some(timelineid) = target_timelineid {
|
||||||
if ancestor_timeline_id == &timelineid {
|
if ancestor_timeline_id == &timelineid {
|
||||||
all_branchpoints
|
all_branchpoints
|
||||||
.insert((*ancestor_timeline_id, timeline_entry.ancestor_lsn()));
|
.insert((*ancestor_timeline_id, timeline_entry.ancestor_lsn()));
|
||||||
@@ -921,7 +882,7 @@ impl LayeredRepository {
|
|||||||
|
|
||||||
// Ok, we now know all the branch points.
|
// Ok, we now know all the branch points.
|
||||||
// Perform GC for each timeline.
|
// Perform GC for each timeline.
|
||||||
for timeline_id in timeline_ids.into_iter() {
|
for timelineid in timeline_ids.into_iter() {
|
||||||
if thread_mgr::is_shutdown_requested() {
|
if thread_mgr::is_shutdown_requested() {
|
||||||
// We were requested to shut down. Stop and return with the progress we
|
// We were requested to shut down. Stop and return with the progress we
|
||||||
// made.
|
// made.
|
||||||
@@ -930,12 +891,12 @@ impl LayeredRepository {
|
|||||||
|
|
||||||
// Timeline is known to be local and loaded.
|
// Timeline is known to be local and loaded.
|
||||||
let timeline = self
|
let timeline = self
|
||||||
.get_timeline_load_internal(timeline_id, &mut *timelines)?
|
.get_timeline_load_internal(timelineid, &mut *timelines)?
|
||||||
.expect("checked above that timeline is local and loaded");
|
.expect("checked above that timeline is local and loaded");
|
||||||
|
|
||||||
// If target_timeline is specified, only GC it
|
// If target_timeline is specified, only GC it
|
||||||
if let Some(target_timelineid) = target_timeline_id {
|
if let Some(target_timelineid) = target_timelineid {
|
||||||
if timeline_id != target_timelineid {
|
if timelineid != target_timelineid {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -944,8 +905,8 @@ impl LayeredRepository {
|
|||||||
drop(timelines);
|
drop(timelines);
|
||||||
let branchpoints: Vec<Lsn> = all_branchpoints
|
let branchpoints: Vec<Lsn> = all_branchpoints
|
||||||
.range((
|
.range((
|
||||||
Included((timeline_id, Lsn(0))),
|
Included((timelineid, Lsn(0))),
|
||||||
Included((timeline_id, Lsn(u64::MAX))),
|
Included((timelineid, Lsn(u64::MAX))),
|
||||||
))
|
))
|
||||||
.map(|&x| x.1)
|
.map(|&x| x.1)
|
||||||
.collect();
|
.collect();
|
||||||
@@ -955,7 +916,7 @@ impl LayeredRepository {
|
|||||||
// used in tests, so we want as deterministic results as possible.
|
// used in tests, so we want as deterministic results as possible.
|
||||||
if checkpoint_before_gc {
|
if checkpoint_before_gc {
|
||||||
timeline.checkpoint(CheckpointConfig::Forced)?;
|
timeline.checkpoint(CheckpointConfig::Forced)?;
|
||||||
info!("timeline {} checkpoint_before_gc done", timeline_id);
|
info!("timeline {} checkpoint_before_gc done", timelineid);
|
||||||
}
|
}
|
||||||
timeline.update_gc_info(branchpoints, cutoff, pitr);
|
timeline.update_gc_info(branchpoints, cutoff, pitr);
|
||||||
let result = timeline.gc()?;
|
let result = timeline.gc()?;
|
||||||
@@ -1640,7 +1601,7 @@ impl LayeredTimeline {
|
|||||||
Ok(layer)
|
Ok(layer)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
|
fn put_value(&self, key: Key, lsn: Lsn, val: Value) -> Result<()> {
|
||||||
//info!("PUT: key {} at {}", key, lsn);
|
//info!("PUT: key {} at {}", key, lsn);
|
||||||
let layer = self.get_layer_for_write(lsn)?;
|
let layer = self.get_layer_for_write(lsn)?;
|
||||||
layer.put_value(key, lsn, val)?;
|
layer.put_value(key, lsn, val)?;
|
||||||
@@ -1768,29 +1729,24 @@ impl LayeredTimeline {
|
|||||||
|
|
||||||
/// Flush one frozen in-memory layer to disk, as a new delta layer.
|
/// Flush one frozen in-memory layer to disk, as a new delta layer.
|
||||||
fn flush_frozen_layer(&self, frozen_layer: Arc<InMemoryLayer>) -> Result<()> {
|
fn flush_frozen_layer(&self, frozen_layer: Arc<InMemoryLayer>) -> Result<()> {
|
||||||
let layer_paths_to_upload;
|
let new_delta = frozen_layer.write_to_disk()?;
|
||||||
|
let new_delta_path = new_delta.path();
|
||||||
// As a special case, when we have just imported an image into the repository,
|
|
||||||
// instead of writing out a L0 delta layer, we directly write out image layer
|
|
||||||
// files instead. This is possible as long as *all* the data imported into the
|
|
||||||
// repository have the same LSN.
|
|
||||||
let lsn_range = frozen_layer.get_lsn_range();
|
|
||||||
if lsn_range.start == self.initdb_lsn && lsn_range.end == Lsn(self.initdb_lsn.0 + 1) {
|
|
||||||
let pgdir = tenant_mgr::get_local_timeline_with_load(self.tenant_id, self.timeline_id)?;
|
|
||||||
let (partitioning, _lsn) =
|
|
||||||
pgdir.repartition(self.initdb_lsn, self.get_compaction_target_size())?;
|
|
||||||
layer_paths_to_upload =
|
|
||||||
self.create_image_layers(&partitioning, self.initdb_lsn, true)?;
|
|
||||||
} else {
|
|
||||||
// normal case, write out a L0 delta layer file.
|
|
||||||
let delta_path = self.create_delta_layer(&frozen_layer)?;
|
|
||||||
layer_paths_to_upload = HashSet::from([delta_path]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Sync the new layer to disk.
|
||||||
|
//
|
||||||
|
// We must also fsync the timeline dir to ensure the directory entries for
|
||||||
|
// new layer files are durable
|
||||||
|
//
|
||||||
|
// TODO: If we're running inside 'flush_frozen_layers' and there are multiple
|
||||||
|
// files to flush, it might be better to first write them all, and then fsync
|
||||||
|
// them all in parallel.
|
||||||
|
par_fsync::par_fsync(&[
|
||||||
|
new_delta_path.clone(),
|
||||||
|
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||||
|
])?;
|
||||||
fail_point!("flush-frozen-before-sync");
|
fail_point!("flush-frozen-before-sync");
|
||||||
|
|
||||||
// The new on-disk layers are now in the layer map. We can remove the
|
// Finally, replace the frozen in-memory layer with the new on-disk layer
|
||||||
// in-memory layer from the map now.
|
|
||||||
{
|
{
|
||||||
let mut layers = self.layers.write().unwrap();
|
let mut layers = self.layers.write().unwrap();
|
||||||
let l = layers.frozen_layers.pop_front();
|
let l = layers.frozen_layers.pop_front();
|
||||||
@@ -1800,27 +1756,19 @@ impl LayeredTimeline {
|
|||||||
// layer to disk at the same time, that would not work.
|
// layer to disk at the same time, that would not work.
|
||||||
assert!(Arc::ptr_eq(&l.unwrap(), &frozen_layer));
|
assert!(Arc::ptr_eq(&l.unwrap(), &frozen_layer));
|
||||||
|
|
||||||
|
// Add the new delta layer to the LayerMap
|
||||||
|
layers.insert_historic(Arc::new(new_delta));
|
||||||
|
|
||||||
// release lock on 'layers'
|
// release lock on 'layers'
|
||||||
}
|
}
|
||||||
|
|
||||||
fail_point!("checkpoint-after-sync");
|
|
||||||
|
|
||||||
// Update the metadata file, with new 'disk_consistent_lsn'
|
// Update the metadata file, with new 'disk_consistent_lsn'
|
||||||
//
|
//
|
||||||
// TODO: This perhaps should be done in 'flush_frozen_layers', after flushing
|
// TODO: This perhaps should be done in 'flush_frozen_layers', after flushing
|
||||||
// *all* the layers, to avoid fsyncing the file multiple times.
|
// *all* the layers, to avoid fsyncing the file multiple times.
|
||||||
let disk_consistent_lsn = Lsn(lsn_range.end.0 - 1);
|
let disk_consistent_lsn = Lsn(frozen_layer.get_lsn_range().end.0 - 1);
|
||||||
self.update_disk_consistent_lsn(disk_consistent_lsn, layer_paths_to_upload)?;
|
fail_point!("checkpoint-after-sync");
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Update metadata file
|
|
||||||
fn update_disk_consistent_lsn(
|
|
||||||
&self,
|
|
||||||
disk_consistent_lsn: Lsn,
|
|
||||||
layer_paths_to_upload: HashSet<PathBuf>,
|
|
||||||
) -> Result<()> {
|
|
||||||
// If we were able to advance 'disk_consistent_lsn', save it the metadata file.
|
// If we were able to advance 'disk_consistent_lsn', save it the metadata file.
|
||||||
// After crash, we will restart WAL streaming and processing from that point.
|
// After crash, we will restart WAL streaming and processing from that point.
|
||||||
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
|
let old_disk_consistent_lsn = self.disk_consistent_lsn.load();
|
||||||
@@ -1870,11 +1818,14 @@ impl LayeredTimeline {
|
|||||||
false,
|
false,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
NUM_PERSISTENT_FILES_CREATED.inc_by(1);
|
||||||
|
PERSISTENT_BYTES_WRITTEN.inc_by(new_delta_path.metadata()?.len());
|
||||||
|
|
||||||
if self.upload_layers.load(atomic::Ordering::Relaxed) {
|
if self.upload_layers.load(atomic::Ordering::Relaxed) {
|
||||||
storage_sync::schedule_layer_upload(
|
storage_sync::schedule_layer_upload(
|
||||||
self.tenant_id,
|
self.tenant_id,
|
||||||
self.timeline_id,
|
self.timeline_id,
|
||||||
layer_paths_to_upload,
|
HashSet::from([new_delta_path]),
|
||||||
Some(metadata),
|
Some(metadata),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -1886,37 +1837,6 @@ impl LayeredTimeline {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write out the given frozen in-memory layer as a new L0 delta file
|
|
||||||
fn create_delta_layer(&self, frozen_layer: &InMemoryLayer) -> Result<PathBuf> {
|
|
||||||
// Write it out
|
|
||||||
let new_delta = frozen_layer.write_to_disk()?;
|
|
||||||
let new_delta_path = new_delta.path();
|
|
||||||
|
|
||||||
// Sync it to disk.
|
|
||||||
//
|
|
||||||
// We must also fsync the timeline dir to ensure the directory entries for
|
|
||||||
// new layer files are durable
|
|
||||||
//
|
|
||||||
// TODO: If we're running inside 'flush_frozen_layers' and there are multiple
|
|
||||||
// files to flush, it might be better to first write them all, and then fsync
|
|
||||||
// them all in parallel.
|
|
||||||
par_fsync::par_fsync(&[
|
|
||||||
new_delta_path.clone(),
|
|
||||||
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
|
||||||
])?;
|
|
||||||
|
|
||||||
// Add it to the layer map
|
|
||||||
{
|
|
||||||
let mut layers = self.layers.write().unwrap();
|
|
||||||
layers.insert_historic(Arc::new(new_delta));
|
|
||||||
}
|
|
||||||
|
|
||||||
NUM_PERSISTENT_FILES_CREATED.inc_by(1);
|
|
||||||
PERSISTENT_BYTES_WRITTEN.inc_by(new_delta_path.metadata()?.len());
|
|
||||||
|
|
||||||
Ok(new_delta_path)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn compact(&self) -> Result<()> {
|
pub fn compact(&self) -> Result<()> {
|
||||||
//
|
//
|
||||||
// High level strategy for compaction / image creation:
|
// High level strategy for compaction / image creation:
|
||||||
@@ -1960,23 +1880,29 @@ impl LayeredTimeline {
|
|||||||
if let Ok(pgdir) =
|
if let Ok(pgdir) =
|
||||||
tenant_mgr::get_local_timeline_with_load(self.tenant_id, self.timeline_id)
|
tenant_mgr::get_local_timeline_with_load(self.tenant_id, self.timeline_id)
|
||||||
{
|
{
|
||||||
// 2. Create new image layers for partitions that have been modified
|
|
||||||
// "enough".
|
|
||||||
let (partitioning, lsn) = pgdir.repartition(
|
let (partitioning, lsn) = pgdir.repartition(
|
||||||
self.get_last_record_lsn(),
|
self.get_last_record_lsn(),
|
||||||
self.get_compaction_target_size(),
|
self.get_compaction_target_size(),
|
||||||
)?;
|
)?;
|
||||||
let layer_paths_to_upload = self.create_image_layers(&partitioning, lsn, false)?;
|
let timer = self.create_images_time_histo.start_timer();
|
||||||
if !layer_paths_to_upload.is_empty()
|
// 2. Create new image layers for partitions that have been modified
|
||||||
&& self.upload_layers.load(atomic::Ordering::Relaxed)
|
// "enough".
|
||||||
{
|
let mut layer_paths_to_upload = HashSet::with_capacity(partitioning.parts.len());
|
||||||
|
for part in partitioning.parts.iter() {
|
||||||
|
if self.time_for_new_image_layer(part, lsn)? {
|
||||||
|
let new_path = self.create_image_layer(part, lsn)?;
|
||||||
|
layer_paths_to_upload.insert(new_path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if self.upload_layers.load(atomic::Ordering::Relaxed) {
|
||||||
storage_sync::schedule_layer_upload(
|
storage_sync::schedule_layer_upload(
|
||||||
self.tenant_id,
|
self.tenant_id,
|
||||||
self.timeline_id,
|
self.timeline_id,
|
||||||
HashSet::from_iter(layer_paths_to_upload),
|
layer_paths_to_upload,
|
||||||
None,
|
None,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
timer.stop_and_record();
|
||||||
|
|
||||||
// 3. Compact
|
// 3. Compact
|
||||||
let timer = self.compact_time_histo.start_timer();
|
let timer = self.compact_time_histo.start_timer();
|
||||||
@@ -2030,40 +1956,21 @@ impl LayeredTimeline {
|
|||||||
Ok(false)
|
Ok(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_image_layers(
|
fn create_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> anyhow::Result<PathBuf> {
|
||||||
&self,
|
let img_range =
|
||||||
partitioning: &KeyPartitioning,
|
partition.ranges.first().unwrap().start..partition.ranges.last().unwrap().end;
|
||||||
lsn: Lsn,
|
let mut image_layer_writer =
|
||||||
force: bool,
|
ImageLayerWriter::new(self.conf, self.timeline_id, self.tenant_id, &img_range, lsn)?;
|
||||||
) -> Result<HashSet<PathBuf>> {
|
|
||||||
let timer = self.create_images_time_histo.start_timer();
|
|
||||||
let mut image_layers: Vec<ImageLayer> = Vec::new();
|
|
||||||
let mut layer_paths_to_upload = HashSet::new();
|
|
||||||
for partition in partitioning.parts.iter() {
|
|
||||||
if force || self.time_for_new_image_layer(partition, lsn)? {
|
|
||||||
let img_range =
|
|
||||||
partition.ranges.first().unwrap().start..partition.ranges.last().unwrap().end;
|
|
||||||
let mut image_layer_writer = ImageLayerWriter::new(
|
|
||||||
self.conf,
|
|
||||||
self.timeline_id,
|
|
||||||
self.tenant_id,
|
|
||||||
&img_range,
|
|
||||||
lsn,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
for range in &partition.ranges {
|
for range in &partition.ranges {
|
||||||
let mut key = range.start;
|
let mut key = range.start;
|
||||||
while key < range.end {
|
while key < range.end {
|
||||||
let img = self.get(key, lsn)?;
|
let img = self.get(key, lsn)?;
|
||||||
image_layer_writer.put_image(key, &img)?;
|
image_layer_writer.put_image(key, &img)?;
|
||||||
key = key.next();
|
key = key.next();
|
||||||
}
|
|
||||||
}
|
|
||||||
let image_layer = image_layer_writer.finish()?;
|
|
||||||
layer_paths_to_upload.insert(image_layer.path());
|
|
||||||
image_layers.push(image_layer);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
let image_layer = image_layer_writer.finish()?;
|
||||||
|
|
||||||
// Sync the new layer to disk before adding it to the layer map, to make sure
|
// Sync the new layer to disk before adding it to the layer map, to make sure
|
||||||
// we don't garbage collect something based on the new layer, before it has
|
// we don't garbage collect something based on the new layer, before it has
|
||||||
@@ -2074,18 +1981,19 @@ impl LayeredTimeline {
|
|||||||
//
|
//
|
||||||
// Compaction creates multiple image layers. It would be better to create them all
|
// Compaction creates multiple image layers. It would be better to create them all
|
||||||
// and fsync them all in parallel.
|
// and fsync them all in parallel.
|
||||||
let mut all_paths = Vec::from_iter(layer_paths_to_upload.clone());
|
par_fsync::par_fsync(&[
|
||||||
all_paths.push(self.conf.timeline_path(&self.timeline_id, &self.tenant_id));
|
image_layer.path(),
|
||||||
par_fsync::par_fsync(&all_paths)?;
|
self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||||
|
])?;
|
||||||
|
|
||||||
|
// FIXME: Do we need to do something to upload it to remote storage here?
|
||||||
|
|
||||||
let mut layers = self.layers.write().unwrap();
|
let mut layers = self.layers.write().unwrap();
|
||||||
for l in image_layers {
|
let new_path = image_layer.path();
|
||||||
layers.insert_historic(Arc::new(l));
|
layers.insert_historic(Arc::new(image_layer));
|
||||||
}
|
|
||||||
drop(layers);
|
drop(layers);
|
||||||
timer.stop_and_record();
|
|
||||||
|
|
||||||
Ok(layer_paths_to_upload)
|
Ok(new_path)
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
@@ -2608,7 +2516,7 @@ impl Deref for LayeredTimelineWriter<'_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> TimelineWriter<'_> for LayeredTimelineWriter<'a> {
|
impl<'a> TimelineWriter<'_> for LayeredTimelineWriter<'a> {
|
||||||
fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()> {
|
fn put(&self, key: Key, lsn: Lsn, value: Value) -> Result<()> {
|
||||||
self.tl.put_value(key, lsn, value)
|
self.tl.put_value(key, lsn, value)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2750,7 +2658,7 @@ pub mod tests {
|
|||||||
let TEST_KEY: Key = Key::from_hex("112222222233333333444444445500000001").unwrap();
|
let TEST_KEY: Key = Key::from_hex("112222222233333333444444445500000001").unwrap();
|
||||||
|
|
||||||
let writer = tline.writer();
|
let writer = tline.writer();
|
||||||
writer.put(TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?;
|
writer.put(TEST_KEY, Lsn(0x10), Value::Image(TEST_IMG("foo at 0x10")))?;
|
||||||
writer.finish_write(Lsn(0x10));
|
writer.finish_write(Lsn(0x10));
|
||||||
drop(writer);
|
drop(writer);
|
||||||
|
|
||||||
@@ -2758,7 +2666,7 @@ pub mod tests {
|
|||||||
tline.compact()?;
|
tline.compact()?;
|
||||||
|
|
||||||
let writer = tline.writer();
|
let writer = tline.writer();
|
||||||
writer.put(TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?;
|
writer.put(TEST_KEY, Lsn(0x20), Value::Image(TEST_IMG("foo at 0x20")))?;
|
||||||
writer.finish_write(Lsn(0x20));
|
writer.finish_write(Lsn(0x20));
|
||||||
drop(writer);
|
drop(writer);
|
||||||
|
|
||||||
@@ -2766,7 +2674,7 @@ pub mod tests {
|
|||||||
tline.compact()?;
|
tline.compact()?;
|
||||||
|
|
||||||
let writer = tline.writer();
|
let writer = tline.writer();
|
||||||
writer.put(TEST_KEY, Lsn(0x30), &Value::Image(TEST_IMG("foo at 0x30")))?;
|
writer.put(TEST_KEY, Lsn(0x30), Value::Image(TEST_IMG("foo at 0x30")))?;
|
||||||
writer.finish_write(Lsn(0x30));
|
writer.finish_write(Lsn(0x30));
|
||||||
drop(writer);
|
drop(writer);
|
||||||
|
|
||||||
@@ -2774,7 +2682,7 @@ pub mod tests {
|
|||||||
tline.compact()?;
|
tline.compact()?;
|
||||||
|
|
||||||
let writer = tline.writer();
|
let writer = tline.writer();
|
||||||
writer.put(TEST_KEY, Lsn(0x40), &Value::Image(TEST_IMG("foo at 0x40")))?;
|
writer.put(TEST_KEY, Lsn(0x40), Value::Image(TEST_IMG("foo at 0x40")))?;
|
||||||
writer.finish_write(Lsn(0x40));
|
writer.finish_write(Lsn(0x40));
|
||||||
drop(writer);
|
drop(writer);
|
||||||
|
|
||||||
@@ -2812,7 +2720,7 @@ pub mod tests {
|
|||||||
writer.put(
|
writer.put(
|
||||||
test_key,
|
test_key,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
||||||
)?;
|
)?;
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
drop(writer);
|
drop(writer);
|
||||||
@@ -2858,7 +2766,7 @@ pub mod tests {
|
|||||||
writer.put(
|
writer.put(
|
||||||
test_key,
|
test_key,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
||||||
)?;
|
)?;
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
updated[blknum] = lsn;
|
updated[blknum] = lsn;
|
||||||
@@ -2876,7 +2784,7 @@ pub mod tests {
|
|||||||
writer.put(
|
writer.put(
|
||||||
test_key,
|
test_key,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
||||||
)?;
|
)?;
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
drop(writer);
|
drop(writer);
|
||||||
@@ -2928,7 +2836,7 @@ pub mod tests {
|
|||||||
writer.put(
|
writer.put(
|
||||||
test_key,
|
test_key,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
||||||
)?;
|
)?;
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
updated[blknum] = lsn;
|
updated[blknum] = lsn;
|
||||||
@@ -2940,7 +2848,7 @@ pub mod tests {
|
|||||||
let mut tline_id = TIMELINE_ID;
|
let mut tline_id = TIMELINE_ID;
|
||||||
for _ in 0..50 {
|
for _ in 0..50 {
|
||||||
let new_tline_id = ZTimelineId::generate();
|
let new_tline_id = ZTimelineId::generate();
|
||||||
repo.branch_timeline(tline_id, new_tline_id, Some(lsn))?;
|
repo.branch_timeline(tline_id, new_tline_id, lsn)?;
|
||||||
tline = repo.get_timeline_load(new_tline_id)?;
|
tline = repo.get_timeline_load(new_tline_id)?;
|
||||||
tline_id = new_tline_id;
|
tline_id = new_tline_id;
|
||||||
|
|
||||||
@@ -2952,7 +2860,7 @@ pub mod tests {
|
|||||||
writer.put(
|
writer.put(
|
||||||
test_key,
|
test_key,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
Value::Image(TEST_IMG(&format!("{} at {}", blknum, lsn))),
|
||||||
)?;
|
)?;
|
||||||
println!("updating {} at {}", blknum, lsn);
|
println!("updating {} at {}", blknum, lsn);
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
@@ -2999,7 +2907,7 @@ pub mod tests {
|
|||||||
#[allow(clippy::needless_range_loop)]
|
#[allow(clippy::needless_range_loop)]
|
||||||
for idx in 0..NUM_TLINES {
|
for idx in 0..NUM_TLINES {
|
||||||
let new_tline_id = ZTimelineId::generate();
|
let new_tline_id = ZTimelineId::generate();
|
||||||
repo.branch_timeline(tline_id, new_tline_id, Some(lsn))?;
|
repo.branch_timeline(tline_id, new_tline_id, lsn)?;
|
||||||
tline = repo.get_timeline_load(new_tline_id)?;
|
tline = repo.get_timeline_load(new_tline_id)?;
|
||||||
tline_id = new_tline_id;
|
tline_id = new_tline_id;
|
||||||
|
|
||||||
@@ -3011,7 +2919,7 @@ pub mod tests {
|
|||||||
writer.put(
|
writer.put(
|
||||||
test_key,
|
test_key,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))),
|
Value::Image(TEST_IMG(&format!("{} {} at {}", idx, blknum, lsn))),
|
||||||
)?;
|
)?;
|
||||||
println!("updating [{}][{}] at {}", idx, blknum, lsn);
|
println!("updating [{}][{}] at {}", idx, blknum, lsn);
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
|
|||||||
@@ -267,13 +267,13 @@ impl InMemoryLayer {
|
|||||||
|
|
||||||
/// Common subroutine of the public put_wal_record() and put_page_image() functions.
|
/// Common subroutine of the public put_wal_record() and put_page_image() functions.
|
||||||
/// Adds the page version to the in-memory tree
|
/// Adds the page version to the in-memory tree
|
||||||
pub fn put_value(&self, key: Key, lsn: Lsn, val: &Value) -> Result<()> {
|
pub fn put_value(&self, key: Key, lsn: Lsn, val: Value) -> Result<()> {
|
||||||
trace!("put_value key {} at {}/{}", key, self.timelineid, lsn);
|
trace!("put_value key {} at {}/{}", key, self.timelineid, lsn);
|
||||||
let mut inner = self.inner.write().unwrap();
|
let mut inner = self.inner.write().unwrap();
|
||||||
|
|
||||||
inner.assert_writeable();
|
inner.assert_writeable();
|
||||||
|
|
||||||
let off = inner.file.write_blob(&Value::ser(val)?)?;
|
let off = inner.file.write_blob(&Value::ser(&val)?)?;
|
||||||
|
|
||||||
let vec_map = inner.index.entry(key).or_default();
|
let vec_map = inner.index.entry(key).or_default();
|
||||||
let old = vec_map.append_or_update_last(lsn, off).unwrap().0;
|
let old = vec_map.append_or_update_last(lsn, off).unwrap().0;
|
||||||
|
|||||||
@@ -772,7 +772,6 @@ impl PageServerHandler {
|
|||||||
pgb: &mut PostgresBackend,
|
pgb: &mut PostgresBackend,
|
||||||
timelineid: ZTimelineId,
|
timelineid: ZTimelineId,
|
||||||
lsn: Option<Lsn>,
|
lsn: Option<Lsn>,
|
||||||
prev_lsn: Option<Lsn>,
|
|
||||||
tenantid: ZTenantId,
|
tenantid: ZTenantId,
|
||||||
full_backup: bool,
|
full_backup: bool,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
@@ -797,8 +796,7 @@ impl PageServerHandler {
|
|||||||
{
|
{
|
||||||
let mut writer = CopyDataSink { pgb };
|
let mut writer = CopyDataSink { pgb };
|
||||||
|
|
||||||
let basebackup =
|
let basebackup = basebackup::Basebackup::new(&mut writer, &timeline, lsn, full_backup)?;
|
||||||
basebackup::Basebackup::new(&mut writer, &timeline, lsn, prev_lsn, full_backup)?;
|
|
||||||
span.record("lsn", &basebackup.lsn.to_string().as_str());
|
span.record("lsn", &basebackup.lsn.to_string().as_str());
|
||||||
basebackup.send_tarball()?;
|
basebackup.send_tarball()?;
|
||||||
}
|
}
|
||||||
@@ -901,67 +899,33 @@ impl postgres_backend::Handler for PageServerHandler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Check that the timeline exists
|
// Check that the timeline exists
|
||||||
self.handle_basebackup_request(pgb, timelineid, lsn, None, tenantid, false)?;
|
self.handle_basebackup_request(pgb, timelineid, lsn, tenantid, false)?;
|
||||||
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||||
}
|
}
|
||||||
// return pair of prev_lsn and last_lsn
|
|
||||||
else if query_string.starts_with("get_last_record_rlsn ") {
|
|
||||||
let (_, params_raw) = query_string.split_at("get_last_record_rlsn ".len());
|
|
||||||
let params = params_raw.split_whitespace().collect::<Vec<_>>();
|
|
||||||
|
|
||||||
ensure!(
|
|
||||||
params.len() == 2,
|
|
||||||
"invalid param number for get_last_record_rlsn command"
|
|
||||||
);
|
|
||||||
|
|
||||||
let tenantid = ZTenantId::from_str(params[0])?;
|
|
||||||
let timelineid = ZTimelineId::from_str(params[1])?;
|
|
||||||
|
|
||||||
self.check_permission(Some(tenantid))?;
|
|
||||||
let timeline = tenant_mgr::get_local_timeline_with_load(tenantid, timelineid)
|
|
||||||
.context("Cannot load local timeline")?;
|
|
||||||
|
|
||||||
let end_of_timeline = timeline.tline.get_last_record_rlsn();
|
|
||||||
|
|
||||||
pgb.write_message_noflush(&BeMessage::RowDescription(&[
|
|
||||||
RowDescriptor::text_col(b"prev_lsn"),
|
|
||||||
RowDescriptor::text_col(b"last_lsn"),
|
|
||||||
]))?
|
|
||||||
.write_message_noflush(&BeMessage::DataRow(&[
|
|
||||||
Some(end_of_timeline.prev.to_string().as_bytes()),
|
|
||||||
Some(end_of_timeline.last.to_string().as_bytes()),
|
|
||||||
]))?
|
|
||||||
.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
|
||||||
}
|
|
||||||
// same as basebackup, but result includes relational data as well
|
// same as basebackup, but result includes relational data as well
|
||||||
else if query_string.starts_with("fullbackup ") {
|
else if query_string.starts_with("fullbackup ") {
|
||||||
let (_, params_raw) = query_string.split_at("fullbackup ".len());
|
let (_, params_raw) = query_string.split_at("fullbackup ".len());
|
||||||
let params = params_raw.split_whitespace().collect::<Vec<_>>();
|
let params = params_raw.split_whitespace().collect::<Vec<_>>();
|
||||||
|
|
||||||
ensure!(
|
ensure!(
|
||||||
params.len() >= 2,
|
params.len() == 3,
|
||||||
"invalid param number for fullbackup command"
|
"invalid param number for fullbackup command"
|
||||||
);
|
);
|
||||||
|
|
||||||
let tenantid = ZTenantId::from_str(params[0])?;
|
let tenantid = ZTenantId::from_str(params[0])?;
|
||||||
let timelineid = ZTimelineId::from_str(params[1])?;
|
let timelineid = ZTimelineId::from_str(params[1])?;
|
||||||
|
|
||||||
// The caller is responsible for providing correct lsn and prev_lsn.
|
|
||||||
let lsn = if params.len() > 2 {
|
|
||||||
Some(Lsn::from_str(params[2])?)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
let prev_lsn = if params.len() > 3 {
|
|
||||||
Some(Lsn::from_str(params[3])?)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
self.check_permission(Some(tenantid))?;
|
self.check_permission(Some(tenantid))?;
|
||||||
|
|
||||||
|
// Lsn is required for fullbackup, because otherwise we would not know
|
||||||
|
// at which lsn to upload this backup.
|
||||||
|
//
|
||||||
|
// The caller is responsible for providing a valid lsn
|
||||||
|
// and using it in the subsequent import.
|
||||||
|
let lsn = Some(Lsn::from_str(params[2])?);
|
||||||
|
|
||||||
// Check that the timeline exists
|
// Check that the timeline exists
|
||||||
self.handle_basebackup_request(pgb, timelineid, lsn, prev_lsn, tenantid, true)?;
|
self.handle_basebackup_request(pgb, timelineid, lsn, tenantid, true)?;
|
||||||
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
|
||||||
} else if query_string.starts_with("import basebackup ") {
|
} else if query_string.starts_with("import basebackup ") {
|
||||||
// Import the `base` section (everything but the wal) of a basebackup.
|
// Import the `base` section (everything but the wal) of a basebackup.
|
||||||
@@ -987,10 +951,7 @@ impl postgres_backend::Handler for PageServerHandler {
|
|||||||
|
|
||||||
match self.handle_import_basebackup(pgb, tenant, timeline, base_lsn, end_lsn) {
|
match self.handle_import_basebackup(pgb, tenant, timeline, base_lsn, end_lsn) {
|
||||||
Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
|
Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
|
||||||
Err(e) => {
|
Err(e) => pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?,
|
||||||
error!("error importing base backup between {base_lsn} and {end_lsn}: {e:?}");
|
|
||||||
pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
} else if query_string.starts_with("import wal ") {
|
} else if query_string.starts_with("import wal ") {
|
||||||
// Import the `pg_wal` section of a basebackup.
|
// Import the `pg_wal` section of a basebackup.
|
||||||
@@ -1009,10 +970,7 @@ impl postgres_backend::Handler for PageServerHandler {
|
|||||||
|
|
||||||
match self.handle_import_wal(pgb, tenant, timeline, start_lsn, end_lsn) {
|
match self.handle_import_wal(pgb, tenant, timeline, start_lsn, end_lsn) {
|
||||||
Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
|
Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
|
||||||
Err(e) => {
|
Err(e) => pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?,
|
||||||
error!("error importing WAL between {start_lsn} and {end_lsn}: {e:?}");
|
|
||||||
pgb.write_message_noflush(&BeMessage::ErrorResponse(&e.to_string()))?
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
} else if query_string.to_ascii_lowercase().starts_with("set ") {
|
} else if query_string.to_ascii_lowercase().starts_with("set ") {
|
||||||
// important because psycopg2 executes "SET datestyle TO 'ISO'"
|
// important because psycopg2 executes "SET datestyle TO 'ISO'"
|
||||||
|
|||||||
@@ -902,57 +902,6 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
///
|
|
||||||
/// Flush changes accumulated so far to the underlying repository.
|
|
||||||
///
|
|
||||||
/// Usually, changes made in DatadirModification are atomic, but this allows
|
|
||||||
/// you to flush them to the underlying repository before the final `commit`.
|
|
||||||
/// That allows to free up the memory used to hold the pending changes.
|
|
||||||
///
|
|
||||||
/// Currently only used during bulk import of a data directory. In that
|
|
||||||
/// context, breaking the atomicity is OK. If the import is interrupted, the
|
|
||||||
/// whole import fails and the timeline will be deleted anyway.
|
|
||||||
/// (Or to be precise, it will be left behind for debugging purposes and
|
|
||||||
/// ignored, see https://github.com/neondatabase/neon/pull/1809)
|
|
||||||
///
|
|
||||||
/// Note: A consequence of flushing the pending operations is that they
|
|
||||||
/// won't be visible to subsequent operations until `commit`. The function
|
|
||||||
/// retains all the metadata, but data pages are flushed. That's again OK
|
|
||||||
/// for bulk import, where you are just loading data pages and won't try to
|
|
||||||
/// modify the same pages twice.
|
|
||||||
pub fn flush(&mut self) -> Result<()> {
|
|
||||||
// Unless we have accumulated a decent amount of changes, it's not worth it
|
|
||||||
// to scan through the pending_updates list.
|
|
||||||
let pending_nblocks = self.pending_nblocks;
|
|
||||||
if pending_nblocks < 10000 {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let writer = self.tline.tline.writer();
|
|
||||||
|
|
||||||
// Flush relation and SLRU data blocks, keep metadata.
|
|
||||||
let mut result: Result<()> = Ok(());
|
|
||||||
self.pending_updates.retain(|&key, value| {
|
|
||||||
if result.is_ok() && (is_rel_block_key(key) || is_slru_block_key(key)) {
|
|
||||||
result = writer.put(key, self.lsn, value);
|
|
||||||
false
|
|
||||||
} else {
|
|
||||||
true
|
|
||||||
}
|
|
||||||
});
|
|
||||||
result?;
|
|
||||||
|
|
||||||
if pending_nblocks != 0 {
|
|
||||||
self.tline.current_logical_size.fetch_add(
|
|
||||||
pending_nblocks * pg_constants::BLCKSZ as isize,
|
|
||||||
Ordering::SeqCst,
|
|
||||||
);
|
|
||||||
self.pending_nblocks = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Finish this atomic update, writing all the updated keys to the
|
/// Finish this atomic update, writing all the updated keys to the
|
||||||
/// underlying timeline.
|
/// underlying timeline.
|
||||||
@@ -963,7 +912,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> {
|
|||||||
let pending_nblocks = self.pending_nblocks;
|
let pending_nblocks = self.pending_nblocks;
|
||||||
|
|
||||||
for (key, value) in self.pending_updates {
|
for (key, value) in self.pending_updates {
|
||||||
writer.put(key, self.lsn, &value)?;
|
writer.put(key, self.lsn, value)?;
|
||||||
}
|
}
|
||||||
for key_range in self.pending_deletions {
|
for key_range in self.pending_deletions {
|
||||||
writer.delete(key_range.clone(), self.lsn)?;
|
writer.delete(key_range.clone(), self.lsn)?;
|
||||||
@@ -1368,10 +1317,6 @@ pub fn key_to_rel_block(key: Key) -> Result<(RelTag, BlockNumber)> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_rel_block_key(key: Key) -> bool {
|
|
||||||
key.field1 == 0x00 && key.field4 != 0
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
|
pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
|
||||||
Ok(match key.field1 {
|
Ok(match key.field1 {
|
||||||
0x01 => {
|
0x01 => {
|
||||||
@@ -1390,12 +1335,6 @@ pub fn key_to_slru_block(key: Key) -> Result<(SlruKind, u32, BlockNumber)> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_slru_block_key(key: Key) -> bool {
|
|
||||||
key.field1 == 0x01 // SLRU-related
|
|
||||||
&& key.field3 == 0x00000001 // but not SlruDir
|
|
||||||
&& key.field6 != 0xffffffff // and not SlruSegSize
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
//
|
||||||
//-- Tests that should work the same with any Repository/Timeline implementation.
|
//-- Tests that should work the same with any Repository/Timeline implementation.
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ use byteorder::{ByteOrder, BE};
|
|||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::fmt::Display;
|
||||||
use std::ops::{AddAssign, Range};
|
use std::ops::{AddAssign, Range};
|
||||||
use std::sync::{Arc, RwLockReadGuard};
|
use std::sync::{Arc, RwLockReadGuard};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
@@ -181,6 +182,20 @@ impl Value {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
pub enum TimelineSyncStatusUpdate {
|
||||||
|
Downloaded,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for TimelineSyncStatusUpdate {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let s = match self {
|
||||||
|
TimelineSyncStatusUpdate::Downloaded => "Downloaded",
|
||||||
|
};
|
||||||
|
f.write_str(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
/// A repository corresponds to one .neon directory. One repository holds multiple
|
/// A repository corresponds to one .neon directory. One repository holds multiple
|
||||||
/// timelines, forked off from the same initial call to 'initdb'.
|
/// timelines, forked off from the same initial call to 'initdb'.
|
||||||
@@ -189,7 +204,11 @@ pub trait Repository: Send + Sync {
|
|||||||
|
|
||||||
/// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization.
|
/// Updates timeline based on the `TimelineSyncStatusUpdate`, received from the remote storage synchronization.
|
||||||
/// See [`crate::remote_storage`] for more details about the synchronization.
|
/// See [`crate::remote_storage`] for more details about the synchronization.
|
||||||
fn attach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>;
|
fn apply_timeline_remote_sync_status_update(
|
||||||
|
&self,
|
||||||
|
timeline_id: ZTimelineId,
|
||||||
|
timeline_sync_status_update: TimelineSyncStatusUpdate,
|
||||||
|
) -> Result<()>;
|
||||||
|
|
||||||
/// Get Timeline handle for given zenith timeline ID.
|
/// Get Timeline handle for given zenith timeline ID.
|
||||||
/// This function is idempotent. It doesn't change internal state in any way.
|
/// This function is idempotent. It doesn't change internal state in any way.
|
||||||
@@ -211,12 +230,7 @@ pub trait Repository: Send + Sync {
|
|||||||
) -> Result<Arc<Self::Timeline>>;
|
) -> Result<Arc<Self::Timeline>>;
|
||||||
|
|
||||||
/// Branch a timeline
|
/// Branch a timeline
|
||||||
fn branch_timeline(
|
fn branch_timeline(&self, src: ZTimelineId, dst: ZTimelineId, start_lsn: Lsn) -> Result<()>;
|
||||||
&self,
|
|
||||||
src: ZTimelineId,
|
|
||||||
dst: ZTimelineId,
|
|
||||||
start_lsn: Option<Lsn>,
|
|
||||||
) -> Result<()>;
|
|
||||||
|
|
||||||
/// Flush all data to disk.
|
/// Flush all data to disk.
|
||||||
///
|
///
|
||||||
@@ -246,10 +260,10 @@ pub trait Repository: Send + Sync {
|
|||||||
/// api's 'compact' command.
|
/// api's 'compact' command.
|
||||||
fn compaction_iteration(&self) -> Result<()>;
|
fn compaction_iteration(&self) -> Result<()>;
|
||||||
|
|
||||||
/// removes timeline-related in-memory data
|
/// detaches timeline-related in-memory data.
|
||||||
fn delete_timeline(&self, timeline_id: ZTimelineId) -> anyhow::Result<()>;
|
fn detach_timeline(&self, timeline_id: ZTimelineId) -> Result<()>;
|
||||||
|
|
||||||
/// Allows to retrieve remote timeline index from the repo. Used in walreceiver to grab remote consistent lsn.
|
// Allows to retrieve remote timeline index from the repo. Used in walreceiver to grab remote consistent lsn.
|
||||||
fn get_remote_index(&self) -> &RemoteIndex;
|
fn get_remote_index(&self) -> &RemoteIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -393,7 +407,7 @@ pub trait TimelineWriter<'a> {
|
|||||||
///
|
///
|
||||||
/// This will implicitly extend the relation, if the page is beyond the
|
/// This will implicitly extend the relation, if the page is beyond the
|
||||||
/// current end-of-file.
|
/// current end-of-file.
|
||||||
fn put(&self, key: Key, lsn: Lsn, value: &Value) -> Result<()>;
|
fn put(&self, key: Key, lsn: Lsn, value: Value) -> Result<()>;
|
||||||
|
|
||||||
fn delete(&self, key_range: Range<Key>, lsn: Lsn) -> Result<()>;
|
fn delete(&self, key_range: Range<Key>, lsn: Lsn) -> Result<()>;
|
||||||
|
|
||||||
@@ -523,7 +537,7 @@ pub mod repo_harness {
|
|||||||
TenantConfOpt::from(self.tenant_conf),
|
TenantConfOpt::from(self.tenant_conf),
|
||||||
walredo_mgr,
|
walredo_mgr,
|
||||||
self.tenant_id,
|
self.tenant_id,
|
||||||
RemoteIndex::default(),
|
RemoteIndex::empty(),
|
||||||
false,
|
false,
|
||||||
);
|
);
|
||||||
// populate repo with locally available timelines
|
// populate repo with locally available timelines
|
||||||
@@ -539,7 +553,10 @@ pub mod repo_harness {
|
|||||||
.parse()
|
.parse()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
repo.attach_timeline(timeline_id)?;
|
repo.apply_timeline_remote_sync_status_update(
|
||||||
|
timeline_id,
|
||||||
|
TimelineSyncStatusUpdate::Downloaded,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(repo)
|
Ok(repo)
|
||||||
@@ -603,12 +620,12 @@ mod tests {
|
|||||||
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
|
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
|
||||||
|
|
||||||
let writer = tline.writer();
|
let writer = tline.writer();
|
||||||
writer.put(*TEST_KEY, Lsn(0x10), &Value::Image(TEST_IMG("foo at 0x10")))?;
|
writer.put(*TEST_KEY, Lsn(0x10), Value::Image(TEST_IMG("foo at 0x10")))?;
|
||||||
writer.finish_write(Lsn(0x10));
|
writer.finish_write(Lsn(0x10));
|
||||||
drop(writer);
|
drop(writer);
|
||||||
|
|
||||||
let writer = tline.writer();
|
let writer = tline.writer();
|
||||||
writer.put(*TEST_KEY, Lsn(0x20), &Value::Image(TEST_IMG("foo at 0x20")))?;
|
writer.put(*TEST_KEY, Lsn(0x20), Value::Image(TEST_IMG("foo at 0x20")))?;
|
||||||
writer.finish_write(Lsn(0x20));
|
writer.finish_write(Lsn(0x20));
|
||||||
drop(writer);
|
drop(writer);
|
||||||
|
|
||||||
@@ -655,24 +672,24 @@ mod tests {
|
|||||||
let TEST_KEY_B: Key = Key::from_hex("112222222233333333444444445500000002").unwrap();
|
let TEST_KEY_B: Key = Key::from_hex("112222222233333333444444445500000002").unwrap();
|
||||||
|
|
||||||
// Insert a value on the timeline
|
// Insert a value on the timeline
|
||||||
writer.put(TEST_KEY_A, Lsn(0x20), &test_value("foo at 0x20"))?;
|
writer.put(TEST_KEY_A, Lsn(0x20), test_value("foo at 0x20"))?;
|
||||||
writer.put(TEST_KEY_B, Lsn(0x20), &test_value("foobar at 0x20"))?;
|
writer.put(TEST_KEY_B, Lsn(0x20), test_value("foobar at 0x20"))?;
|
||||||
writer.finish_write(Lsn(0x20));
|
writer.finish_write(Lsn(0x20));
|
||||||
|
|
||||||
writer.put(TEST_KEY_A, Lsn(0x30), &test_value("foo at 0x30"))?;
|
writer.put(TEST_KEY_A, Lsn(0x30), test_value("foo at 0x30"))?;
|
||||||
writer.finish_write(Lsn(0x30));
|
writer.finish_write(Lsn(0x30));
|
||||||
writer.put(TEST_KEY_A, Lsn(0x40), &test_value("foo at 0x40"))?;
|
writer.put(TEST_KEY_A, Lsn(0x40), test_value("foo at 0x40"))?;
|
||||||
writer.finish_write(Lsn(0x40));
|
writer.finish_write(Lsn(0x40));
|
||||||
|
|
||||||
//assert_current_logical_size(&tline, Lsn(0x40));
|
//assert_current_logical_size(&tline, Lsn(0x40));
|
||||||
|
|
||||||
// Branch the history, modify relation differently on the new timeline
|
// Branch the history, modify relation differently on the new timeline
|
||||||
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x30)))?;
|
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x30))?;
|
||||||
let newtline = repo
|
let newtline = repo
|
||||||
.get_timeline_load(NEW_TIMELINE_ID)
|
.get_timeline_load(NEW_TIMELINE_ID)
|
||||||
.expect("Should have a local timeline");
|
.expect("Should have a local timeline");
|
||||||
let new_writer = newtline.writer();
|
let new_writer = newtline.writer();
|
||||||
new_writer.put(TEST_KEY_A, Lsn(0x40), &test_value("bar at 0x40"))?;
|
new_writer.put(TEST_KEY_A, Lsn(0x40), test_value("bar at 0x40"))?;
|
||||||
new_writer.finish_write(Lsn(0x40));
|
new_writer.finish_write(Lsn(0x40));
|
||||||
|
|
||||||
// Check page contents on both branches
|
// Check page contents on both branches
|
||||||
@@ -703,14 +720,14 @@ mod tests {
|
|||||||
writer.put(
|
writer.put(
|
||||||
*TEST_KEY,
|
*TEST_KEY,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
|
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
|
||||||
)?;
|
)?;
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
lsn += 0x10;
|
lsn += 0x10;
|
||||||
writer.put(
|
writer.put(
|
||||||
*TEST_KEY,
|
*TEST_KEY,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
|
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
|
||||||
)?;
|
)?;
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
lsn += 0x10;
|
lsn += 0x10;
|
||||||
@@ -721,14 +738,14 @@ mod tests {
|
|||||||
writer.put(
|
writer.put(
|
||||||
*TEST_KEY,
|
*TEST_KEY,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
|
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
|
||||||
)?;
|
)?;
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
lsn += 0x10;
|
lsn += 0x10;
|
||||||
writer.put(
|
writer.put(
|
||||||
*TEST_KEY,
|
*TEST_KEY,
|
||||||
lsn,
|
lsn,
|
||||||
&Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
|
Value::Image(TEST_IMG(&format!("foo at {}", lsn))),
|
||||||
)?;
|
)?;
|
||||||
writer.finish_write(lsn);
|
writer.finish_write(lsn);
|
||||||
}
|
}
|
||||||
@@ -749,7 +766,7 @@ mod tests {
|
|||||||
repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?;
|
repo.gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, false)?;
|
||||||
|
|
||||||
// try to branch at lsn 25, should fail because we already garbage collected the data
|
// try to branch at lsn 25, should fail because we already garbage collected the data
|
||||||
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) {
|
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x25)) {
|
||||||
Ok(_) => panic!("branching should have failed"),
|
Ok(_) => panic!("branching should have failed"),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
assert!(err.to_string().contains("invalid branch start lsn"));
|
assert!(err.to_string().contains("invalid branch start lsn"));
|
||||||
@@ -770,7 +787,7 @@ mod tests {
|
|||||||
|
|
||||||
repo.create_empty_timeline(TIMELINE_ID, Lsn(0x50))?;
|
repo.create_empty_timeline(TIMELINE_ID, Lsn(0x50))?;
|
||||||
// try to branch at lsn 0x25, should fail because initdb lsn is 0x50
|
// try to branch at lsn 0x25, should fail because initdb lsn is 0x50
|
||||||
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x25))) {
|
match repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x25)) {
|
||||||
Ok(_) => panic!("branching should have failed"),
|
Ok(_) => panic!("branching should have failed"),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
assert!(&err.to_string().contains("invalid branch start lsn"));
|
assert!(&err.to_string().contains("invalid branch start lsn"));
|
||||||
@@ -815,7 +832,7 @@ mod tests {
|
|||||||
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
|
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
|
||||||
make_some_layers(tline.as_ref(), Lsn(0x20))?;
|
make_some_layers(tline.as_ref(), Lsn(0x20))?;
|
||||||
|
|
||||||
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
|
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
|
||||||
let newtline = repo
|
let newtline = repo
|
||||||
.get_timeline_load(NEW_TIMELINE_ID)
|
.get_timeline_load(NEW_TIMELINE_ID)
|
||||||
.expect("Should have a local timeline");
|
.expect("Should have a local timeline");
|
||||||
@@ -831,7 +848,7 @@ mod tests {
|
|||||||
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
|
let tline = repo.create_empty_timeline(TIMELINE_ID, Lsn(0))?;
|
||||||
make_some_layers(tline.as_ref(), Lsn(0x20))?;
|
make_some_layers(tline.as_ref(), Lsn(0x20))?;
|
||||||
|
|
||||||
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
|
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
|
||||||
let newtline = repo
|
let newtline = repo
|
||||||
.get_timeline_load(NEW_TIMELINE_ID)
|
.get_timeline_load(NEW_TIMELINE_ID)
|
||||||
.expect("Should have a local timeline");
|
.expect("Should have a local timeline");
|
||||||
@@ -889,7 +906,7 @@ mod tests {
|
|||||||
make_some_layers(tline.as_ref(), Lsn(0x20))?;
|
make_some_layers(tline.as_ref(), Lsn(0x20))?;
|
||||||
tline.checkpoint(CheckpointConfig::Forced)?;
|
tline.checkpoint(CheckpointConfig::Forced)?;
|
||||||
|
|
||||||
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Some(Lsn(0x40)))?;
|
repo.branch_timeline(TIMELINE_ID, NEW_TIMELINE_ID, Lsn(0x40))?;
|
||||||
|
|
||||||
let newtline = repo
|
let newtline = repo
|
||||||
.get_timeline_load(NEW_TIMELINE_ID)
|
.get_timeline_load(NEW_TIMELINE_ID)
|
||||||
|
|||||||
@@ -178,8 +178,9 @@ use crate::{
|
|||||||
metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME},
|
metadata::{metadata_path, TimelineMetadata, METADATA_FILE_NAME},
|
||||||
LayeredRepository,
|
LayeredRepository,
|
||||||
},
|
},
|
||||||
|
repository::TimelineSyncStatusUpdate,
|
||||||
storage_sync::{self, index::RemoteIndex},
|
storage_sync::{self, index::RemoteIndex},
|
||||||
tenant_mgr::attach_downloaded_tenants,
|
tenant_mgr::apply_timeline_sync_status_updates,
|
||||||
thread_mgr,
|
thread_mgr,
|
||||||
thread_mgr::ThreadKind,
|
thread_mgr::ThreadKind,
|
||||||
};
|
};
|
||||||
@@ -190,8 +191,7 @@ use metrics::{
|
|||||||
};
|
};
|
||||||
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
|
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
|
||||||
|
|
||||||
use self::download::download_index_parts;
|
pub use self::download::download_index_part;
|
||||||
pub use self::download::gather_tenant_timelines_index_parts;
|
|
||||||
pub use self::download::TEMP_DOWNLOAD_EXTENSION;
|
pub use self::download::TEMP_DOWNLOAD_EXTENSION;
|
||||||
|
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
@@ -301,7 +301,7 @@ pub fn start_local_timeline_sync(
|
|||||||
}
|
}
|
||||||
Ok(SyncStartupData {
|
Ok(SyncStartupData {
|
||||||
local_timeline_init_statuses,
|
local_timeline_init_statuses,
|
||||||
remote_index: RemoteIndex::default(),
|
remote_index: RemoteIndex::empty(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -835,7 +835,7 @@ where
|
|||||||
.build()
|
.build()
|
||||||
.context("Failed to create storage sync runtime")?;
|
.context("Failed to create storage sync runtime")?;
|
||||||
|
|
||||||
let applicable_index_parts = runtime.block_on(download_index_parts(
|
let applicable_index_parts = runtime.block_on(try_fetch_index_parts(
|
||||||
conf,
|
conf,
|
||||||
&storage,
|
&storage,
|
||||||
local_timeline_files.keys().copied().collect(),
|
local_timeline_files.keys().copied().collect(),
|
||||||
@@ -918,48 +918,16 @@ fn storage_sync_loop<P, S>(
|
|||||||
});
|
});
|
||||||
|
|
||||||
match loop_step {
|
match loop_step {
|
||||||
ControlFlow::Continue(updated_tenants) => {
|
ControlFlow::Continue(new_timeline_states) => {
|
||||||
if updated_tenants.is_empty() {
|
if new_timeline_states.is_empty() {
|
||||||
debug!("Sync loop step completed, no new tenant states");
|
debug!("Sync loop step completed, no new timeline states");
|
||||||
} else {
|
} else {
|
||||||
info!(
|
info!(
|
||||||
"Sync loop step completed, {} new tenant state update(s)",
|
"Sync loop step completed, {} new timeline state update(s)",
|
||||||
updated_tenants.len()
|
new_timeline_states.len()
|
||||||
);
|
);
|
||||||
let mut sync_status_updates: HashMap<ZTenantId, HashSet<ZTimelineId>> =
|
|
||||||
HashMap::new();
|
|
||||||
let index_accessor = runtime.block_on(index.write());
|
|
||||||
for tenant_id in updated_tenants {
|
|
||||||
let tenant_entry = match index_accessor.tenant_entry(&tenant_id) {
|
|
||||||
Some(tenant_entry) => tenant_entry,
|
|
||||||
None => {
|
|
||||||
error!(
|
|
||||||
"cannot find tenant in remote index for timeline sync update"
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if tenant_entry.has_in_progress_downloads() {
|
|
||||||
info!("Tenant {tenant_id} has pending timeline downloads, skipping repository registration");
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
info!(
|
|
||||||
"Tenant {tenant_id} download completed. Picking to register in repository"
|
|
||||||
);
|
|
||||||
// Here we assume that if tenant has no in-progress downloads that
|
|
||||||
// means that it is the last completed timeline download that triggered
|
|
||||||
// sync status update. So we look at the index for available timelines
|
|
||||||
// and register them all at once in a repository for download
|
|
||||||
// to be submitted in a single operation to repository
|
|
||||||
// so it can apply them at once to internal timeline map.
|
|
||||||
sync_status_updates
|
|
||||||
.insert(tenant_id, tenant_entry.keys().copied().collect());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
drop(index_accessor);
|
|
||||||
// Batch timeline download registration to ensure that the external registration code won't block any running tasks before.
|
// Batch timeline download registration to ensure that the external registration code won't block any running tasks before.
|
||||||
attach_downloaded_tenants(conf, &index, sync_status_updates);
|
apply_timeline_sync_status_updates(conf, &index, new_timeline_states);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ControlFlow::Break(()) => {
|
ControlFlow::Break(()) => {
|
||||||
@@ -970,14 +938,6 @@ fn storage_sync_loop<P, S>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// needed to check whether the download happened
|
|
||||||
// more informative than just a bool
|
|
||||||
#[derive(Debug)]
|
|
||||||
enum DownloadMarker {
|
|
||||||
Downloaded,
|
|
||||||
Nothing,
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn process_batches<P, S>(
|
async fn process_batches<P, S>(
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
max_sync_errors: NonZeroU32,
|
max_sync_errors: NonZeroU32,
|
||||||
@@ -985,7 +945,7 @@ async fn process_batches<P, S>(
|
|||||||
index: &RemoteIndex,
|
index: &RemoteIndex,
|
||||||
batched_tasks: HashMap<ZTenantTimelineId, SyncTaskBatch>,
|
batched_tasks: HashMap<ZTenantTimelineId, SyncTaskBatch>,
|
||||||
sync_queue: &SyncQueue,
|
sync_queue: &SyncQueue,
|
||||||
) -> HashSet<ZTenantId>
|
) -> HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncStatusUpdate>>
|
||||||
where
|
where
|
||||||
P: Debug + Send + Sync + 'static,
|
P: Debug + Send + Sync + 'static,
|
||||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||||
@@ -1010,19 +970,22 @@ where
|
|||||||
})
|
})
|
||||||
.collect::<FuturesUnordered<_>>();
|
.collect::<FuturesUnordered<_>>();
|
||||||
|
|
||||||
let mut downloaded_timelines = HashSet::new();
|
let mut new_timeline_states: HashMap<
|
||||||
|
ZTenantId,
|
||||||
|
HashMap<ZTimelineId, TimelineSyncStatusUpdate>,
|
||||||
|
> = HashMap::new();
|
||||||
|
|
||||||
while let Some((sync_id, download_marker)) = sync_results.next().await {
|
while let Some((sync_id, state_update)) = sync_results.next().await {
|
||||||
debug!(
|
debug!("Finished storage sync task for sync id {sync_id}");
|
||||||
"Finished storage sync task for sync id {sync_id} download marker {:?}",
|
if let Some(state_update) = state_update {
|
||||||
download_marker
|
new_timeline_states
|
||||||
);
|
.entry(sync_id.tenant_id)
|
||||||
if matches!(download_marker, DownloadMarker::Downloaded) {
|
.or_default()
|
||||||
downloaded_timelines.insert(sync_id.tenant_id);
|
.insert(sync_id.timeline_id, state_update);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
downloaded_timelines
|
new_timeline_states
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn process_sync_task_batch<P, S>(
|
async fn process_sync_task_batch<P, S>(
|
||||||
@@ -1031,7 +994,7 @@ async fn process_sync_task_batch<P, S>(
|
|||||||
max_sync_errors: NonZeroU32,
|
max_sync_errors: NonZeroU32,
|
||||||
sync_id: ZTenantTimelineId,
|
sync_id: ZTenantTimelineId,
|
||||||
batch: SyncTaskBatch,
|
batch: SyncTaskBatch,
|
||||||
) -> DownloadMarker
|
) -> Option<TimelineSyncStatusUpdate>
|
||||||
where
|
where
|
||||||
P: Debug + Send + Sync + 'static,
|
P: Debug + Send + Sync + 'static,
|
||||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||||
@@ -1116,7 +1079,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DownloadMarker::Nothing
|
None
|
||||||
}
|
}
|
||||||
.instrument(info_span!("download_timeline_data")),
|
.instrument(info_span!("download_timeline_data")),
|
||||||
);
|
);
|
||||||
@@ -1170,7 +1133,7 @@ async fn download_timeline_data<P, S>(
|
|||||||
new_download_data: SyncData<LayersDownload>,
|
new_download_data: SyncData<LayersDownload>,
|
||||||
sync_start: Instant,
|
sync_start: Instant,
|
||||||
task_name: &str,
|
task_name: &str,
|
||||||
) -> DownloadMarker
|
) -> Option<TimelineSyncStatusUpdate>
|
||||||
where
|
where
|
||||||
P: Debug + Send + Sync + 'static,
|
P: Debug + Send + Sync + 'static,
|
||||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||||
@@ -1199,7 +1162,7 @@ where
|
|||||||
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
|
Ok(()) => match index.write().await.set_awaits_download(&sync_id, false) {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
register_sync_status(sync_id, sync_start, task_name, Some(true));
|
register_sync_status(sync_id, sync_start, task_name, Some(true));
|
||||||
return DownloadMarker::Downloaded;
|
return Some(TimelineSyncStatusUpdate::Downloaded);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
|
error!("Timeline {sync_id} was expected to be in the remote index after a successful download, but it's absent: {e:?}");
|
||||||
@@ -1215,7 +1178,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DownloadMarker::Nothing
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn update_local_metadata(
|
async fn update_local_metadata(
|
||||||
@@ -1495,6 +1458,35 @@ async fn validate_task_retries<T>(
|
|||||||
ControlFlow::Continue(sync_data)
|
ControlFlow::Continue(sync_data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn try_fetch_index_parts<P, S>(
|
||||||
|
conf: &'static PageServerConf,
|
||||||
|
storage: &S,
|
||||||
|
keys: HashSet<ZTenantTimelineId>,
|
||||||
|
) -> HashMap<ZTenantTimelineId, IndexPart>
|
||||||
|
where
|
||||||
|
P: Debug + Send + Sync + 'static,
|
||||||
|
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||||
|
{
|
||||||
|
let mut index_parts = HashMap::with_capacity(keys.len());
|
||||||
|
|
||||||
|
let mut part_downloads = keys
|
||||||
|
.into_iter()
|
||||||
|
.map(|id| async move { (id, download_index_part(conf, storage, id).await) })
|
||||||
|
.collect::<FuturesUnordered<_>>();
|
||||||
|
|
||||||
|
while let Some((id, part_upload_result)) = part_downloads.next().await {
|
||||||
|
match part_upload_result {
|
||||||
|
Ok(index_part) => {
|
||||||
|
debug!("Successfully fetched index part for {id}");
|
||||||
|
index_parts.insert(id, index_part);
|
||||||
|
}
|
||||||
|
Err(e) => warn!("Failed to fetch index part for {id}: {e}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
index_parts
|
||||||
|
}
|
||||||
|
|
||||||
fn schedule_first_sync_tasks(
|
fn schedule_first_sync_tasks(
|
||||||
index: &mut RemoteTimelineIndex,
|
index: &mut RemoteTimelineIndex,
|
||||||
sync_queue: &SyncQueue,
|
sync_queue: &SyncQueue,
|
||||||
|
|||||||
@@ -1,14 +1,10 @@
|
|||||||
//! Timeline synchronization logic to fetch the layer files from remote storage into pageserver's local directory.
|
//! Timeline synchronization logic to fetch the layer files from remote storage into pageserver's local directory.
|
||||||
|
|
||||||
use std::{
|
use std::{collections::HashSet, fmt::Debug, path::Path};
|
||||||
collections::{HashMap, HashSet},
|
|
||||||
fmt::Debug,
|
|
||||||
path::Path,
|
|
||||||
};
|
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use futures::stream::{FuturesUnordered, StreamExt};
|
use futures::stream::{FuturesUnordered, StreamExt};
|
||||||
use remote_storage::{path_with_suffix_extension, RemoteObjectName, RemoteStorage};
|
use remote_storage::{path_with_suffix_extension, RemoteStorage};
|
||||||
use tokio::{
|
use tokio::{
|
||||||
fs,
|
fs,
|
||||||
io::{self, AsyncWriteExt},
|
io::{self, AsyncWriteExt},
|
||||||
@@ -18,7 +14,7 @@ use tracing::{debug, error, info, warn};
|
|||||||
use crate::{
|
use crate::{
|
||||||
config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
|
config::PageServerConf, layered_repository::metadata::metadata_path, storage_sync::SyncTask,
|
||||||
};
|
};
|
||||||
use utils::zid::{ZTenantId, ZTenantTimelineId, ZTimelineId};
|
use utils::zid::ZTenantTimelineId;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
index::{IndexPart, RemoteTimeline},
|
index::{IndexPart, RemoteTimeline},
|
||||||
@@ -27,108 +23,8 @@ use super::{
|
|||||||
|
|
||||||
pub const TEMP_DOWNLOAD_EXTENSION: &str = "temp_download";
|
pub const TEMP_DOWNLOAD_EXTENSION: &str = "temp_download";
|
||||||
|
|
||||||
/// FIXME: Needs cleanup. Currently it swallows errors. Here we need to ensure that
|
|
||||||
/// we successfully downloaded all metadata parts for one tenant.
|
|
||||||
/// And successful includes absence of index_part in the remote. Because it is valid situation
|
|
||||||
/// when timeline was just created and pageserver restarted before upload of index part was completed.
|
|
||||||
/// But currently RemoteStorage interface does not provide this knowledge because it uses
|
|
||||||
/// anyhow::Error as an error type. So this needs a refactoring.
|
|
||||||
///
|
|
||||||
/// In other words we need to yield only complete sets of tenant timelines.
|
|
||||||
/// Failure for one timeline of a tenant should exclude whole tenant from returned hashmap.
|
|
||||||
/// So there are two requirements: keep everything in one futures unordered
|
|
||||||
/// to allow higher concurrency. Mark tenants as failed independently.
|
|
||||||
/// That requires some bookeeping.
|
|
||||||
pub async fn download_index_parts<P, S>(
|
|
||||||
conf: &'static PageServerConf,
|
|
||||||
storage: &S,
|
|
||||||
keys: HashSet<ZTenantTimelineId>,
|
|
||||||
) -> HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>>
|
|
||||||
where
|
|
||||||
P: Debug + Send + Sync + 'static,
|
|
||||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
|
||||||
{
|
|
||||||
let mut index_parts: HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>> = HashMap::new();
|
|
||||||
|
|
||||||
let mut part_downloads = keys
|
|
||||||
.into_iter()
|
|
||||||
.map(|id| async move { (id, download_index_part(conf, storage, id).await) })
|
|
||||||
.collect::<FuturesUnordered<_>>();
|
|
||||||
|
|
||||||
while let Some((id, part_upload_result)) = part_downloads.next().await {
|
|
||||||
match part_upload_result {
|
|
||||||
Ok(index_part) => {
|
|
||||||
debug!("Successfully fetched index part for {id}");
|
|
||||||
index_parts
|
|
||||||
.entry(id.tenant_id)
|
|
||||||
.or_default()
|
|
||||||
.insert(id.timeline_id, index_part);
|
|
||||||
}
|
|
||||||
Err(e) => error!("Failed to fetch index part for {id}: {e}"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
index_parts
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Note: The function is rather expensive from s3 access point of view, it will execute ceil(N/1000) + N requests.
|
|
||||||
/// At least one request to obtain a list of tenant timelines (more requests if there are more than 1000 timelines).
|
|
||||||
/// And then will attempt to download all index files that belong to these timelines.
|
|
||||||
pub async fn gather_tenant_timelines_index_parts<P, S>(
|
|
||||||
conf: &'static PageServerConf,
|
|
||||||
storage: &S,
|
|
||||||
tenant_id: ZTenantId,
|
|
||||||
) -> anyhow::Result<HashMap<ZTimelineId, IndexPart>>
|
|
||||||
where
|
|
||||||
P: RemoteObjectName + Debug + Send + Sync + 'static,
|
|
||||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
|
||||||
{
|
|
||||||
let tenant_path = conf.timelines_path(&tenant_id);
|
|
||||||
let tenant_storage_path = storage.remote_object_id(&tenant_path).with_context(|| {
|
|
||||||
format!(
|
|
||||||
"Failed to get tenant storage path for local path '{}'",
|
|
||||||
tenant_path.display()
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let timelines = storage
|
|
||||||
.list_prefixes(Some(tenant_storage_path))
|
|
||||||
.await
|
|
||||||
.with_context(|| {
|
|
||||||
format!(
|
|
||||||
"Failed to list tenant storage path to get remote timelines to download: {}",
|
|
||||||
tenant_id
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let mut sync_ids = HashSet::new();
|
|
||||||
|
|
||||||
for timeline_remote_storage_key in timelines {
|
|
||||||
let object_name = timeline_remote_storage_key.object_name().ok_or_else(|| {
|
|
||||||
anyhow::anyhow!("failed to get timeline id for remote tenant {tenant_id}")
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let timeline_id: ZTimelineId = object_name
|
|
||||||
.parse()
|
|
||||||
.with_context(|| {
|
|
||||||
format!("failed to parse object name into timeline id for tenant {tenant_id} '{object_name}'")
|
|
||||||
})?;
|
|
||||||
|
|
||||||
sync_ids.insert(ZTenantTimelineId {
|
|
||||||
tenant_id,
|
|
||||||
timeline_id,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
download_index_parts(conf, storage, sync_ids)
|
|
||||||
.await
|
|
||||||
.remove(&tenant_id)
|
|
||||||
.ok_or(anyhow::anyhow!(
|
|
||||||
"Missing tenant index parts. This is a bug."
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retrieves index data from the remote storage for a given timeline.
|
/// Retrieves index data from the remote storage for a given timeline.
|
||||||
async fn download_index_part<P, S>(
|
pub async fn download_index_part<P, S>(
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
storage: &S,
|
storage: &S,
|
||||||
sync_id: ZTenantTimelineId,
|
sync_id: ZTenantTimelineId,
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
//! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about
|
//! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about
|
||||||
//! remote timeline layers and its metadata.
|
//! remote timeline layers and its metadata.
|
||||||
|
|
||||||
use std::ops::{Deref, DerefMut};
|
|
||||||
use std::{
|
use std::{
|
||||||
collections::{HashMap, HashSet},
|
collections::{HashMap, HashSet},
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
@@ -15,10 +14,7 @@ use serde_with::{serde_as, DisplayFromStr};
|
|||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
use crate::{config::PageServerConf, layered_repository::metadata::TimelineMetadata};
|
use crate::{config::PageServerConf, layered_repository::metadata::TimelineMetadata};
|
||||||
use utils::{
|
use utils::{lsn::Lsn, zid::ZTenantTimelineId};
|
||||||
lsn::Lsn,
|
|
||||||
zid::{ZTenantId, ZTenantTimelineId, ZTimelineId},
|
|
||||||
};
|
|
||||||
|
|
||||||
/// A part of the filesystem path, that needs a root to become a path again.
|
/// A part of the filesystem path, that needs a root to become a path again.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
||||||
@@ -45,68 +41,38 @@ impl RelativePath {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default)]
|
|
||||||
pub struct TenantEntry(HashMap<ZTimelineId, RemoteTimeline>);
|
|
||||||
|
|
||||||
impl TenantEntry {
|
|
||||||
pub fn has_in_progress_downloads(&self) -> bool {
|
|
||||||
self.values()
|
|
||||||
.any(|remote_timeline| remote_timeline.awaits_download)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Deref for TenantEntry {
|
|
||||||
type Target = HashMap<ZTimelineId, RemoteTimeline>;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
&self.0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DerefMut for TenantEntry {
|
|
||||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
||||||
&mut self.0
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<HashMap<ZTimelineId, RemoteTimeline>> for TenantEntry {
|
|
||||||
fn from(inner: HashMap<ZTimelineId, RemoteTimeline>) -> Self {
|
|
||||||
Self(inner)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An index to track tenant files that exist on the remote storage.
|
/// An index to track tenant files that exist on the remote storage.
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct RemoteTimelineIndex {
|
pub struct RemoteTimelineIndex {
|
||||||
entries: HashMap<ZTenantId, TenantEntry>,
|
timeline_entries: HashMap<ZTenantTimelineId, RemoteTimeline>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A wrapper to synchronize the access to the index, should be created and used before dealing with any [`RemoteTimelineIndex`].
|
/// A wrapper to synchronize the access to the index, should be created and used before dealing with any [`RemoteTimelineIndex`].
|
||||||
#[derive(Default)]
|
|
||||||
pub struct RemoteIndex(Arc<RwLock<RemoteTimelineIndex>>);
|
pub struct RemoteIndex(Arc<RwLock<RemoteTimelineIndex>>);
|
||||||
|
|
||||||
impl RemoteIndex {
|
impl RemoteIndex {
|
||||||
|
pub fn empty() -> Self {
|
||||||
|
Self(Arc::new(RwLock::new(RemoteTimelineIndex {
|
||||||
|
timeline_entries: HashMap::new(),
|
||||||
|
})))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn from_parts(
|
pub fn from_parts(
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
index_parts: HashMap<ZTenantId, HashMap<ZTimelineId, IndexPart>>,
|
index_parts: HashMap<ZTenantTimelineId, IndexPart>,
|
||||||
) -> anyhow::Result<Self> {
|
) -> anyhow::Result<Self> {
|
||||||
let mut entries: HashMap<ZTenantId, TenantEntry> = HashMap::new();
|
let mut timeline_entries = HashMap::new();
|
||||||
|
|
||||||
for (tenant_id, timelines) in index_parts {
|
for (sync_id, index_part) in index_parts {
|
||||||
for (timeline_id, index_part) in timelines {
|
let timeline_path = conf.timeline_path(&sync_id.timeline_id, &sync_id.tenant_id);
|
||||||
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
|
let remote_timeline = RemoteTimeline::from_index_part(&timeline_path, index_part)
|
||||||
let remote_timeline =
|
.context("Failed to restore remote timeline data from index part")?;
|
||||||
RemoteTimeline::from_index_part(&timeline_path, index_part)
|
timeline_entries.insert(sync_id, remote_timeline);
|
||||||
.context("Failed to restore remote timeline data from index part")?;
|
|
||||||
|
|
||||||
entries
|
|
||||||
.entry(tenant_id)
|
|
||||||
.or_default()
|
|
||||||
.insert(timeline_id, remote_timeline);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Self(Arc::new(RwLock::new(RemoteTimelineIndex { entries }))))
|
Ok(Self(Arc::new(RwLock::new(RemoteTimelineIndex {
|
||||||
|
timeline_entries,
|
||||||
|
}))))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn read(&self) -> tokio::sync::RwLockReadGuard<'_, RemoteTimelineIndex> {
|
pub async fn read(&self) -> tokio::sync::RwLockReadGuard<'_, RemoteTimelineIndex> {
|
||||||
@@ -125,67 +91,20 @@ impl Clone for RemoteIndex {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl RemoteTimelineIndex {
|
impl RemoteTimelineIndex {
|
||||||
pub fn timeline_entry(
|
pub fn timeline_entry(&self, id: &ZTenantTimelineId) -> Option<&RemoteTimeline> {
|
||||||
&self,
|
self.timeline_entries.get(id)
|
||||||
ZTenantTimelineId {
|
|
||||||
tenant_id,
|
|
||||||
timeline_id,
|
|
||||||
}: &ZTenantTimelineId,
|
|
||||||
) -> Option<&RemoteTimeline> {
|
|
||||||
self.entries.get(tenant_id)?.get(timeline_id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn timeline_entry_mut(
|
pub fn timeline_entry_mut(&mut self, id: &ZTenantTimelineId) -> Option<&mut RemoteTimeline> {
|
||||||
&mut self,
|
self.timeline_entries.get_mut(id)
|
||||||
ZTenantTimelineId {
|
|
||||||
tenant_id,
|
|
||||||
timeline_id,
|
|
||||||
}: &ZTenantTimelineId,
|
|
||||||
) -> Option<&mut RemoteTimeline> {
|
|
||||||
self.entries.get_mut(tenant_id)?.get_mut(timeline_id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_timeline_entry(
|
pub fn add_timeline_entry(&mut self, id: ZTenantTimelineId, entry: RemoteTimeline) {
|
||||||
&mut self,
|
self.timeline_entries.insert(id, entry);
|
||||||
ZTenantTimelineId {
|
|
||||||
tenant_id,
|
|
||||||
timeline_id,
|
|
||||||
}: ZTenantTimelineId,
|
|
||||||
entry: RemoteTimeline,
|
|
||||||
) {
|
|
||||||
self.entries
|
|
||||||
.entry(tenant_id)
|
|
||||||
.or_default()
|
|
||||||
.insert(timeline_id, entry);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn remove_timeline_entry(
|
pub fn all_sync_ids(&self) -> impl Iterator<Item = ZTenantTimelineId> + '_ {
|
||||||
&mut self,
|
self.timeline_entries.keys().copied()
|
||||||
ZTenantTimelineId {
|
|
||||||
tenant_id,
|
|
||||||
timeline_id,
|
|
||||||
}: ZTenantTimelineId,
|
|
||||||
) -> Option<RemoteTimeline> {
|
|
||||||
self.entries
|
|
||||||
.entry(tenant_id)
|
|
||||||
.or_default()
|
|
||||||
.remove(&timeline_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn tenant_entry(&self, tenant_id: &ZTenantId) -> Option<&TenantEntry> {
|
|
||||||
self.entries.get(tenant_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn tenant_entry_mut(&mut self, tenant_id: &ZTenantId) -> Option<&mut TenantEntry> {
|
|
||||||
self.entries.get_mut(tenant_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn add_tenant_entry(&mut self, tenant_id: ZTenantId) -> &mut TenantEntry {
|
|
||||||
self.entries.entry(tenant_id).or_default()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn remove_tenant_entry(&mut self, tenant_id: &ZTenantId) -> Option<TenantEntry> {
|
|
||||||
self.entries.remove(tenant_id)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_awaits_download(
|
pub fn set_awaits_download(
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
use crate::config::PageServerConf;
|
use crate::config::PageServerConf;
|
||||||
use crate::layered_repository::{load_metadata, LayeredRepository};
|
use crate::layered_repository::{load_metadata, LayeredRepository};
|
||||||
use crate::pgdatadir_mapping::DatadirTimeline;
|
use crate::pgdatadir_mapping::DatadirTimeline;
|
||||||
use crate::repository::Repository;
|
use crate::repository::{Repository, TimelineSyncStatusUpdate};
|
||||||
use crate::storage_sync::index::{RemoteIndex, RemoteTimelineIndex};
|
use crate::storage_sync::index::RemoteIndex;
|
||||||
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
|
use crate::storage_sync::{self, LocalTimelineInitStatus, SyncStartupData};
|
||||||
use crate::tenant_config::TenantConfOpt;
|
use crate::tenant_config::TenantConfOpt;
|
||||||
use crate::thread_mgr::ThreadKind;
|
use crate::thread_mgr::ThreadKind;
|
||||||
@@ -13,11 +13,11 @@ use crate::timelines::CreateRepo;
|
|||||||
use crate::walredo::PostgresRedoManager;
|
use crate::walredo::PostgresRedoManager;
|
||||||
use crate::{thread_mgr, timelines, walreceiver};
|
use crate::{thread_mgr, timelines, walreceiver};
|
||||||
use crate::{DatadirTimelineImpl, RepositoryImpl};
|
use crate::{DatadirTimelineImpl, RepositoryImpl};
|
||||||
use anyhow::Context;
|
use anyhow::{bail, Context};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_with::{serde_as, DisplayFromStr};
|
use serde_with::{serde_as, DisplayFromStr};
|
||||||
use std::collections::hash_map::Entry;
|
use std::collections::hash_map::Entry;
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::HashMap;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
@@ -157,13 +157,7 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIn
|
|||||||
// loading a tenant is serious, but it's better to complete the startup and
|
// loading a tenant is serious, but it's better to complete the startup and
|
||||||
// serve other tenants, than fail completely.
|
// serve other tenants, than fail completely.
|
||||||
error!("Failed to initialize local tenant {tenant_id}: {:?}", err);
|
error!("Failed to initialize local tenant {tenant_id}: {:?}", err);
|
||||||
|
set_tenant_state(tenant_id, TenantState::Broken)?;
|
||||||
if let Err(err) = set_tenant_state(tenant_id, TenantState::Broken) {
|
|
||||||
error!(
|
|
||||||
"Failed to set tenant state to broken {tenant_id}: {:?}",
|
|
||||||
err
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -171,51 +165,44 @@ pub fn init_tenant_mgr(conf: &'static PageServerConf) -> anyhow::Result<RemoteIn
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub enum LocalTimelineUpdate {
|
pub enum LocalTimelineUpdate {
|
||||||
Detach {
|
Detach(ZTenantTimelineId),
|
||||||
id: ZTenantTimelineId,
|
Attach(ZTenantTimelineId, Arc<DatadirTimelineImpl>),
|
||||||
// used to signal to the detach caller that walreceiver successfully terminated for specified id
|
|
||||||
join_confirmation_sender: std::sync::mpsc::Sender<()>,
|
|
||||||
},
|
|
||||||
Attach {
|
|
||||||
id: ZTenantTimelineId,
|
|
||||||
datadir: Arc<DatadirTimelineImpl>,
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for LocalTimelineUpdate {
|
impl std::fmt::Debug for LocalTimelineUpdate {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Self::Detach { id, .. } => f.debug_tuple("Remove").field(id).finish(),
|
Self::Detach(ttid) => f.debug_tuple("Remove").field(ttid).finish(),
|
||||||
Self::Attach { id, .. } => f.debug_tuple("Add").field(id).finish(),
|
Self::Attach(ttid, _) => f.debug_tuple("Add").field(ttid).finish(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Updates tenants' repositories, changing their timelines state in memory.
|
/// Updates tenants' repositories, changing their timelines state in memory.
|
||||||
pub fn attach_downloaded_tenants(
|
pub fn apply_timeline_sync_status_updates(
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
remote_index: &RemoteIndex,
|
remote_index: &RemoteIndex,
|
||||||
sync_status_updates: HashMap<ZTenantId, HashSet<ZTimelineId>>,
|
sync_status_updates: HashMap<ZTenantId, HashMap<ZTimelineId, TimelineSyncStatusUpdate>>,
|
||||||
) {
|
) {
|
||||||
if sync_status_updates.is_empty() {
|
if sync_status_updates.is_empty() {
|
||||||
debug!("No sync status updates to apply");
|
debug!("no sync status updates to apply");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (tenant_id, downloaded_timelines) in sync_status_updates {
|
info!(
|
||||||
info!(
|
"Applying sync status updates for {} timelines",
|
||||||
"Registering downlloaded timelines for {tenant_id} {} timelines",
|
sync_status_updates.len()
|
||||||
downloaded_timelines.len()
|
);
|
||||||
);
|
debug!("Sync status updates: {sync_status_updates:?}");
|
||||||
debug!("Downloaded timelines: {downloaded_timelines:?}");
|
|
||||||
|
|
||||||
|
for (tenant_id, status_updates) in sync_status_updates {
|
||||||
let repo = match load_local_repo(conf, tenant_id, remote_index) {
|
let repo = match load_local_repo(conf, tenant_id, remote_index) {
|
||||||
Ok(repo) => repo,
|
Ok(repo) => repo,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Failed to load repo for tenant {tenant_id} Error: {e:?}");
|
error!("Failed to load repo for tenant {tenant_id} Error: {e:?}",);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
match attach_downloaded_tenant(&repo, downloaded_timelines) {
|
match apply_timeline_remote_sync_status_updates(&repo, status_updates) {
|
||||||
Ok(()) => info!("successfully applied sync status updates for tenant {tenant_id}"),
|
Ok(()) => info!("successfully applied sync status updates for tenant {tenant_id}"),
|
||||||
Err(e) => error!(
|
Err(e) => error!(
|
||||||
"Failed to apply timeline sync timeline status updates for tenant {tenant_id}: {e:?}"
|
"Failed to apply timeline sync timeline status updates for tenant {tenant_id}: {e:?}"
|
||||||
@@ -400,86 +387,33 @@ pub fn get_local_timeline_with_load(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn delete_timeline(tenant_id: ZTenantId, timeline_id: ZTimelineId) -> anyhow::Result<()> {
|
pub fn detach_timeline(
|
||||||
// Start with the shutdown of timeline tasks (this shuts down the walreceiver)
|
conf: &'static PageServerConf,
|
||||||
// It is important that we do not take locks here, and do not check whether the timeline exists
|
tenant_id: ZTenantId,
|
||||||
// because if we hold tenants_state::write_tenants() while awaiting for the threads to join
|
timeline_id: ZTimelineId,
|
||||||
// we cannot create new timelines and tenants, and that can take quite some time,
|
) -> anyhow::Result<()> {
|
||||||
// it can even become stuck due to a bug making whole pageserver unavailable for some operations
|
// shutdown the timeline threads (this shuts down the walreceiver)
|
||||||
// so this is how we deal with concurrent delete requests: shutdown everything, wait for confirmation
|
thread_mgr::shutdown_threads(None, Some(tenant_id), Some(timeline_id));
|
||||||
// and then try to actually remove timeline from inmemory state and this is the point when concurrent requests
|
|
||||||
// will synchronize and either fail with the not found error or succeed
|
|
||||||
|
|
||||||
let (sender, receiver) = std::sync::mpsc::channel::<()>();
|
|
||||||
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach {
|
|
||||||
id: ZTenantTimelineId::new(tenant_id, timeline_id),
|
|
||||||
join_confirmation_sender: sender,
|
|
||||||
});
|
|
||||||
|
|
||||||
debug!("waiting for wal receiver to shutdown");
|
|
||||||
let _ = receiver.recv();
|
|
||||||
debug!("wal receiver shutdown confirmed");
|
|
||||||
debug!("waiting for threads to shutdown");
|
|
||||||
thread_mgr::shutdown_threads(None, None, Some(timeline_id));
|
|
||||||
debug!("thread shutdown completed");
|
|
||||||
match tenants_state::write_tenants().get_mut(&tenant_id) {
|
match tenants_state::write_tenants().get_mut(&tenant_id) {
|
||||||
Some(tenant) => {
|
Some(tenant) => {
|
||||||
tenant.repo.delete_timeline(timeline_id)?;
|
tenant
|
||||||
|
.repo
|
||||||
|
.detach_timeline(timeline_id)
|
||||||
|
.context("Failed to detach inmem tenant timeline")?;
|
||||||
tenant.local_timelines.remove(&timeline_id);
|
tenant.local_timelines.remove(&timeline_id);
|
||||||
|
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach(
|
||||||
|
ZTenantTimelineId::new(tenant_id, timeline_id),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
None => anyhow::bail!("Tenant {tenant_id} not found in local tenant state"),
|
None => bail!("Tenant {tenant_id} not found in local tenant state"),
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
let local_timeline_directory = conf.timeline_path(&timeline_id, &tenant_id);
|
||||||
}
|
std::fs::remove_dir_all(&local_timeline_directory).with_context(|| {
|
||||||
|
|
||||||
pub fn detach_tenant(conf: &'static PageServerConf, tenant_id: ZTenantId) -> anyhow::Result<()> {
|
|
||||||
set_tenant_state(tenant_id, TenantState::Stopping)?;
|
|
||||||
// shutdown the tenant and timeline threads: gc, compaction, page service threads)
|
|
||||||
thread_mgr::shutdown_threads(None, Some(tenant_id), None);
|
|
||||||
|
|
||||||
// FIXME should we protect somehow from starting new threads/walreceivers when tenant is in stopping state?
|
|
||||||
// send stop signal to wal receiver and collect join handles while holding the lock
|
|
||||||
let walreceiver_join_handles = {
|
|
||||||
let tenants = tenants_state::write_tenants();
|
|
||||||
let tenant = tenants.get(&tenant_id).context("tenant not found")?;
|
|
||||||
let mut walreceiver_join_handles = Vec::with_capacity(tenant.local_timelines.len());
|
|
||||||
for timeline_id in tenant.local_timelines.keys() {
|
|
||||||
let (sender, receiver) = std::sync::mpsc::channel::<()>();
|
|
||||||
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Detach {
|
|
||||||
id: ZTenantTimelineId::new(tenant_id, *timeline_id),
|
|
||||||
join_confirmation_sender: sender,
|
|
||||||
});
|
|
||||||
walreceiver_join_handles.push((*timeline_id, receiver));
|
|
||||||
}
|
|
||||||
// drop the tenants lock
|
|
||||||
walreceiver_join_handles
|
|
||||||
};
|
|
||||||
|
|
||||||
// wait for wal receivers to stop without holding the lock, because walreceiver
|
|
||||||
// will attempt to change tenant state which is protected by the same global tenants lock.
|
|
||||||
// TODO do we need a timeout here? how to handle it?
|
|
||||||
// recv_timeout is broken: https://github.com/rust-lang/rust/issues/94518#issuecomment-1057440631
|
|
||||||
// need to use crossbeam-channel
|
|
||||||
for (timeline_id, join_handle) in walreceiver_join_handles {
|
|
||||||
info!("waiting for wal receiver to shutdown timeline_id {timeline_id}");
|
|
||||||
join_handle.recv().context("failed to join walreceiver")?;
|
|
||||||
info!("wal receiver shutdown confirmed timeline_id {timeline_id}");
|
|
||||||
}
|
|
||||||
|
|
||||||
tenants_state::write_tenants().remove(&tenant_id);
|
|
||||||
|
|
||||||
// If removal fails there will be no way to successfully retry detach,
|
|
||||||
// because tenant no longer exists in in memory map. And it needs to be removed from it
|
|
||||||
// before we remove files because it contains references to repository
|
|
||||||
// which references ephemeral files which are deleted on drop. So if we keep these references
|
|
||||||
// code will attempt to remove files which no longer exist. This can be fixed by having shutdown
|
|
||||||
// mechanism for repository that will clean temporary data to avoid any references to ephemeral files
|
|
||||||
let local_tenant_directory = conf.tenant_path(&tenant_id);
|
|
||||||
std::fs::remove_dir_all(&local_tenant_directory).with_context(|| {
|
|
||||||
format!(
|
format!(
|
||||||
"Failed to remove local timeline directory '{}'",
|
"Failed to remove local timeline directory '{}'",
|
||||||
local_tenant_directory.display()
|
local_timeline_directory.display()
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@@ -500,10 +434,10 @@ fn load_local_timeline(
|
|||||||
));
|
));
|
||||||
page_tline.init_logical_size()?;
|
page_tline.init_logical_size()?;
|
||||||
|
|
||||||
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach {
|
tenants_state::try_send_timeline_update(LocalTimelineUpdate::Attach(
|
||||||
id: ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
|
ZTenantTimelineId::new(repo.tenant_id(), timeline_id),
|
||||||
datadir: Arc::clone(&page_tline),
|
Arc::clone(&page_tline),
|
||||||
});
|
));
|
||||||
|
|
||||||
Ok(page_tline)
|
Ok(page_tline)
|
||||||
}
|
}
|
||||||
@@ -513,27 +447,15 @@ fn load_local_timeline(
|
|||||||
pub struct TenantInfo {
|
pub struct TenantInfo {
|
||||||
#[serde_as(as = "DisplayFromStr")]
|
#[serde_as(as = "DisplayFromStr")]
|
||||||
pub id: ZTenantId,
|
pub id: ZTenantId,
|
||||||
pub state: Option<TenantState>,
|
pub state: TenantState,
|
||||||
pub has_in_progress_downloads: Option<bool>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
|
pub fn list_tenants() -> Vec<TenantInfo> {
|
||||||
tenants_state::read_tenants()
|
tenants_state::read_tenants()
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(id, tenant)| {
|
.map(|(id, tenant)| TenantInfo {
|
||||||
let has_in_progress_downloads = remote_index
|
id: *id,
|
||||||
.tenant_entry(id)
|
state: tenant.state,
|
||||||
.map(|entry| entry.has_in_progress_downloads());
|
|
||||||
|
|
||||||
if has_in_progress_downloads.is_none() {
|
|
||||||
error!("timeline is not found in remote index while it is present in the tenants registry")
|
|
||||||
}
|
|
||||||
|
|
||||||
TenantInfo {
|
|
||||||
id: *id,
|
|
||||||
state: Some(tenant.state),
|
|
||||||
has_in_progress_downloads,
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
@@ -545,73 +467,74 @@ pub fn list_tenants(remote_index: &RemoteTimelineIndex) -> Vec<TenantInfo> {
|
|||||||
/// A timeline is categorized as broken when any of following conditions is true:
|
/// A timeline is categorized as broken when any of following conditions is true:
|
||||||
/// - failed to load the timeline's metadata
|
/// - failed to load the timeline's metadata
|
||||||
/// - the timeline's disk consistent LSN is zero
|
/// - the timeline's disk consistent LSN is zero
|
||||||
fn check_broken_timeline(
|
fn check_broken_timeline(repo: &LayeredRepository, timeline_id: ZTimelineId) -> anyhow::Result<()> {
|
||||||
conf: &'static PageServerConf,
|
let metadata = load_metadata(repo.conf, timeline_id, repo.tenant_id())
|
||||||
tenant_id: ZTenantId,
|
.context("failed to load metadata")?;
|
||||||
timeline_id: ZTimelineId,
|
|
||||||
) -> anyhow::Result<()> {
|
|
||||||
let metadata =
|
|
||||||
load_metadata(conf, timeline_id, tenant_id).context("failed to load metadata")?;
|
|
||||||
|
|
||||||
// A timeline with zero disk consistent LSN can happen when the page server
|
// A timeline with zero disk consistent LSN can happen when the page server
|
||||||
// failed to checkpoint the timeline import data when creating that timeline.
|
// failed to checkpoint the timeline import data when creating that timeline.
|
||||||
if metadata.disk_consistent_lsn() == Lsn::INVALID {
|
if metadata.disk_consistent_lsn() == Lsn::INVALID {
|
||||||
anyhow::bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
|
bail!("Timeline {timeline_id} has a zero disk consistent LSN.");
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Note: all timelines are attached at once if and only if all of them are locally complete
|
|
||||||
fn init_local_repository(
|
fn init_local_repository(
|
||||||
conf: &'static PageServerConf,
|
conf: &'static PageServerConf,
|
||||||
tenant_id: ZTenantId,
|
tenant_id: ZTenantId,
|
||||||
local_timeline_init_statuses: HashMap<ZTimelineId, LocalTimelineInitStatus>,
|
local_timeline_init_statuses: HashMap<ZTimelineId, LocalTimelineInitStatus>,
|
||||||
remote_index: &RemoteIndex,
|
remote_index: &RemoteIndex,
|
||||||
) -> anyhow::Result<(), anyhow::Error> {
|
) -> anyhow::Result<(), anyhow::Error> {
|
||||||
let mut timelines_to_attach = HashSet::new();
|
// initialize local tenant
|
||||||
|
let repo = load_local_repo(conf, tenant_id, remote_index)
|
||||||
|
.with_context(|| format!("Failed to load repo for tenant {tenant_id}"))?;
|
||||||
|
|
||||||
|
let mut status_updates = HashMap::with_capacity(local_timeline_init_statuses.len());
|
||||||
for (timeline_id, init_status) in local_timeline_init_statuses {
|
for (timeline_id, init_status) in local_timeline_init_statuses {
|
||||||
match init_status {
|
match init_status {
|
||||||
LocalTimelineInitStatus::LocallyComplete => {
|
LocalTimelineInitStatus::LocallyComplete => {
|
||||||
debug!("timeline {timeline_id} for tenant {tenant_id} is locally complete, registering it in repository");
|
debug!("timeline {timeline_id} for tenant {tenant_id} is locally complete, registering it in repository");
|
||||||
check_broken_timeline(conf, tenant_id, timeline_id)
|
if let Err(err) = check_broken_timeline(&repo, timeline_id) {
|
||||||
.context("found broken timeline")?;
|
info!(
|
||||||
timelines_to_attach.insert(timeline_id);
|
"Found a broken timeline {timeline_id} (err={err:?}), skip registering it in repository"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
status_updates.insert(timeline_id, TimelineSyncStatusUpdate::Downloaded);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
LocalTimelineInitStatus::NeedsSync => {
|
LocalTimelineInitStatus::NeedsSync => {
|
||||||
debug!(
|
debug!(
|
||||||
"timeline {tenant_id} for tenant {timeline_id} needs sync, \
|
"timeline {tenant_id} for tenant {timeline_id} needs sync, \
|
||||||
so skipped for adding into repository until sync is finished"
|
so skipped for adding into repository until sync is finished"
|
||||||
);
|
);
|
||||||
return Ok(());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize local tenant
|
|
||||||
let repo = load_local_repo(conf, tenant_id, remote_index)
|
|
||||||
.with_context(|| format!("Failed to load repo for tenant {tenant_id}"))?;
|
|
||||||
|
|
||||||
// Lets fail here loudly to be on the safe side.
|
// Lets fail here loudly to be on the safe side.
|
||||||
// XXX: It may be a better api to actually distinguish between repository startup
|
// XXX: It may be a better api to actually distinguish between repository startup
|
||||||
// and processing of newly downloaded timelines.
|
// and processing of newly downloaded timelines.
|
||||||
attach_downloaded_tenant(&repo, timelines_to_attach)
|
apply_timeline_remote_sync_status_updates(&repo, status_updates)
|
||||||
.with_context(|| format!("Failed to bootstrap timelines for tenant {tenant_id}"))?;
|
.with_context(|| format!("Failed to bootstrap timelines for tenant {tenant_id}"))?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn attach_downloaded_tenant(
|
fn apply_timeline_remote_sync_status_updates(
|
||||||
repo: &LayeredRepository,
|
repo: &LayeredRepository,
|
||||||
downloaded_timelines: HashSet<ZTimelineId>,
|
status_updates: HashMap<ZTimelineId, TimelineSyncStatusUpdate>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let mut registration_queue = Vec::with_capacity(downloaded_timelines.len());
|
let mut registration_queue = Vec::with_capacity(status_updates.len());
|
||||||
|
|
||||||
// first need to register the in-mem representations, to avoid missing ancestors during the local disk data registration
|
// first need to register the in-mem representations, to avoid missing ancestors during the local disk data registration
|
||||||
for timeline_id in downloaded_timelines {
|
for (timeline_id, status_update) in status_updates {
|
||||||
repo.attach_timeline(timeline_id).with_context(|| {
|
repo.apply_timeline_remote_sync_status_update(timeline_id, status_update)
|
||||||
format!("Failed to load timeline {timeline_id} into in-memory repository")
|
.with_context(|| {
|
||||||
})?;
|
format!("Failed to load timeline {timeline_id} into in-memory repository")
|
||||||
registration_queue.push(timeline_id);
|
})?;
|
||||||
|
match status_update {
|
||||||
|
TimelineSyncStatusUpdate::Downloaded => registration_queue.push(timeline_id),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for timeline_id in registration_queue {
|
for timeline_id in registration_queue {
|
||||||
@@ -619,7 +542,7 @@ fn attach_downloaded_tenant(
|
|||||||
match tenants_state::write_tenants().get_mut(&tenant_id) {
|
match tenants_state::write_tenants().get_mut(&tenant_id) {
|
||||||
Some(tenant) => match tenant.local_timelines.entry(timeline_id) {
|
Some(tenant) => match tenant.local_timelines.entry(timeline_id) {
|
||||||
Entry::Occupied(_) => {
|
Entry::Occupied(_) => {
|
||||||
anyhow::bail!("Local timeline {timeline_id} already registered")
|
bail!("Local timeline {timeline_id} already registered")
|
||||||
}
|
}
|
||||||
Entry::Vacant(v) => {
|
Entry::Vacant(v) => {
|
||||||
v.insert(load_local_timeline(repo, timeline_id).with_context(|| {
|
v.insert(load_local_timeline(repo, timeline_id).with_context(|| {
|
||||||
@@ -627,7 +550,7 @@ fn attach_downloaded_tenant(
|
|||||||
})?);
|
})?);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
None => anyhow::bail!(
|
None => bail!(
|
||||||
"Tenant {} not found in local tenant state",
|
"Tenant {} not found in local tenant state",
|
||||||
repo.tenant_id()
|
repo.tenant_id()
|
||||||
),
|
),
|
||||||
|
|||||||
@@ -119,6 +119,8 @@ pub fn start_compaction_loop(tenantid: ZTenantId) -> anyhow::Result<()> {
|
|||||||
pub fn init_tenant_task_pool() -> anyhow::Result<()> {
|
pub fn init_tenant_task_pool() -> anyhow::Result<()> {
|
||||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||||
.thread_name("tenant-task-worker")
|
.thread_name("tenant-task-worker")
|
||||||
|
.worker_threads(40) // Way more than necessary
|
||||||
|
.max_blocking_threads(100) // Way more than necessary
|
||||||
.enable_all()
|
.enable_all()
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
|
|||||||
@@ -202,7 +202,7 @@ pub fn create_repo(
|
|||||||
// anymore, but I think that could still happen.
|
// anymore, but I think that could still happen.
|
||||||
let wal_redo_manager = Arc::new(crate::walredo::DummyRedoManager {});
|
let wal_redo_manager = Arc::new(crate::walredo::DummyRedoManager {});
|
||||||
|
|
||||||
(wal_redo_manager as _, RemoteIndex::default())
|
(wal_redo_manager as _, RemoteIndex::empty())
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -347,7 +347,7 @@ pub(crate) fn create_timeline(
|
|||||||
tenant_id: ZTenantId,
|
tenant_id: ZTenantId,
|
||||||
new_timeline_id: Option<ZTimelineId>,
|
new_timeline_id: Option<ZTimelineId>,
|
||||||
ancestor_timeline_id: Option<ZTimelineId>,
|
ancestor_timeline_id: Option<ZTimelineId>,
|
||||||
mut ancestor_start_lsn: Option<Lsn>,
|
ancestor_start_lsn: Option<Lsn>,
|
||||||
) -> Result<Option<TimelineInfo>> {
|
) -> Result<Option<TimelineInfo>> {
|
||||||
let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate);
|
let new_timeline_id = new_timeline_id.unwrap_or_else(ZTimelineId::generate);
|
||||||
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
let repo = tenant_mgr::get_repository_for_tenant(tenant_id)?;
|
||||||
@@ -357,35 +357,41 @@ pub(crate) fn create_timeline(
|
|||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut start_lsn = ancestor_start_lsn.unwrap_or(Lsn(0));
|
||||||
|
|
||||||
let new_timeline_info = match ancestor_timeline_id {
|
let new_timeline_info = match ancestor_timeline_id {
|
||||||
Some(ancestor_timeline_id) => {
|
Some(ancestor_timeline_id) => {
|
||||||
let ancestor_timeline = repo
|
let ancestor_timeline = repo
|
||||||
.get_timeline_load(ancestor_timeline_id)
|
.get_timeline_load(ancestor_timeline_id)
|
||||||
.context("Cannot branch off the timeline that's not present locally")?;
|
.context("Cannot branch off the timeline that's not present locally")?;
|
||||||
|
|
||||||
if let Some(lsn) = ancestor_start_lsn.as_mut() {
|
if start_lsn == Lsn(0) {
|
||||||
|
// Find end of WAL on the old timeline
|
||||||
|
let end_of_wal = ancestor_timeline.get_last_record_lsn();
|
||||||
|
info!("branching at end of WAL: {}", end_of_wal);
|
||||||
|
start_lsn = end_of_wal;
|
||||||
|
} else {
|
||||||
// Wait for the WAL to arrive and be processed on the parent branch up
|
// Wait for the WAL to arrive and be processed on the parent branch up
|
||||||
// to the requested branch point. The repository code itself doesn't
|
// to the requested branch point. The repository code itself doesn't
|
||||||
// require it, but if we start to receive WAL on the new timeline,
|
// require it, but if we start to receive WAL on the new timeline,
|
||||||
// decoding the new WAL might need to look up previous pages, relation
|
// decoding the new WAL might need to look up previous pages, relation
|
||||||
// sizes etc. and that would get confused if the previous page versions
|
// sizes etc. and that would get confused if the previous page versions
|
||||||
// are not in the repository yet.
|
// are not in the repository yet.
|
||||||
*lsn = lsn.align();
|
ancestor_timeline.wait_lsn(start_lsn)?;
|
||||||
ancestor_timeline.wait_lsn(*lsn)?;
|
}
|
||||||
|
start_lsn = start_lsn.align();
|
||||||
|
|
||||||
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
|
let ancestor_ancestor_lsn = ancestor_timeline.get_ancestor_lsn();
|
||||||
if ancestor_ancestor_lsn > *lsn {
|
if ancestor_ancestor_lsn > start_lsn {
|
||||||
// can we safely just branch from the ancestor instead?
|
// can we safely just branch from the ancestor instead?
|
||||||
anyhow::bail!(
|
anyhow::bail!(
|
||||||
"invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}",
|
"invalid start lsn {} for ancestor timeline {}: less than timeline ancestor lsn {}",
|
||||||
lsn,
|
start_lsn,
|
||||||
ancestor_timeline_id,
|
ancestor_timeline_id,
|
||||||
ancestor_ancestor_lsn,
|
ancestor_ancestor_lsn,
|
||||||
);
|
);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
repo.branch_timeline(ancestor_timeline_id, new_timeline_id, start_lsn)?;
|
||||||
repo.branch_timeline(ancestor_timeline_id, new_timeline_id, ancestor_start_lsn)?;
|
|
||||||
// load the timeline into memory
|
// load the timeline into memory
|
||||||
let loaded_timeline =
|
let loaded_timeline =
|
||||||
tenant_mgr::get_local_timeline_with_load(tenant_id, new_timeline_id)?;
|
tenant_mgr::get_local_timeline_with_load(tenant_id, new_timeline_id)?;
|
||||||
|
|||||||
@@ -91,6 +91,7 @@ pub fn init_wal_receiver_main_thread(
|
|||||||
|
|
||||||
let runtime = tokio::runtime::Builder::new_multi_thread()
|
let runtime = tokio::runtime::Builder::new_multi_thread()
|
||||||
.thread_name("wal-receiver-runtime-thread")
|
.thread_name("wal-receiver-runtime-thread")
|
||||||
|
.worker_threads(40)
|
||||||
.enable_all()
|
.enable_all()
|
||||||
.on_thread_start(|| IS_WAL_RECEIVER.with(|c| c.set(true)))
|
.on_thread_start(|| IS_WAL_RECEIVER.with(|c| c.set(true)))
|
||||||
.build()
|
.build()
|
||||||
@@ -264,10 +265,7 @@ async fn wal_receiver_main_thread_loop_step<'a>(
|
|||||||
info!("Processing timeline update: {update:?}");
|
info!("Processing timeline update: {update:?}");
|
||||||
match update {
|
match update {
|
||||||
// Timeline got detached, stop all related tasks and remove public timeline data.
|
// Timeline got detached, stop all related tasks and remove public timeline data.
|
||||||
LocalTimelineUpdate::Detach {
|
LocalTimelineUpdate::Detach(id) => {
|
||||||
id,
|
|
||||||
join_confirmation_sender,
|
|
||||||
} => {
|
|
||||||
match local_timeline_wal_receivers.get_mut(&id.tenant_id) {
|
match local_timeline_wal_receivers.get_mut(&id.tenant_id) {
|
||||||
Some(wal_receivers) => {
|
Some(wal_receivers) => {
|
||||||
if let hash_map::Entry::Occupied(o) = wal_receivers.entry(id.timeline_id) {
|
if let hash_map::Entry::Occupied(o) = wal_receivers.entry(id.timeline_id) {
|
||||||
@@ -283,48 +281,44 @@ async fn wal_receiver_main_thread_loop_step<'a>(
|
|||||||
};
|
};
|
||||||
{
|
{
|
||||||
WAL_RECEIVER_ENTRIES.write().await.remove(&id);
|
WAL_RECEIVER_ENTRIES.write().await.remove(&id);
|
||||||
if let Err(e) = join_confirmation_sender.send(()) {
|
|
||||||
warn!("cannot send wal_receiver shutdown confirmation {e}")
|
|
||||||
} else {
|
|
||||||
info!("confirm walreceiver shutdown for {id}");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Timeline got attached, retrieve all necessary information to start its broker loop and maintain this loop endlessly.
|
// Timeline got attached, retrieve all necessary information to start its broker loop and maintain this loop endlessly.
|
||||||
LocalTimelineUpdate::Attach { id, datadir } => {
|
LocalTimelineUpdate::Attach(new_id, new_timeline) => {
|
||||||
let timeline_connection_managers = local_timeline_wal_receivers
|
let timeline_connection_managers = local_timeline_wal_receivers
|
||||||
.entry(id.tenant_id)
|
.entry(new_id.tenant_id)
|
||||||
.or_default();
|
.or_default();
|
||||||
|
|
||||||
if timeline_connection_managers.is_empty() {
|
if timeline_connection_managers.is_empty() {
|
||||||
if let Err(e) = change_tenant_state(id.tenant_id, TenantState::Active).await
|
if let Err(e) =
|
||||||
|
change_tenant_state(new_id.tenant_id, TenantState::Active).await
|
||||||
{
|
{
|
||||||
error!("Failed to make tenant active for id {id}: {e:#}");
|
error!("Failed to make tenant active for id {new_id}: {e:#}");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let vacant_connection_manager_entry =
|
let vacant_connection_manager_entry =
|
||||||
match timeline_connection_managers.entry(id.timeline_id) {
|
match timeline_connection_managers.entry(new_id.timeline_id) {
|
||||||
hash_map::Entry::Occupied(_) => {
|
hash_map::Entry::Occupied(_) => {
|
||||||
debug!("Attepted to readd an existing timeline {id}, ignoring");
|
debug!("Attepted to readd an existing timeline {new_id}, ignoring");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
hash_map::Entry::Vacant(v) => v,
|
hash_map::Entry::Vacant(v) => v,
|
||||||
};
|
};
|
||||||
|
|
||||||
let (wal_connect_timeout, lagging_wal_timeout, max_lsn_wal_lag) =
|
let (wal_connect_timeout, lagging_wal_timeout, max_lsn_wal_lag) =
|
||||||
match fetch_tenant_settings(id.tenant_id).await {
|
match fetch_tenant_settings(new_id.tenant_id).await {
|
||||||
Ok(settings) => settings,
|
Ok(settings) => settings,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Failed to fetch tenant settings for id {id}: {e:#}");
|
error!("Failed to fetch tenant settings for id {new_id}: {e:#}");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
{
|
{
|
||||||
WAL_RECEIVER_ENTRIES.write().await.insert(
|
WAL_RECEIVER_ENTRIES.write().await.insert(
|
||||||
id,
|
new_id,
|
||||||
WalReceiverEntry {
|
WalReceiverEntry {
|
||||||
wal_producer_connstr: None,
|
wal_producer_connstr: None,
|
||||||
last_received_msg_lsn: None,
|
last_received_msg_lsn: None,
|
||||||
@@ -335,10 +329,10 @@ async fn wal_receiver_main_thread_loop_step<'a>(
|
|||||||
|
|
||||||
vacant_connection_manager_entry.insert(
|
vacant_connection_manager_entry.insert(
|
||||||
connection_manager::spawn_connection_manager_task(
|
connection_manager::spawn_connection_manager_task(
|
||||||
id,
|
new_id,
|
||||||
broker_prefix.to_owned(),
|
broker_prefix.to_owned(),
|
||||||
etcd_client.clone(),
|
etcd_client.clone(),
|
||||||
datadir,
|
new_timeline,
|
||||||
wal_connect_timeout,
|
wal_connect_timeout,
|
||||||
lagging_wal_timeout,
|
lagging_wal_timeout,
|
||||||
max_lsn_wal_lag,
|
max_lsn_wal_lag,
|
||||||
|
|||||||
@@ -623,7 +623,6 @@ impl PostgresRedoProcess {
|
|||||||
.env_clear()
|
.env_clear()
|
||||||
.env("LD_LIBRARY_PATH", conf.pg_lib_dir())
|
.env("LD_LIBRARY_PATH", conf.pg_lib_dir())
|
||||||
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir())
|
.env("DYLD_LIBRARY_PATH", conf.pg_lib_dir())
|
||||||
.close_fds()
|
|
||||||
.output()
|
.output()
|
||||||
.map_err(|e| Error::new(e.kind(), format!("failed to execute initdb: {}", e)))?;
|
.map_err(|e| Error::new(e.kind(), format!("failed to execute initdb: {}", e)))?;
|
||||||
|
|
||||||
|
|||||||
438
scripts/add_missing_rels.py
Normal file
438
scripts/add_missing_rels.py
Normal file
@@ -0,0 +1,438 @@
|
|||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
import tempfile
|
||||||
|
from contextlib import closing
|
||||||
|
import psycopg2
|
||||||
|
import subprocess
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
### utils copied from test fixtures
|
||||||
|
from typing import Any, List
|
||||||
|
from psycopg2.extensions import connection as PgConnection
|
||||||
|
import asyncpg
|
||||||
|
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
|
||||||
|
|
||||||
|
Env = Dict[str, str]
|
||||||
|
|
||||||
|
_global_counter = 0


def global_counter() -> int:
    """Advance the module-wide counter by one and return the new value.

    The number is used to give output files a unique suffix, so running the
    same command several times keeps each run's output in a separate file.
    """
    global _global_counter
    next_value = _global_counter + 1
    _global_counter = next_value
    return next_value
|
||||||
|
|
||||||
|
|
||||||
|
def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
    """Execute ``cmd`` with stdout and stderr redirected into files.

    The files are created in ``capture_dir`` and are named "cmd_NNN.stdout"
    and "cmd_NNN.stderr", where "cmd" is the base name of the program and NNN
    comes from ``global_counter()``. Files that already exist are overwritten.
    Any extra keyword arguments are forwarded to ``subprocess.run``.

    Returns the common path prefix of the two files (no extension).
    """
    assert type(cmd) is list
    program = os.path.basename(cmd[0])
    base = f'{program}_{global_counter()}'
    basepath = os.path.join(capture_dir, base)

    with open(basepath + '.stdout', 'w') as out_file, \
            open(basepath + '.stderr', 'w') as err_file:
        print(f'(capturing output to "{base}.stdout")')
        subprocess.run(cmd, **kwargs, stdout=out_file, stderr=err_file)

    return basepath
|
||||||
|
|
||||||
|
|
||||||
|
class PgBin:
    """Runs binaries that belong to one postgres installation."""
    def __init__(self, log_dir: Path, pg_distrib_dir):
        # log_dir is where run_capture() stores the captured output files.
        self.log_dir = log_dir
        self.pg_bin_path = os.path.join(str(pg_distrib_dir), 'bin')
        # Start from the current process environment and point the dynamic
        # loader at the installation's lib directory.
        self.env = os.environ.copy()
        self.env['LD_LIBRARY_PATH'] = os.path.join(str(pg_distrib_dir), 'lib')

    def _fixpath(self, command: List[str]):
        # A program name without any '/' is resolved against the
        # installation's bin directory; the list is edited in place.
        program = command[0]
        if '/' in program:
            return
        command[0] = os.path.join(self.pg_bin_path, program)

    def _build_env(self, env_add: Optional[Env]) -> Env:
        # Without additions the shared base environment itself is returned;
        # otherwise the additions are layered over a copy of it.
        if env_add is not None:
            merged = self.env.copy()
            merged.update(env_add)
            return merged
        return self.env

    def run(self, command: List[str], env: Optional[Env] = None, cwd: Optional[str] = None):
        """
        Execute a postgres binary and wait for it to finish.

        ``command`` is given as a list, e.g. ['pgbench', '-p', '55432'].
        The environment prepared in the constructor is applied, extended by
        ``env`` when given.

        When the first element (the program name) contains no '/', it is
        rewritten in place to point into the installation's bin directory.

        A non-zero exit status raises, because the process is run with
        ``check=True``. Use `run_capture` to get stdout/stderr stored in files.
        """
        self._fixpath(command)
        print('Running command "{}"'.format(' '.join(command)))
        full_env = self._build_env(env)
        subprocess.run(command, env=full_env, cwd=cwd, check=True)

    def run_capture(self,
                    command: List[str],
                    env: Optional[Env] = None,
                    cwd: Optional[str] = None,
                    **kwargs: Any) -> str:
        """
        Execute a postgres binary with stdout and stderr written to files.

        Works like `run`, but is meant for chatty programs. The return value
        is the base path of the files holding the captured output.
        """
        self._fixpath(command)
        print('Running command "{}"'.format(' '.join(command)))
        full_env = self._build_env(env)
        capture_dir = str(self.log_dir)
        return subprocess_capture(capture_dir,
                                  command,
                                  env=full_env,
                                  cwd=cwd,
                                  check=True,
                                  **kwargs)
|
||||||
|
|
||||||
|
class PgProtocol:
    """Reusable connection logic.

    Keyword arguments given to the constructor become the default connection
    options. The methods accept further keyword arguments that are layered
    on top of those defaults for a single call.
    """
    def __init__(self, **kwargs):
        self.default_options = kwargs

    def connstr(self, **kwargs) -> str:
        """
        Build a libpq connection string for the Postgres instance.
        """
        # Imported locally: the file's top-level imports bring in only
        # `connection` from psycopg2.extensions, so `make_dsn` was undefined.
        from psycopg2.extensions import make_dsn

        return str(make_dsn(**self.conn_options(**kwargs)))

    def conn_options(self, **kwargs):
        """
        Return the effective connection options as a new dict.

        Defaults come first, then the components of a ``dsn`` keyword (if
        given), then the remaining keyword arguments. A statement timeout is
        always prepended to the ``options`` entry.
        """
        conn_options = self.default_options.copy()
        if 'dsn' in kwargs:
            # Imported locally for the same reason as make_dsn in connstr().
            from psycopg2.extensions import parse_dsn

            conn_options.update(parse_dsn(kwargs['dsn']))
        conn_options.update(kwargs)

        # Individual statement timeout in seconds. 2 minutes should be
        # enough for our tests, but if you need a longer, you can
        # change it by calling "SET statement_timeout" after
        # connecting.
        timeout_option = "-cstatement_timeout=120s"
        if 'options' in conn_options:
            conn_options['options'] = timeout_option + " " + conn_options['options']
        else:
            conn_options['options'] = timeout_option
        return conn_options

    # autocommit=True here by default because that's what we need most of the time
    def connect(self, autocommit=True, **kwargs) -> "PgConnection":
        """
        Connect to the node.
        Returns psycopg2's connection object.
        This method passes all extra params to conn_options.
        """
        conn = psycopg2.connect(**self.conn_options(**kwargs))

        # WARNING: this setting affects *all* tests!
        conn.autocommit = autocommit
        return conn

    @staticmethod
    def _server_settings_from_options(options: str) -> Dict[str, str]:
        """Translate a '-c<key>=<val> ...' options string into a settings dict."""
        import re  # local import: `re` is not among the file's top-level imports

        return {
            match.group(1): match.group(2)
            for match in re.finditer(r'-c(\w*)=(\w*)', options)
        }

    async def connect_async(self, **kwargs) -> "asyncpg.Connection":
        """
        Connect to the node from async python.
        Returns asyncpg's connection object.
        """
        # asyncpg takes slightly different options than psycopg2. Try
        # to convert the defaults from the psycopg2 format.

        # The psycopg2 option 'dbname' is called 'database' in asyncpg
        conn_options = self.conn_options(**kwargs)
        if 'dbname' in conn_options:
            conn_options['database'] = conn_options.pop('dbname')

        # Convert options='-c<key>=<val>' to server_settings. All parsed
        # pairs are merged; the previous code tested for a 'server_options'
        # key that never exists and therefore kept only the last pair.
        if 'options' in conn_options:
            parsed = self._server_settings_from_options(conn_options.pop('options'))
            if parsed:
                settings = dict(conn_options.get('server_settings', {}))
                settings.update(parsed)
                conn_options['server_settings'] = settings
        return await asyncpg.connect(**conn_options)

    def safe_psql(self, query: str, **kwargs: Any) -> List[Tuple[Any, ...]]:
        """
        Execute query against the node and return all rows.
        This method passes all extra params to connect.
        """
        return self.safe_psql_many([query], **kwargs)[0]

    def safe_psql_many(self, queries: List[str], **kwargs: Any) -> List[List[Tuple[Any, ...]]]:
        """
        Execute queries against the node and return all rows.

        The queries run one after another on a single connection, which is
        closed afterwards. The result holds one list of rows per query; a
        query that returns no data contributes an empty list.
        This method passes all extra params to connect.
        """
        result: List[List[Any]] = []
        with closing(self.connect(**kwargs)) as conn:
            with conn.cursor() as cur:
                for query in queries:
                    print(f"Executing query: {query}")
                    cur.execute(query)

                    if cur.description is None:
                        result.append([])  # query didn't return data
                    else:
                        result.append(cast(List[Any], cur.fetchall()))
        return result
|
||||||
|
|
||||||
|
|
||||||
|
class VanillaPostgres(PgProtocol):
    """A stock Postgres instance driven through ``initdb`` and ``pg_ctl``.

    Usable as a context manager: leaving the ``with`` block stops the server
    if it is still marked as running.
    """
    def __init__(self, pgdatadir: Path, pg_bin: PgBin, port: int, init=True):
        super().__init__(host='localhost', port=port, dbname='postgres')
        self.pgdatadir = pgdatadir
        self.pg_bin = pg_bin
        self.running = False
        if init:
            self.pg_bin.run_capture(['initdb', '-D', str(pgdatadir)])
        self.configure([f"port = {port}\n"])

    def configure(self, options: List[str]):
        """Append lines into postgresql.conf file."""
        assert not self.running
        with open(os.path.join(self.pgdatadir, 'postgresql.conf'), 'a') as conf_file:
            conf_file.write("\n".join(options))

    def start(self, log_path: Optional[str] = None):
        """Start the server with ``pg_ctl -w``; logs go to ``log_path``.

        When ``log_path`` is omitted, "pg.log" inside the data directory is used.
        """
        assert not self.running
        self.running = True

        if log_path is None:
            log_path = os.path.join(self.pgdatadir, "pg.log")

        self.pg_bin.run_capture(
            ['pg_ctl', '-w', '-D', str(self.pgdatadir), '-l', log_path, 'start'])

    def stop(self):
        """Stop the server with ``pg_ctl -w ... stop``."""
        assert self.running
        self.running = False
        self.pg_bin.run_capture(['pg_ctl', '-w', '-D', str(self.pgdatadir), 'stop'])

    def get_subdir_size(self, subdir) -> int:
        """Return size of pgdatadir subdirectory in bytes."""
        # The sizes are summed here directly: the `get_dir_size` helper this
        # method used to call is not defined in this file.
        subdir_path = os.path.join(self.pgdatadir, subdir)
        return sum(
            os.path.getsize(os.path.join(root, name))
            for root, _dirs, files in os.walk(subdir_path)
            for name in files
        )

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        if self.running:
            self.stop()
|
||||||
|
|
||||||
|
|
||||||
|
### actual code
|
||||||
|
|
||||||
|
|
||||||
|
def get_rel_paths(log_dir, pg_bin, base_tar):
    """Yield the relation file paths of every database in a base backup.

    The tarball is unpacked into a temporary directory and a vanilla postgres
    is started on it. Each database is then asked for the
    ``pg_relation_filepath`` of every ``pg_class`` row. Because template0
    does not accept connections, a copy of it is created and the copy's
    paths are reported under template0's oid instead.

    This is a generator: the server and the temporary directory stay alive
    until iteration finishes.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar. check=True makes a failed extraction raise
        # instead of starting postgres on an incomplete data directory.
        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir], check=True)

        port = "55439"  # Probably free
        with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
            vanilla_pg.configure([f"port={port}"])
            vanilla_pg.start()

            # Create database based on template0 because we can't connect to template0
            query = "create database template0copy template template0"
            vanilla_pg.safe_psql(query, user="cloud_admin")
            vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")

            # Get all databases
            query = "select oid, datname from pg_database"
            oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin")
            template0_oid = [
                oid
                for (oid, database) in oid_dbname_pairs
                if database == "template0"
            ][0]

            # Get rel paths for each database
            for oid, database in oid_dbname_pairs:
                if database == "template0":
                    # We can't connect to template0
                    continue

                query = "select relname, pg_relation_filepath(oid) from pg_class"
                result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database)
                for relname, filepath in result:
                    if filepath is not None:

                        if database == "template0copy":
                            # Add all template0copy paths to template0
                            prefix = f"base/{oid}/"
                            if filepath.startswith(prefix):
                                suffix = filepath[len(prefix):]
                                yield f"base/{template0_oid}/{suffix}"
                            elif filepath.startswith("global"):
                                print(f"skipping {database} global file {filepath}")
                            else:
                                raise AssertionError(
                                    f"unexpected path {filepath} for relation "
                                    f"{relname} in database {database}")
                        else:
                            yield filepath
|
||||||
|
|
||||||
|
|
||||||
|
def pack_base(log_dir, restored_dir, output_tar):
    """Pack every entry of ``restored_dir`` into a tar and move it to ``output_tar``.

    The archive is first written as "tmp.tar" inside ``restored_dir`` (tar
    runs with that directory as its working directory, so member names are
    relative) and then moved to its final location. tar's output is captured
    under ``log_dir``.

    Raises ``subprocess.CalledProcessError`` when tar exits with an error.
    """
    tmp_tar_name = "tmp.tar"
    tmp_tar_path = os.path.join(restored_dir, tmp_tar_name)
    # The listing is taken before tar runs, so tmp.tar is not among the members.
    cmd = ["tar", "-cf", tmp_tar_name] + os.listdir(restored_dir)
    # check=True: report a failed tar here rather than as a confusing error
    # from the move below.
    subprocess_capture(log_dir, cmd, cwd=restored_dir, check=True)
    shutil.move(tmp_tar_path, output_tar)
|
||||||
|
|
||||||
|
|
||||||
|
def get_files_in_tar(log_dir, tar):
    """Yield the path of every regular file in ``tar``, relative to the archive root.

    The archive is extracted into a temporary directory, which exists only
    while the generator is being consumed. tar's output is captured under
    ``log_dir``; a failing extraction raises ``subprocess.CalledProcessError``.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the tar; check=True so a broken archive is not silently
        # reported as an empty or partial file list.
        subprocess_capture(log_dir, ["tar", "-xf", tar, "-C", restored_dir], check=True)

        # Walk the extracted tree and strip the temporary directory prefix
        # (plus the path separator) from each file path.
        for root, _dirs, files in os.walk(restored_dir):
            for name in files:
                file_path = os.path.join(root, name)
                yield file_path[len(restored_dir) + 1:]
|
||||||
|
|
||||||
|
|
||||||
|
def corrupt(log_dir, base_tar, output_tar):
    """Remove all empty files and repackage. Return paths of files removed.

    ``base_tar`` is extracted into a temporary directory, every zero-length
    file is deleted, and the remainder is packed into ``output_tar``. The
    returned set holds the deleted paths relative to the archive root.

    Raises ``subprocess.CalledProcessError`` when the extraction fails.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar; check=True so a failed extraction does not
        # yield a "corrupted" archive that is simply incomplete.
        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir], check=True)

        # Find empty files
        empty_files = []
        for root, _dirs, files in os.walk(restored_dir):
            for name in files:
                file_path = os.path.join(root, name)
                if os.path.getsize(file_path) == 0:
                    empty_files.append(file_path)

        # Delete empty files (just to see if they get recreated)
        for empty_file in empty_files:
            os.remove(empty_file)

        # Repackage
        pack_base(log_dir, restored_dir, output_tar)

        # Return relative paths
        return {
            empty_file[len(restored_dir) + 1:]
            for empty_file in empty_files
        }
|
||||||
|
|
||||||
|
|
||||||
|
def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
    """Create an empty file for every path in ``paths`` that the tar lacks.

    ``corrupt_tar`` is extracted into a temporary directory, each missing
    path (relative to the archive root) is created as an empty file, and the
    result is packed into ``output_tar``. The input archive is not modified.

    Raises ``subprocess.CalledProcessError`` when the extraction fails.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar; check=True so missing files are not "fixed"
        # on top of a failed extraction.
        subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir], check=True)

        # Touch files that don't exist
        for path in paths:
            absolute_path = os.path.join(restored_dir, path)
            if not os.path.exists(absolute_path):
                # f-string: the message used to print the literal placeholder.
                print(f"File {absolute_path} didn't exist. Creating..")
                Path(absolute_path).touch()

        # Repackage
        pack_base(log_dir, restored_dir, output_tar)
|
||||||
|
|
||||||
|
|
||||||
|
# TODO this test is not currently called. It needs any ordinary base.tar path as input
|
||||||
|
def test_add_missing_rels(base_tar):
    """Round-trip check for the missing-relation repair.

    Empty files are stripped from ``base_tar``, the relation paths are
    reconstructed from the stripped archive, the missing files are touched
    again, and the result is compared with the original file list.
    """
    # NOTE(review): `test_output_dir` and `pg_bin` are neither parameters nor
    # defined inside this function; they have to exist as module globals when
    # it runs -- confirm before enabling this test.
    output_tar = base_tar + ".fixed"

    # Files that may stay missing without failing the check.
    tolerated_missing = {
        "postgresql.auto.conf",
        "pg_ident.conf",
    }

    # Create new base tar with missing empty files
    corrupt_tar = os.path.join(test_output_dir, "psql_2-corrupted.stdout")
    deleted_files = corrupt(test_output_dir, base_tar, corrupt_tar)
    original_files = set(get_files_in_tar(test_output_dir, base_tar))
    corrupted_files = set(get_files_in_tar(test_output_dir, corrupt_tar))
    assert len(original_files - corrupted_files) > 0

    # Reconstruct paths from the corrupted tar, assert it covers everything important
    reconstructed_paths = set(get_rel_paths(test_output_dir, pg_bin, corrupt_tar))
    paths_missed = deleted_files - reconstructed_paths
    assert paths_missed.issubset(tolerated_missing)

    # Recreate the correct tar by touching files, compare with original tar
    touch_missing_rels(test_output_dir, corrupt_tar, output_tar, reconstructed_paths)
    files_before = set(get_files_in_tar(test_output_dir, base_tar))
    files_after = set(get_files_in_tar(test_output_dir, output_tar))
    paths_missed = files_before - files_after
    assert paths_missed.issubset(tolerated_missing)
|
||||||
|
|
||||||
|
|
||||||
|
# Example command:
|
||||||
|
# poetry run python scripts/add_missing_rels.py \
|
||||||
|
# --base-tar /home/bojan/src/neondatabase/neon/test_output/test_import_from_pageserver/psql_2.stdout \
|
||||||
|
# --output-tar output-base.tar \
|
||||||
|
# --log-dir /home/bojan/tmp \
|
||||||
|
# --pg-distrib-dir /home/bojan/src/neondatabase/neon/tmp_install/
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: read the four required options, work out the
    # relation paths of the input base tar and write a copy of it in which
    # every missing relation file exists as an empty file.
    parser = argparse.ArgumentParser()
    for flag, dest, help_text in (
        ('--base-tar', 'base_tar',
         'base.tar file to add missing rels to (file will not be modified)'),
        ('--output-tar', 'output_tar',
         'path and name for the output base.tar file'),
        ('--log-dir', 'log_dir',
         'directory to save log files in'),
        ('--pg-distrib-dir', 'pg_distrib_dir',
         'directory where postgres is installed'),
    ):
        parser.add_argument(flag, dest=dest, required=True, help=help_text)
    args = parser.parse_args()

    base_tar = args.base_tar
    output_tar = args.output_tar
    log_dir = args.log_dir
    pg_bin = PgBin(log_dir, args.pg_distrib_dir)

    reconstructed_paths = set(get_rel_paths(log_dir, pg_bin, base_tar))
    touch_missing_rels(log_dir, base_tar, output_tar, reconstructed_paths)
|
||||||
232
scripts/export_import_betwen_pageservers.py
Executable file
232
scripts/export_import_betwen_pageservers.py
Executable file
@@ -0,0 +1,232 @@
|
|||||||
|
#
|
||||||
|
# Simple script to export nodes from one pageserver
|
||||||
|
# and import them into another page server
|
||||||
|
#
|
||||||
|
from os import path
|
||||||
|
import os
|
||||||
|
import requests
|
||||||
|
import uuid
|
||||||
|
import subprocess
|
||||||
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# directory to save exported tar files to
|
||||||
|
basepath = path.dirname(path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
|
class NeonPageserverApiException(Exception):
    """Error raised for a failed pageserver HTTP API request."""
|
||||||
|
|
||||||
|
|
||||||
|
class NeonPageserverHttpClient(requests.Session):
    """HTTP session bound to one pageserver's management API (host and port)."""
    def __init__(self, host, port):
        super().__init__()
        self.host = host
        self.port = port

    def verbose_error(self, res: requests.Response):
        """Raise NeonPageserverApiException if ``res`` carries an error status.

        The exception message is the ``msg`` field of the JSON response body,
        or an empty string when the body has no usable ``msg``. The original
        ``requests`` error is attached as the cause.
        """
        try:
            res.raise_for_status()
        except requests.RequestException as e:
            try:
                msg = res.json()['msg']
            except (ValueError, KeyError, TypeError):
                # Body is not JSON, or has no 'msg' entry. Catching only these
                # keeps KeyboardInterrupt and unrelated bugs from being hidden,
                # which the previous bare `except:` did.
                msg = ''
            raise NeonPageserverApiException(msg) from e

    def check_status(self):
        """Request /v1/status and raise if the response is an error."""
        self.get(f"http://{self.host}:{self.port}/v1/status").raise_for_status()

    def tenant_list(self):
        """Return the JSON list served by GET /v1/tenant."""
        res = self.get(f"http://{self.host}:{self.port}/v1/tenant")
        self.verbose_error(res)
        res_json = res.json()
        assert isinstance(res_json, list)
        return res_json

    def tenant_create(self, new_tenant_id: uuid.UUID, ok_if_exists):
        """POST a tenant creation request and return ``new_tenant_id``.

        A 409 response is only reported with a message when ``ok_if_exists``
        is true; otherwise it raises. Any other status except 201 goes
        through ``verbose_error``.
        """
        res = self.post(
            f"http://{self.host}:{self.port}/v1/tenant",
            json={
                'new_tenant_id': new_tenant_id.hex,
            },
        )

        if res.status_code == 409:
            if ok_if_exists:
                print(f'could not create tenant: already exists for id {new_tenant_id}')
            else:
                res.raise_for_status()
        elif res.status_code == 201:
            print(f'created tenant {new_tenant_id}')
        else:
            self.verbose_error(res)

        return new_tenant_id

    def timeline_list(self, tenant_id: uuid.UUID):
        """Return the JSON list served by GET /v1/tenant/<tenant_id>/timeline."""
        res = self.get(f"http://{self.host}:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
        self.verbose_error(res)
        res_json = res.json()
        assert isinstance(res_json, list)
        return res_json
|
||||||
|
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
import os
|
||||||
|
def add_missing_empty_rels(base_tar, output_tar):
    """Run the pytest test selected by "test_main_hack" on the given tar paths.

    The input and output paths are handed over through the INPUT_BASE_TAR
    and OUTPUT_BASE_TAR environment variables of the current process.
    """
    os.environ.update({
        'INPUT_BASE_TAR': base_tar,
        'OUTPUT_BASE_TAR': output_tar,
    })
    pytest_args = ["-s", "-k", "test_main_hack"]
    pytest.main(pytest_args)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args: argparse.Namespace):
    """Copy every timeline of the requested tenants between two pageservers.

    For each tenant id in ``args.tenants`` the timelines are listed on the old
    pageserver. Unless ``args.only_import`` is set, the tenant is created on
    the new pageserver and each timeline is exported with a ``fullbackup``
    query into ``<tenant>_<timeline>.tar`` under ``basepath``. Every timeline
    is then sent to the new pageserver with an ``import basebackup`` query
    that reads the tar file on stdin. psql output is captured in files under
    ``basepath``.

    Raises:
        subprocess.CalledProcessError: if a psql export or import exits with a
            non-zero status.
    """
    old_pageserver_host = args.old_pageserver_host
    new_pageserver_host = args.new_pageserver_host
    tenants = args.tenants

    old_http_client = NeonPageserverHttpClient(old_pageserver_host, args.old_pageserver_http_port)
    old_http_client.check_status()
    old_pageserver_connstr = f"postgresql://{old_pageserver_host}:{args.old_pageserver_pg_port}"

    new_http_client = NeonPageserverHttpClient(new_pageserver_host, args.new_pageserver_http_port)
    new_http_client.check_status()
    new_pageserver_connstr = f"postgresql://{new_pageserver_host}:{args.new_pageserver_pg_port}"

    psql_env = {**os.environ, 'LD_LIBRARY_PATH': '/usr/local/lib/'}

    for tenant_id in tenants:
        print(f"Tenant: {tenant_id}")
        timelines = old_http_client.timeline_list(uuid.UUID(tenant_id))
        print(f"Timelines: {timelines}")

        # Create tenant in new pageserver
        if not args.only_import:
            new_http_client.tenant_create(uuid.UUID(tenant_id), args.ok_if_exists)

        for timeline in timelines:
            timeline_key = f"{timeline['tenant_id']}_{timeline['timeline_id']}"
            last_lsn = timeline['local']['last_record_lsn']
            # The same tar file is written by the export and read by the import.
            tar_filename = path.join(basepath, f"{timeline_key}.tar")

            # Export timelines from old pageserver
            if not args.only_import:
                query = f"fullbackup {timeline['tenant_id']} {timeline['timeline_id']} {last_lsn}"

                cmd = [args.psql_path, "--no-psqlrc", old_pageserver_connstr, "-c", query]
                print(f"Running: {cmd}")

                stderr_filename = path.join(basepath, f"{timeline_key}.stderr")

                # The backup is a binary tar stream, so open the target in
                # binary mode. check=True stops the migration on a failed
                # export instead of importing a truncated tar afterwards.
                with open(tar_filename, 'wb') as stdout_f, \
                        open(stderr_filename, 'w') as stderr_f:
                    print(f"(capturing output to {tar_filename})")
                    subprocess.run(cmd,
                                   stdout=stdout_f,
                                   stderr=stderr_f,
                                   env=psql_env,
                                   check=True)

                # NOTE: add_missing_empty_rels(incomplete_tar_filename, tar_filename)
                # is not applied to the exported tar yet.

                print(f"Done export: {tar_filename}")

            # Import timelines to new pageserver
            psql_path = Path(args.psql_path)
            import_cmd = f"import basebackup {timeline['tenant_id']} {timeline['timeline_id']} {last_lsn} {last_lsn}"
            # Pass an argument list and feed the tar through stdin rather than
            # building a `cat ... | psql ...` shell string: no shell is
            # involved, so paths with spaces or metacharacters are safe.
            import_argv = [str(psql_path), new_pageserver_connstr, "-c", import_cmd]

            stderr_filename2 = path.join(basepath, f"import_{timeline_key}.stderr")
            stdout_filename = path.join(basepath, f"import_{timeline_key}.stdout")

            print(f"Running: {import_argv} < {tar_filename}")

            with open(tar_filename, 'rb') as tar_f, \
                    open(stdout_filename, 'w') as stdout_f, \
                    open(stderr_filename2, 'w') as stderr_f:
                print(f"(capturing output to {stdout_filename})")
                subprocess.run(import_argv,
                               stdin=tar_f,
                               stdout=stdout_f,
                               stderr=stderr_f,
                               env=psql_env,
                               check=True)

            print("Done import")
|
||||||
|
|
||||||
|
|
||||||
|
# Command-line entry point: parse the migration options and run main().
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--tenant-id',
        dest='tenants',
        required=True,
        nargs='+',
        help='Id of the tenant to migrate. You can pass multiple arguments',
    )
    parser.add_argument(
        '--from-host',
        dest='old_pageserver_host',
        required=True,
        help='Host of the pageserver to migrate data from',
    )
    parser.add_argument(
        '--from-http-port',
        dest='old_pageserver_http_port',
        required=False,
        type=int,
        default=9898,
        help='HTTP port of the pageserver to migrate data from. Default: 9898',
    )
    parser.add_argument(
        '--from-pg-port',
        dest='old_pageserver_pg_port',
        required=False,
        type=int,
        default=6400,
        help='pg port of the pageserver to migrate data from. Default: 6400',
    )
    parser.add_argument(
        '--to-host',
        dest='new_pageserver_host',
        required=True,
        help='Host of the pageserver to migrate data to',
    )
    parser.add_argument(
        '--to-http-port',
        dest='new_pageserver_http_port',
        required=False,
        default=9898,
        type=int,
        help='HTTP port of the pageserver to migrate data to. Default: 9898',
    )
    parser.add_argument(
        '--to-pg-port',
        dest='new_pageserver_pg_port',
        required=False,
        default=6400,
        type=int,
        help='pg port of the pageserver to migrate data to. Default: 6400',
    )
    # Without nargs/const this option demanded a value, so a bare
    # `--ignore-tenant-exists` was rejected by argparse. nargs='?' makes the
    # value optional: the bare flag yields True, and the old
    # `--ignore-tenant-exists <value>` form keeps working.
    parser.add_argument(
        '--ignore-tenant-exists',
        dest='ok_if_exists',
        required=False,
        nargs='?',
        const=True,
        default=False,
        help=
        'Ignore error if we are trying to create the tenant that already exists. It can be dangerous if existing tenant already contains some data.',
    )
    parser.add_argument(
        '--psql-path',
        dest='psql_path',
        required=False,
        default='/usr/local/bin/psql',
        help='Path to the psql binary. Default: /usr/local/bin/psql',
    )
    parser.add_argument(
        '--only-import',
        dest='only_import',
        required=False,
        default=False,
        action='store_true',
        help='Skip export and tenant creation part',
    )
    args = parser.parse_args()
    main(args)
|
||||||
@@ -105,3 +105,16 @@ def test_ancestor_branch(neon_env_builder: NeonEnvBuilder):
|
|||||||
|
|
||||||
branch2_cur.execute('SELECT count(*) FROM foo')
|
branch2_cur.execute('SELECT count(*) FROM foo')
|
||||||
assert branch2_cur.fetchone() == (300000, )
|
assert branch2_cur.fetchone() == (300000, )
|
||||||
|
|
||||||
|
|
||||||
|
def test_ancestor_branch_detach(neon_simple_env: NeonEnv):
    """Check that detaching a timeline another branch was created from raises
    ``NeonPageserverApiException``."""
    env = neon_simple_env

    parent_branch = "test_ancestor_branch_detach_parent"
    parent_timeline_id = env.neon_cli.create_branch(parent_branch, "empty")

    # Create a branch on top of the parent timeline.
    env.neon_cli.create_branch("test_ancestor_branch_detach_branch1", parent_branch)

    ps_http = env.pageserver.http_client()
    expected_error = "Failed to detach inmem tenant timeline"
    with pytest.raises(NeonPageserverApiException, match=expected_error):
        ps_http.timeline_detach(env.initial_tenant, parent_timeline_id)
|
||||||
|
|||||||
@@ -1,101 +0,0 @@
|
|||||||
from fixtures.log_helper import log
|
|
||||||
from fixtures.neon_fixtures import NeonEnv
|
|
||||||
from fixtures.utils import lsn_from_hex
|
|
||||||
|
|
||||||
|
|
||||||
# Test the GC implementation when running with branching.
|
|
||||||
# This test reproduces the issue https://github.com/neondatabase/neon/issues/707.
|
|
||||||
#
|
|
||||||
# Consider two LSNs `lsn1` and `lsn2` with some delta files as follows:
|
|
||||||
# ...
|
|
||||||
# p -> has an image layer xx_p with p < lsn1
|
|
||||||
# ...
|
|
||||||
# lsn1
|
|
||||||
# ...
|
|
||||||
# q -> has an image layer yy_q with lsn1 < q < lsn2
|
|
||||||
# ...
|
|
||||||
# lsn2
|
|
||||||
#
|
|
||||||
# Consider running a GC iteration such that the GC horizon is between p and lsn1
|
|
||||||
# ...
|
|
||||||
# p -> has an image layer xx_p with p < lsn1
|
|
||||||
# D_start -> is a delta layer D's start (e.g D = '...-...-D_start-D_end')
|
|
||||||
# ...
|
|
||||||
# GC_h -> is a gc horizon such that p < GC_h < lsn1
|
|
||||||
# ...
|
|
||||||
# lsn1
|
|
||||||
# ...
|
|
||||||
# D_end -> is a delta layer D's end
|
|
||||||
# ...
|
|
||||||
# q -> has an image layer yy_q with lsn1 < q < lsn2
|
|
||||||
# ...
|
|
||||||
# lsn2
|
|
||||||
#
|
|
||||||
# As described in the issue #707, the image layer xx_p will be deleted as
|
|
||||||
# its range is below the GC horizon and there exists a newer image layer yy_q (q > p).
|
|
||||||
# However, removing xx_p will corrupt any delta layers that depend on xx_p that
|
|
||||||
# are not deleted by GC. For example, the delta layer D is corrupted in the
|
|
||||||
# above example because D depends on the image layer xx_p for value reconstruction.
|
|
||||||
#
|
|
||||||
# Because the delta layer D covering lsn1 is corrupted, creating a branch
|
|
||||||
# starting from lsn1 should return an error as follows:
|
|
||||||
# could not find data for key ... at LSN ..., for request at LSN ...
|
|
||||||
def test_branch_and_gc(neon_simple_env: NeonEnv):
    """Branch at an earlier LSN after a manual GC run and check the row count.

    Two batches of rows are inserted on the main timeline, recording the WAL
    insert LSN after each. ``do_gc`` is then issued with a horizon derived from
    the distance between the two LSNs, a branch is created at the first LSN,
    and the branch must hold 200000 rows after one more batch is inserted.
    """
    env = neon_simple_env

    tenant_conf = {
        # disable background GC
        'gc_period': '10 m',
        'gc_horizon': f'{10 * 1024 ** 3}',

        # small checkpoint distance to create more delta layer files
        'checkpoint_distance': f'{1024 ** 2}',

        # set the target size to be large to allow the image layer to cover the whole key space
        'compaction_target_size': f'{1024 ** 3}',

        # tweak the default settings to allow quickly create image layers and L1 layers
        'compaction_period': '1 s',
        'compaction_threshold': '2',
        'image_creation_threshold': '1',

        # set PITR interval to be small, so we can do GC
        'pitr_interval': '1 s'
    }
    tenant, _ = env.neon_cli.create_tenant(conf=tenant_conf)

    timeline_main = env.neon_cli.create_timeline('test_main', tenant_id=tenant)
    pg_main = env.postgres.create_start('test_main', tenant_id=tenant)

    main_cur = pg_main.connect().cursor()

    def insert_batch_and_get_lsn():
        # Insert one batch of rows and return the WAL insert LSN afterwards.
        main_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')
        main_cur.execute('SELECT pg_current_wal_insert_lsn()')
        return main_cur.fetchone()[0]

    main_cur.execute(
        "CREATE TABLE foo(key serial primary key, t text default 'foooooooooooooooooooooooooooooooooooooooooooooooooooo')"
    )

    lsn1 = insert_batch_and_get_lsn()
    log.info(f'LSN1: {lsn1}')

    lsn2 = insert_batch_and_get_lsn()
    log.info(f'LSN2: {lsn2}')

    # Set the GC horizon so that lsn1 is inside the horizon, which means
    # we can create a new branch starting from lsn1.
    horizon = lsn_from_hex(lsn2) - lsn_from_hex(lsn1) + 1024
    env.pageserver.safe_psql(f'do_gc {tenant.hex} {timeline_main.hex} {horizon}')

    env.neon_cli.create_branch('test_branch',
                               'test_main',
                               tenant_id=tenant,
                               ancestor_start_lsn=lsn1)
    pg_branch = env.postgres.create_start('test_branch', tenant_id=tenant)

    branch_cur = pg_branch.connect().cursor()
    branch_cur.execute('INSERT INTO foo SELECT FROM generate_series(1, 100000)')

    branch_cur.execute('SELECT count(*) FROM foo')
    row = branch_cur.fetchone()
    assert row == (200000, )
|
|
||||||
@@ -1,78 +0,0 @@
|
|||||||
from typing import List
|
|
||||||
import threading
|
|
||||||
import pytest
|
|
||||||
from fixtures.neon_fixtures import NeonEnv, PgBin, Postgres
|
|
||||||
import time
|
|
||||||
import random
|
|
||||||
from fixtures.log_helper import log
|
|
||||||
from performance.test_perf_pgbench import get_scales_matrix
|
|
||||||
|
|
||||||
|
|
||||||
# Test branch creation
|
|
||||||
#
|
|
||||||
# This test spawns pgbench in a thread in the background, and creates a branch while
|
|
||||||
# pgbench is running. Then it launches pgbench on the new branch, and creates another branch.
|
|
||||||
# Repeat `n_branches` times.
|
|
||||||
#
|
|
||||||
# If 'ty' == 'cascade', each branch is created from the previous branch, so that you end
|
|
||||||
# up with a branch of a branch of a branch ... of a branch. With 'ty' == 'flat',
|
|
||||||
# each branch is created from the root.
|
|
||||||
@pytest.mark.parametrize("n_branches", [10])
@pytest.mark.parametrize("scale", get_scales_matrix(1))
@pytest.mark.parametrize("ty", ["cascade", "flat"])
def test_branching_with_pgbench(neon_simple_env: NeonEnv,
                                pg_bin: PgBin,
                                n_branches: int,
                                scale: int,
                                ty: str):
    """Create branches while pgbench workloads run in background threads.

    A pgbench thread is started on branch ``b0``. Then, ``n_branches`` times,
    a new branch is created after a random delay and gets its own pgbench
    thread. With ``ty == "cascade"`` each branch is created from the previous
    one, otherwise from ``b0``. Finally every endpoint must report
    ``100000 * scale`` rows in ``pgbench_accounts``.
    """
    env = neon_simple_env

    # Use aggressive GC and checkpoint settings, so that we also exercise GC during the test
    tenant_conf = {
        'gc_period': '5 s',
        'gc_horizon': f'{1024 ** 2}',
        'checkpoint_distance': f'{1024 ** 2}',
        'compaction_target_size': f'{1024 ** 2}',
        # set PITR interval to be small, so we can do GC
        'pitr_interval': '5 s'
    }
    tenant, _ = env.neon_cli.create_tenant(conf=tenant_conf)

    def run_pgbench(pg: Postgres):
        connstr = pg.connstr()

        log.info(f"Start a pgbench workload on pg {connstr}")

        pg_bin.run_capture(['pgbench', '-i', f'-s{scale}', connstr])
        pg_bin.run_capture(['pgbench', '-c10', '-T15', connstr])

    pgs: List[Postgres] = []
    threads: List[threading.Thread] = []

    def start_endpoint_with_workload(branch_name: str):
        # Start a compute node on the branch and a pgbench thread against it.
        pgs.append(env.postgres.create_start(branch_name, tenant_id=tenant))
        worker = threading.Thread(target=run_pgbench, args=(pgs[-1], ), daemon=True)
        threads.append(worker)
        worker.start()

    env.neon_cli.create_branch('b0', tenant_id=tenant)
    start_endpoint_with_workload('b0')

    for i in range(n_branches):
        # random a delay between [0, 5]
        delay = random.random() * 5
        time.sleep(delay)
        log.info(f"Sleep {delay}s")

        new_branch = f'b{i + 1}'
        parent_branch = f'b{i}' if ty == "cascade" else 'b0'
        env.neon_cli.create_branch(new_branch, parent_branch, tenant_id=tenant)

        start_endpoint_with_workload(new_branch)

    for worker in threads:
        worker.join()

    expected_rows = (100000 * scale, )
    for pg in pgs:
        res = pg.safe_psql('SELECT count(*) from pgbench_accounts')
        assert res[0] == expected_rows
|
|
||||||
@@ -110,6 +110,6 @@ def test_fix_broken_timelines_on_startup(neon_simple_env: NeonEnv):
|
|||||||
env.neon_cli.pageserver_stop(immediate=True)
|
env.neon_cli.pageserver_stop(immediate=True)
|
||||||
env.neon_cli.pageserver_start()
|
env.neon_cli.pageserver_start()
|
||||||
|
|
||||||
# Check that tenant with "broken" timeline is not loaded.
|
# Check that the "broken" timeline is not loaded
|
||||||
with pytest.raises(Exception, match=f"Failed to get repo for tenant {tenant_id.hex}"):
|
timelines = env.neon_cli.list_timelines(tenant_id)
|
||||||
env.neon_cli.list_timelines(tenant_id)
|
assert len(timelines) == 1
|
||||||
|
|||||||
@@ -1,51 +0,0 @@
|
|||||||
from contextlib import closing
|
|
||||||
import shutil
|
|
||||||
import time
|
|
||||||
import subprocess
|
|
||||||
import os.path
|
|
||||||
|
|
||||||
from cached_property import threading
|
|
||||||
from fixtures.neon_fixtures import NeonEnv
|
|
||||||
from fixtures.log_helper import log
|
|
||||||
|
|
||||||
|
|
||||||
def lsof_path() -> str:
    """Return the location of the ``lsof`` executable found on ``PATH``.

    Raises:
        RuntimeError: if ``lsof`` cannot be found.
    """
    located = shutil.which("lsof")
    if located is not None:
        return located
    raise RuntimeError('lsof not found in PATH')
|
|
||||||
|
|
||||||
|
|
||||||
# Makes sure that `pageserver.pid` is only held by `pageserve` command, not other commands.
|
|
||||||
# This is to test the changes in https://github.com/neondatabase/neon/pull/1834.
|
|
||||||
def test_lsof_pageserver_pid(neon_simple_env: NeonEnv):
    """Poll ``lsof`` on ``pageserver.pid`` while a workload runs in a thread.

    Whenever ``lsof`` lists any command for the pid file, the list must be
    exactly ``['pageserve']``.
    """
    env = neon_simple_env

    def start_workload():
        env.neon_cli.create_branch("test_lsof_pageserver_pid")
        pg = env.postgres.create_start("test_lsof_pageserver_pid")
        with closing(pg.connect()) as conn:
            with conn.cursor() as cur:
                cur.execute("CREATE TABLE foo as SELECT x FROM generate_series(1,100000) x")
                cur.execute("update foo set x=x+1")

    workload_thread = threading.Thread(target=start_workload, args=(), daemon=True)
    workload_thread.start()

    pid_file = os.path.join(env.repo_dir, "pageserver.pid")
    lsof_argv = [lsof_path(), pid_file]
    while workload_thread.is_alive():
        res = subprocess.run(lsof_argv,
                             check=False,
                             universal_newlines=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        # parse the `lsof` command's output to get only the list of commands
        # (the first output line is skipped; the first space-separated field
        # of every following line is taken)
        listing = res.stdout.strip().split('\n')[1:]
        commands = [entry.split(' ')[0] for entry in listing]
        if len(commands) > 0:
            log.info(f"lsof commands: {commands}")
            assert commands == ['pageserve']

        time.sleep(1.0)
|
|
||||||
167
test_runner/batch_others/test_complete_basebackup.py
Normal file
167
test_runner/batch_others/test_complete_basebackup.py
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
from fixtures.neon_fixtures import VanillaPostgres
|
||||||
|
from fixtures.utils import subprocess_capture
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
import tempfile
|
||||||
|
|
||||||
|
|
||||||
|
def get_rel_paths(log_dir, pg_bin, base_tar):
    """Yield relation file paths reported by Postgres for an unpacked base tar.

    *base_tar* is extracted into a temporary directory and a
    ``VanillaPostgres`` is started on it. ``template0`` is skipped because it
    cannot be connected to; instead a copy named ``template0copy`` is created
    and its ``base/<oid>/`` paths are rewritten to the oid of ``template0``.
    Paths of every other database are yielded unchanged.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])

        port = "55439"  # Probably free
        with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
            vanilla_pg.configure([f"port={port}"])
            vanilla_pg.start()

            def run_sql(sql, **kwargs):
                # Every query in this function runs as cloud_admin.
                return vanilla_pg.safe_psql(sql, user="cloud_admin", **kwargs)

            # Create database based on template0 because we can't connect to template0
            run_sql("create database template0copy template template0")
            run_sql("CHECKPOINT")

            # Get all databases
            databases = run_sql("select oid, datname from pg_database")
            template0_oids = [oid for (oid, datname) in databases if datname == "template0"]
            template0_oid = template0_oids[0]

            # Get rel paths for each database
            for oid, datname in databases:
                if datname == "template0":
                    # We can't connect to template0
                    continue

                rels = run_sql("select relname, pg_relation_filepath(oid) from pg_class",
                               dbname=datname)
                for _relname, filepath in rels:
                    if filepath is None:
                        continue

                    if datname != "template0copy":
                        yield filepath
                        continue

                    # Add all template0copy paths to template0
                    copy_prefix = f"base/{oid}/"
                    if filepath.startswith(copy_prefix):
                        remainder = filepath[len(copy_prefix):]
                        yield f"base/{template0_oid}/{remainder}"
                    elif filepath.startswith("global"):
                        print(f"skipping {datname} global file {filepath}")
                    else:
                        raise AssertionError
|
||||||
|
|
||||||
|
|
||||||
|
def pack_base(log_dir, restored_dir, output_tar):
    """Tar the entries of *restored_dir* and move the archive to *output_tar*.

    ``tar`` runs inside *restored_dir* and writes a temporary ``tmp.tar``
    there, which is then moved to its final location.
    """
    archive_name = "tmp.tar"
    # List the directory before tar runs, so the temporary archive, which is
    # created in the same directory, is not among its own inputs.
    entries = os.listdir(restored_dir)
    subprocess_capture(log_dir, ["tar", "-cf", archive_name, *entries], cwd=restored_dir)
    shutil.move(os.path.join(restored_dir, archive_name), output_tar)
|
||||||
|
|
||||||
|
|
||||||
|
def get_files_in_tar(log_dir, tar):
    """Yield the path of every file in *tar*, relative to the archive root.

    The archive is extracted into a temporary directory, which is walked with
    ``os.walk``; only file entries are yielded, not directories.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the tar
        subprocess_capture(log_dir, ["tar", "-xf", tar, "-C", restored_dir])

        # Report every extracted file relative to the extraction directory
        for root, _dirs, files in os.walk(restored_dir):
            for name in files:
                yield os.path.relpath(os.path.join(root, name), restored_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def corrupt(log_dir, base_tar, output_tar):
    """Write to *output_tar* a copy of *base_tar* without its zero-length files.

    Returns the set of removed file paths, relative to the archive root.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])

        # Find empty files
        extracted = (os.path.join(root, name)
                     for root, _dirs, names in os.walk(restored_dir)
                     for name in names)
        empty_files = [candidate for candidate in extracted if os.path.getsize(candidate) == 0]

        # Delete empty files (just to see if they get recreated)
        for victim in empty_files:
            os.remove(victim)

        # Repackage
        pack_base(log_dir, restored_dir, output_tar)

        # Return relative paths
        skip = len(restored_dir) + 1
        return {victim[skip:] for victim in empty_files}
|
||||||
|
|
||||||
|
|
||||||
|
def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
    """Write to *output_tar* a copy of *corrupt_tar* with missing files created.

    *corrupt_tar* is extracted into a temporary directory. Every entry of
    *paths* (relative to the archive root) that does not exist there is
    created as an empty file, and the directory is repackaged with
    ``pack_base``. The input tar is not modified.
    """
    with tempfile.TemporaryDirectory() as restored_dir:
        # Unpack the base tar
        subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])

        # Touch files that don't exist
        for path in paths:
            absolute_path = os.path.join(restored_dir, path)
            if not os.path.exists(absolute_path):
                # The f prefix was missing, so the literal text
                # "{absolute_path}" was printed instead of the path.
                print(f"File {absolute_path} didn't exist. Creating..")
                Path(absolute_path).touch()

        # Repackage
        pack_base(log_dir, restored_dir, output_tar)
|
||||||
|
|
||||||
|
|
||||||
|
def test_complete(test_output_dir, pg_bin):
    """Remove the empty files from a base tar and check they can be restored.

    The paths inferred by ``get_rel_paths`` from the corrupted tar must cover
    every deleted file except the two configuration files tolerated below, and
    the tar rebuilt by ``touch_missing_rels`` must contain every file of the
    original tar with the same exceptions.
    """
    # Files that are allowed to stay missing after reconstruction.
    tolerated_missing = {
        "postgresql.auto.conf",
        "pg_ident.conf",
    }

    # Specify directories
    # TODO make a basebackup instead of using one from another test
    # NOTE(review): this is an absolute path into one developer's checkout.
    work_dir = "/home/bojan/src/neondatabase/neon/test_output/test_import_from_pageserver/"
    base_tar = os.path.join(work_dir, "psql_2.stdout")
    output_tar = os.path.join(work_dir, "psql_2-completed.stdout")

    # Create new base tar with missing empty files
    corrupt_tar = os.path.join(test_output_dir, "psql_2-corrupted.stdout")
    deleted_files = corrupt(test_output_dir, base_tar, corrupt_tar)
    original_files = set(get_files_in_tar(test_output_dir, base_tar))
    corrupted_files = set(get_files_in_tar(test_output_dir, corrupt_tar))
    assert len(original_files - corrupted_files) > 0

    # Reconstruct paths from the corrupted tar, assert it covers everything important
    reconstructed_paths = set(get_rel_paths(test_output_dir, pg_bin, corrupt_tar))
    paths_missed = deleted_files - reconstructed_paths
    assert paths_missed.issubset(tolerated_missing)

    # Recreate the correct tar by touching files, compare with original tar
    touch_missing_rels(test_output_dir, corrupt_tar, output_tar, reconstructed_paths)
    original_files = set(get_files_in_tar(test_output_dir, base_tar))
    completed_files = set(get_files_in_tar(test_output_dir, output_tar))
    paths_missed = original_files - completed_files
    assert paths_missed.issubset(tolerated_missing)
|
||||||
|
|
||||||
|
# HACK this script relies on test fixtures, but you can run it with
|
||||||
|
# poetry run pytest -k test_main_hack and pass inputs via envvars
|
||||||
|
#
|
||||||
|
# The script takes a base tar, infers what empty rel files might be missing
|
||||||
|
# and creates a new base tar with those files included. It does not modify
|
||||||
|
# the original file.
|
||||||
|
def test_main_hack(test_output_dir, pg_bin, pytestconfig):
    """Build a completed base tar from the tar named in the environment.

    Reads the input path from ``INPUT_BASE_TAR`` and the output path from
    ``OUTPUT_BASE_TAR``, infers relation paths from the input tar and writes a
    new tar in which the missing ones have been created.
    """
    source_tar = os.environ['INPUT_BASE_TAR']
    target_tar = os.environ['OUTPUT_BASE_TAR']

    rel_paths = set(get_rel_paths(test_output_dir, pg_bin, source_tar))
    touch_missing_rels(test_output_dir, source_tar, target_tar, rel_paths)
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
from fixtures.neon_fixtures import NeonEnvBuilder, WalCraft
|
|
||||||
from fixtures.log_helper import log
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
# Restart nodes with WAL end having specially crafted shape, like last record
|
|
||||||
# crossing segment boundary, to test decoding issues.
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('wal_type',
                         [
                             'simple',
                             'last_wal_record_xlog_switch',
                             'last_wal_record_xlog_switch_ends_on_page_boundary',
                             'last_wal_record_crossing_segment',
                             'wal_record_crossing_segment_followed_by_small_one',
                         ])
def test_crafted_wal_end(neon_env_builder: NeonEnvBuilder, wal_type: str):
    """Restart the storage nodes twice after crafting the WAL end.

    After ``WalCraft`` shapes the WAL according to *wal_type*, the pageserver
    and the single safekeeper are restarted two times. After each restart the
    previously inserted keys must still sum up correctly and new inserts must
    succeed.
    """
    neon_env_builder.num_safekeepers = 1
    env = neon_env_builder.init_start()
    env.neon_cli.create_branch('test_crafted_wal_end')

    pg = env.postgres.create('test_crafted_wal_end')
    wal_craft = WalCraft(env)
    pg.config(wal_craft.postgres_config())
    pg.start()

    def restart_storage():
        # Stop the pageserver and the safekeeper, then start them again with
        # the safekeeper first.
        env.pageserver.stop()
        env.safekeepers[0].stop()
        env.safekeepers[0].start()
        env.pageserver.start()

    setup_queries = [
        'CREATE TABLE keys(key int primary key)',
        'INSERT INTO keys SELECT generate_series(1, 100)',
        'SELECT SUM(key) FROM keys'
    ]
    res = pg.safe_psql_many(queries=setup_queries)
    assert res[-1][0] == (5050, )

    wal_craft.in_existing(wal_type, pg.connstr())

    log.info("Restarting all safekeepers and pageservers")
    restart_storage()

    log.info("Trying more queries")
    res = pg.safe_psql_many(queries=[
        'SELECT SUM(key) FROM keys',
        'INSERT INTO keys SELECT generate_series(101, 200)',
        'SELECT SUM(key) FROM keys',
    ])
    assert res[0][0] == (5050, )
    assert res[-1][0] == (20100, )

    log.info("Restarting all safekeepers and pageservers (again)")
    restart_storage()

    log.info("Trying more queries (again)")
    res = pg.safe_psql_many(queries=[
        'SELECT SUM(key) FROM keys',
        'INSERT INTO keys SELECT generate_series(201, 300)',
        'SELECT SUM(key) FROM keys',
    ])
    assert res[0][0] == (20100, )
    assert res[-1][0] == (45150, )
|
|
||||||
@@ -90,7 +90,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
|
|||||||
# Clean up
|
# Clean up
|
||||||
# TODO it should clean itself
|
# TODO it should clean itself
|
||||||
client = env.pageserver.http_client()
|
client = env.pageserver.http_client()
|
||||||
client.timeline_delete(tenant, timeline)
|
client.timeline_detach(tenant, timeline)
|
||||||
|
|
||||||
# Importing correct backup works
|
# Importing correct backup works
|
||||||
import_tar(base_tar, wal_tar)
|
import_tar(base_tar, wal_tar)
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ def check_tenant(env: NeonEnv, pageserver_http: NeonPageserverHttpClient):
|
|||||||
assert res_2[0] == (5000050000, )
|
assert res_2[0] == (5000050000, )
|
||||||
|
|
||||||
pg.stop()
|
pg.stop()
|
||||||
pageserver_http.tenant_detach(tenant_id)
|
pageserver_http.timeline_detach(tenant_id, timeline_id)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)])
|
@pytest.mark.parametrize('num_timelines,num_safekeepers', [(3, 1)])
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from contextlib import closing
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import time
|
import time
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from fixtures.neon_fixtures import NeonEnvBuilder, assert_timeline_local, wait_until, wait_for_last_record_lsn, wait_for_upload
|
from fixtures.neon_fixtures import NeonEnvBuilder, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload
|
||||||
from fixtures.log_helper import log
|
from fixtures.log_helper import log
|
||||||
from fixtures.utils import lsn_from_hex, lsn_to_hex
|
from fixtures.utils import lsn_from_hex, lsn_to_hex
|
||||||
import pytest
|
import pytest
|
||||||
@@ -91,14 +91,14 @@ def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, sto
|
|||||||
# Introduce failpoint in download
|
# Introduce failpoint in download
|
||||||
env.pageserver.safe_psql(f"failpoints remote-storage-download-pre-rename=return")
|
env.pageserver.safe_psql(f"failpoints remote-storage-download-pre-rename=return")
|
||||||
|
|
||||||
client.tenant_attach(UUID(tenant_id))
|
client.timeline_attach(UUID(tenant_id), UUID(timeline_id))
|
||||||
|
|
||||||
# is there a better way to assert that failpoint triggered?
|
# is there a better way to assert that failpoint triggered?
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
# assert cannot attach timeline that is scheduled for download
|
# assert cannot attach timeline that is scheduled for download
|
||||||
with pytest.raises(Exception, match="Conflict: Tenant download is already in progress"):
|
with pytest.raises(Exception, match="Timeline download is already in progress"):
|
||||||
client.tenant_attach(UUID(tenant_id))
|
client.timeline_attach(UUID(tenant_id), UUID(timeline_id))
|
||||||
|
|
||||||
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
|
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
|
||||||
log.info("Timeline detail with active failpoint: %s", detail)
|
log.info("Timeline detail with active failpoint: %s", detail)
|
||||||
@@ -109,12 +109,12 @@ def test_remote_storage_backup_and_restore(neon_env_builder: NeonEnvBuilder, sto
|
|||||||
env.pageserver.stop()
|
env.pageserver.stop()
|
||||||
env.pageserver.start()
|
env.pageserver.start()
|
||||||
|
|
||||||
client.tenant_attach(UUID(tenant_id))
|
client.timeline_attach(UUID(tenant_id), UUID(timeline_id))
|
||||||
|
|
||||||
log.info("waiting for timeline redownload")
|
log.info("waiting for timeline redownload")
|
||||||
wait_until(number_of_iterations=10,
|
wait_until(number_of_iterations=10,
|
||||||
interval=1,
|
interval=1,
|
||||||
func=lambda: assert_timeline_local(client, UUID(tenant_id), UUID(timeline_id)))
|
func=lambda: assert_local(client, UUID(tenant_id), UUID(timeline_id)))
|
||||||
|
|
||||||
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
|
detail = client.timeline_detail(UUID(tenant_id), UUID(timeline_id))
|
||||||
assert detail['local'] is not None
|
assert detail['local'] is not None
|
||||||
|
|||||||
74
test_runner/batch_others/test_restart_compute.py
Normal file
74
test_runner/batch_others/test_restart_compute.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from contextlib import closing
|
||||||
|
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||||
|
from fixtures.log_helper import log
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Test restarting and recreating a postgres instance
|
||||||
|
#
|
||||||
|
@pytest.mark.parametrize('with_safekeepers', [False, True])
|
||||||
|
def test_restart_compute(neon_env_builder: NeonEnvBuilder, with_safekeepers: bool):
|
||||||
|
neon_env_builder.auth_enabled = True
|
||||||
|
if with_safekeepers:
|
||||||
|
neon_env_builder.num_safekeepers = 3
|
||||||
|
env = neon_env_builder.init_start()
|
||||||
|
|
||||||
|
env.neon_cli.create_branch('test_restart_compute')
|
||||||
|
pg = env.postgres.create_start('test_restart_compute')
|
||||||
|
log.info("postgres is running on 'test_restart_compute' branch")
|
||||||
|
|
||||||
|
with closing(pg.connect()) as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute('CREATE TABLE t(key int primary key, value text)')
|
||||||
|
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||||
|
cur.execute('SELECT sum(key) FROM t')
|
||||||
|
r = cur.fetchone()
|
||||||
|
assert r == (5000050000, )
|
||||||
|
log.info(f"res = {r}")
|
||||||
|
|
||||||
|
# Remove data directory and restart
|
||||||
|
pg.stop_and_destroy().create_start('test_restart_compute')
|
||||||
|
|
||||||
|
with closing(pg.connect()) as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
# We can still see the row
|
||||||
|
cur.execute('SELECT sum(key) FROM t')
|
||||||
|
r = cur.fetchone()
|
||||||
|
assert r == (5000050000, )
|
||||||
|
log.info(f"res = {r}")
|
||||||
|
|
||||||
|
# Insert another row
|
||||||
|
cur.execute("INSERT INTO t VALUES (100001, 'payload2')")
|
||||||
|
cur.execute('SELECT count(*) FROM t')
|
||||||
|
|
||||||
|
r = cur.fetchone()
|
||||||
|
assert r == (100001, )
|
||||||
|
log.info(f"res = {r}")
|
||||||
|
|
||||||
|
# Again remove data directory and restart
|
||||||
|
pg.stop_and_destroy().create_start('test_restart_compute')
|
||||||
|
|
||||||
|
# That select causes lots of FPI's and increases probability of safekeepers
|
||||||
|
# lagging behind after query completion
|
||||||
|
with closing(pg.connect()) as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
# We can still see the rows
|
||||||
|
cur.execute('SELECT count(*) FROM t')
|
||||||
|
|
||||||
|
r = cur.fetchone()
|
||||||
|
assert r == (100001, )
|
||||||
|
log.info(f"res = {r}")
|
||||||
|
|
||||||
|
# And again remove data directory and restart
|
||||||
|
pg.stop_and_destroy().create_start('test_restart_compute')
|
||||||
|
|
||||||
|
with closing(pg.connect()) as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
# We can still see the rows
|
||||||
|
cur.execute('SELECT count(*) FROM t')
|
||||||
|
|
||||||
|
r = cur.fetchone()
|
||||||
|
assert r == (100001, )
|
||||||
|
log.info(f"res = {r}")
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
from threading import Thread
|
|
||||||
from uuid import uuid4
|
|
||||||
import psycopg2
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from fixtures.log_helper import log
|
|
||||||
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserverApiException
|
|
||||||
|
|
||||||
|
|
||||||
def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
|
||||||
env = neon_env_builder.init_start()
|
|
||||||
pageserver_http = env.pageserver.http_client()
|
|
||||||
|
|
||||||
# first check for non existing tenant
|
|
||||||
tenant_id = uuid4()
|
|
||||||
with pytest.raises(expected_exception=NeonPageserverApiException,
|
|
||||||
match=f'Tenant not found for id {tenant_id.hex}'):
|
|
||||||
pageserver_http.tenant_detach(tenant_id)
|
|
||||||
|
|
||||||
# create new tenant
|
|
||||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
|
||||||
|
|
||||||
# assert tenant exists on disk
|
|
||||||
assert (env.repo_dir / "tenants" / tenant_id.hex).exists()
|
|
||||||
|
|
||||||
pg = env.postgres.create_start('main', tenant_id=tenant_id)
|
|
||||||
# we rely upon autocommit after each statement
|
|
||||||
pg.safe_psql_many(queries=[
|
|
||||||
'CREATE TABLE t(key int primary key, value text)',
|
|
||||||
'INSERT INTO t SELECT generate_series(1,100000), \'payload\'',
|
|
||||||
])
|
|
||||||
|
|
||||||
# gc should not try to even start
|
|
||||||
with pytest.raises(expected_exception=psycopg2.DatabaseError,
|
|
||||||
match='gc target timeline does not exist'):
|
|
||||||
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {uuid4().hex} 0')
|
|
||||||
|
|
||||||
# try to concurrently run gc and detach
|
|
||||||
gc_thread = Thread(
|
|
||||||
target=lambda: env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0'), )
|
|
||||||
gc_thread.start()
|
|
||||||
|
|
||||||
last_error = None
|
|
||||||
for i in range(3):
|
|
||||||
try:
|
|
||||||
pageserver_http.tenant_detach(tenant_id)
|
|
||||||
except Exception as e:
|
|
||||||
last_error = e
|
|
||||||
log.error(f"try {i} error detaching tenant: {e}")
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
# else is called if the loop finished without reaching "break"
|
|
||||||
else:
|
|
||||||
pytest.fail(f"could not detach timeline: {last_error}")
|
|
||||||
|
|
||||||
gc_thread.join(timeout=10)
|
|
||||||
|
|
||||||
# check that nothing is left on disk for deleted tenant
|
|
||||||
assert not (env.repo_dir / "tenants" / tenant_id.hex).exists()
|
|
||||||
|
|
||||||
with pytest.raises(expected_exception=psycopg2.DatabaseError,
|
|
||||||
match=f'Tenant {tenant_id.hex} not found'):
|
|
||||||
env.pageserver.safe_psql(f'do_gc {tenant_id.hex} {timeline_id.hex} 0')
|
|
||||||
@@ -1,31 +1,16 @@
|
|||||||
|
from contextlib import closing, contextmanager
|
||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
import signal
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import threading
|
import threading
|
||||||
from contextlib import closing, contextmanager
|
import typing
|
||||||
from typing import Any, Dict, Optional, Tuple
|
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
|
|
||||||
import pytest
|
|
||||||
from fixtures.log_helper import log
|
from fixtures.log_helper import log
|
||||||
from fixtures.neon_fixtures import (
|
from typing import Optional
|
||||||
Etcd,
|
import signal
|
||||||
NeonEnv,
|
import pytest
|
||||||
NeonEnvBuilder,
|
|
||||||
NeonPageserverHttpClient,
|
from fixtures.neon_fixtures import PgProtocol, PortDistributor, Postgres, NeonEnvBuilder, Etcd, NeonPageserverHttpClient, assert_local, wait_until, wait_for_last_record_lsn, wait_for_upload, neon_binpath, pg_distrib_dir
|
||||||
PageserverPort,
|
|
||||||
PortDistributor,
|
|
||||||
Postgres,
|
|
||||||
assert_no_in_progress_downloads_for_tenant,
|
|
||||||
assert_timeline_local,
|
|
||||||
base_dir,
|
|
||||||
neon_binpath,
|
|
||||||
pg_distrib_dir,
|
|
||||||
wait_for_last_record_lsn,
|
|
||||||
wait_for_upload,
|
|
||||||
wait_until,
|
|
||||||
)
|
|
||||||
from fixtures.utils import lsn_from_hex, subprocess_capture
|
from fixtures.utils import lsn_from_hex, subprocess_capture
|
||||||
|
|
||||||
|
|
||||||
@@ -116,126 +101,9 @@ def load(pg: Postgres, stop_event: threading.Event, load_ok_event: threading.Eve
|
|||||||
log.info('load thread stopped')
|
log.info('load thread stopped')
|
||||||
|
|
||||||
|
|
||||||
def populate_branch(
|
|
||||||
pg: Postgres,
|
|
||||||
tenant_id: UUID,
|
|
||||||
ps_http: NeonPageserverHttpClient,
|
|
||||||
create_table: bool,
|
|
||||||
expected_sum: Optional[int],
|
|
||||||
) -> Tuple[UUID, int]:
|
|
||||||
# insert some data
|
|
||||||
with pg_cur(pg) as cur:
|
|
||||||
cur.execute("SHOW neon.timeline_id")
|
|
||||||
timeline_id = UUID(cur.fetchone()[0])
|
|
||||||
log.info("timeline to relocate %s", timeline_id.hex)
|
|
||||||
|
|
||||||
cur.execute("SELECT pg_current_wal_flush_lsn()")
|
|
||||||
log.info("pg_current_wal_flush_lsn() %s", lsn_from_hex(cur.fetchone()[0]))
|
|
||||||
log.info("timeline detail %s",
|
|
||||||
ps_http.timeline_detail(tenant_id=tenant_id, timeline_id=timeline_id))
|
|
||||||
|
|
||||||
# we rely upon autocommit after each statement
|
|
||||||
# as waiting for acceptors happens there
|
|
||||||
if create_table:
|
|
||||||
cur.execute("CREATE TABLE t(key int, value text)")
|
|
||||||
cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'")
|
|
||||||
if expected_sum is not None:
|
|
||||||
cur.execute("SELECT sum(key) FROM t")
|
|
||||||
assert cur.fetchone() == (expected_sum, )
|
|
||||||
cur.execute("SELECT pg_current_wal_flush_lsn()")
|
|
||||||
|
|
||||||
current_lsn = lsn_from_hex(cur.fetchone()[0])
|
|
||||||
return timeline_id, current_lsn
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_checkpoint(
|
|
||||||
pageserver_cur,
|
|
||||||
pageserver_http: NeonPageserverHttpClient,
|
|
||||||
tenant_id: UUID,
|
|
||||||
timeline_id: UUID,
|
|
||||||
current_lsn: int,
|
|
||||||
):
|
|
||||||
# run checkpoint manually to be sure that data landed in remote storage
|
|
||||||
pageserver_cur.execute(f"checkpoint {tenant_id.hex} {timeline_id.hex}")
|
|
||||||
|
|
||||||
# wait until pageserver successfully uploaded a checkpoint to remote storage
|
|
||||||
wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)
|
|
||||||
|
|
||||||
|
|
||||||
def check_timeline_attached(
|
|
||||||
new_pageserver_http_client: NeonPageserverHttpClient,
|
|
||||||
tenant_id: UUID,
|
|
||||||
timeline_id: UUID,
|
|
||||||
old_timeline_detail: Dict[str, Any],
|
|
||||||
old_current_lsn: int,
|
|
||||||
):
|
|
||||||
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there were no new writes since checkpoint
|
|
||||||
new_timeline_detail = assert_timeline_local(new_pageserver_http_client, tenant_id, timeline_id)
|
|
||||||
|
|
||||||
# when load is active these checks can break because lsns are not static
|
|
||||||
# so let's check with some margin
|
|
||||||
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
|
|
||||||
lsn_from_hex(old_timeline_detail['local']['disk_consistent_lsn']),
|
|
||||||
0.03)
|
|
||||||
|
|
||||||
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
|
|
||||||
old_current_lsn,
|
|
||||||
0.03)
|
|
||||||
|
|
||||||
|
|
||||||
def switch_pg_to_new_pageserver(env: NeonEnv,
|
|
||||||
pg: Postgres,
|
|
||||||
new_pageserver_port: int,
|
|
||||||
tenant_id: UUID,
|
|
||||||
timeline_id: UUID) -> pathlib.Path:
|
|
||||||
pg.stop()
|
|
||||||
|
|
||||||
pg_config_file_path = pathlib.Path(pg.config_file_path())
|
|
||||||
pg_config_file_path.open('a').write(
|
|
||||||
f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_port}'")
|
|
||||||
|
|
||||||
pg.start()
|
|
||||||
|
|
||||||
timeline_to_detach_local_path = env.repo_dir / 'tenants' / tenant_id.hex / 'timelines' / timeline_id.hex
|
|
||||||
files_before_detach = os.listdir(timeline_to_detach_local_path)
|
|
||||||
assert 'metadata' in files_before_detach, f'Regular timeline {timeline_to_detach_local_path} should have the metadata file,\
|
|
||||||
but got: {files_before_detach}'
|
|
||||||
assert len(files_before_detach) >= 2, f'Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\
|
|
||||||
but got {files_before_detach}'
|
|
||||||
|
|
||||||
return timeline_to_detach_local_path
|
|
||||||
|
|
||||||
|
|
||||||
def post_migration_check(pg: Postgres, sum_before_migration: int, old_local_path: pathlib.Path):
|
|
||||||
with pg_cur(pg) as cur:
|
|
||||||
# check that data is still there
|
|
||||||
cur.execute("SELECT sum(key) FROM t")
|
|
||||||
assert cur.fetchone() == (sum_before_migration, )
|
|
||||||
# check that we can write new data
|
|
||||||
cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'")
|
|
||||||
cur.execute("SELECT sum(key) FROM t")
|
|
||||||
assert cur.fetchone() == (sum_before_migration + 1500500, )
|
|
||||||
|
|
||||||
assert not os.path.exists(old_local_path), f'After detach, local timeline dir {old_local_path} should be removed'
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
'method',
|
|
||||||
[
|
|
||||||
# A minor migration involves no storage breaking changes.
|
|
||||||
# It is done by attaching the tenant to a new pageserver.
|
|
||||||
'minor',
|
|
||||||
# A major migration involves exporting a postgres datadir
|
|
||||||
# basebackup and importing it into the new pageserver.
|
|
||||||
# This kind of migration can tolerate breaking changes
|
|
||||||
# to storage format
|
|
||||||
pytest.param('major', marks=pytest.mark.xfail(reason="Not implemented")),
|
|
||||||
])
|
|
||||||
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
|
@pytest.mark.parametrize('with_load', ['with_load', 'without_load'])
|
||||||
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
||||||
port_distributor: PortDistributor,
|
port_distributor: PortDistributor,
|
||||||
test_output_dir,
|
|
||||||
method: str,
|
|
||||||
with_load: str):
|
with_load: str):
|
||||||
neon_env_builder.enable_local_fs_remote_storage()
|
neon_env_builder.enable_local_fs_remote_storage()
|
||||||
|
|
||||||
@@ -244,83 +112,58 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
|||||||
# create folder for remote storage mock
|
# create folder for remote storage mock
|
||||||
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
|
remote_storage_mock_path = env.repo_dir / 'local_fs_remote_storage'
|
||||||
|
|
||||||
# we use two branches to check that they are both relocated
|
tenant, _ = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
|
||||||
# first branch is used for load, compute for second one is used to
|
log.info("tenant to relocate %s", tenant)
|
||||||
# check that data is not lost
|
|
||||||
|
# attach does not download ancestor branches (should it?), just use root branch for now
|
||||||
|
env.neon_cli.create_root_branch('test_tenant_relocation', tenant_id=tenant)
|
||||||
|
|
||||||
|
tenant_pg = env.postgres.create_start(branch_name='test_tenant_relocation',
|
||||||
|
node_name='test_tenant_relocation',
|
||||||
|
tenant_id=tenant)
|
||||||
|
|
||||||
|
# insert some data
|
||||||
|
with closing(tenant_pg.connect()) as conn:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
# save timeline for later gc call
|
||||||
|
cur.execute("SHOW neon.timeline_id")
|
||||||
|
timeline = UUID(cur.fetchone()[0])
|
||||||
|
log.info("timeline to relocate %s", timeline.hex)
|
||||||
|
|
||||||
|
# we rely upon autocommit after each statement
|
||||||
|
# as waiting for acceptors happens there
|
||||||
|
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||||
|
cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'some payload'")
|
||||||
|
cur.execute("SELECT sum(key) FROM t")
|
||||||
|
assert cur.fetchone() == (500500, )
|
||||||
|
cur.execute("SELECT pg_current_wal_flush_lsn()")
|
||||||
|
|
||||||
|
current_lsn = lsn_from_hex(cur.fetchone()[0])
|
||||||
|
|
||||||
pageserver_http = env.pageserver.http_client()
|
pageserver_http = env.pageserver.http_client()
|
||||||
|
|
||||||
tenant_id, initial_timeline_id = env.neon_cli.create_tenant(UUID("74ee8b079a0e437eb0afea7d26a07209"))
|
|
||||||
log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id)
|
|
||||||
|
|
||||||
env.neon_cli.create_branch("test_tenant_relocation_main", tenant_id=tenant_id)
|
|
||||||
pg_main = env.postgres.create_start(branch_name='test_tenant_relocation_main',
|
|
||||||
tenant_id=tenant_id)
|
|
||||||
|
|
||||||
timeline_id_main, current_lsn_main = populate_branch(
|
|
||||||
pg_main,
|
|
||||||
tenant_id=tenant_id,
|
|
||||||
ps_http=pageserver_http,
|
|
||||||
create_table=True,
|
|
||||||
expected_sum=500500,
|
|
||||||
)
|
|
||||||
|
|
||||||
env.neon_cli.create_branch(
|
|
||||||
new_branch_name="test_tenant_relocation_second",
|
|
||||||
ancestor_branch_name="test_tenant_relocation_main",
|
|
||||||
tenant_id=tenant_id,
|
|
||||||
)
|
|
||||||
pg_second = env.postgres.create_start(branch_name='test_tenant_relocation_second',
|
|
||||||
tenant_id=tenant_id)
|
|
||||||
|
|
||||||
timeline_id_second, current_lsn_second = populate_branch(
|
|
||||||
pg_second,
|
|
||||||
tenant_id=tenant_id,
|
|
||||||
ps_http=pageserver_http,
|
|
||||||
create_table=False,
|
|
||||||
expected_sum=1001000,
|
|
||||||
)
|
|
||||||
|
|
||||||
# wait until pageserver receives that data
|
# wait until pageserver receives that data
|
||||||
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_main, current_lsn_main)
|
wait_for_last_record_lsn(pageserver_http, tenant, timeline, current_lsn)
|
||||||
timeline_detail_main = assert_timeline_local(pageserver_http, tenant_id, timeline_id_main)
|
timeline_detail = assert_local(pageserver_http, tenant, timeline)
|
||||||
|
|
||||||
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_second, current_lsn_second)
|
|
||||||
timeline_detail_second = assert_timeline_local(pageserver_http, tenant_id, timeline_id_second)
|
|
||||||
|
|
||||||
if with_load == 'with_load':
|
if with_load == 'with_load':
|
||||||
# create load table
|
# create load table
|
||||||
with pg_cur(pg_main) as cur:
|
with pg_cur(tenant_pg) as cur:
|
||||||
cur.execute("CREATE TABLE load(value text)")
|
cur.execute("CREATE TABLE load(value text)")
|
||||||
|
|
||||||
load_stop_event = threading.Event()
|
load_stop_event = threading.Event()
|
||||||
load_ok_event = threading.Event()
|
load_ok_event = threading.Event()
|
||||||
load_thread = threading.Thread(
|
load_thread = threading.Thread(target=load,
|
||||||
target=load,
|
args=(tenant_pg, load_stop_event, load_ok_event))
|
||||||
args=(pg_main, load_stop_event, load_ok_event),
|
|
||||||
daemon=True, # To make sure the child dies when the parent errors
|
|
||||||
)
|
|
||||||
load_thread.start()
|
load_thread.start()
|
||||||
|
|
||||||
# this requirement introduces a problem
|
# run checkpoint manually to be sure that data landed in remote storage
|
||||||
# if user creates a branch during migration
|
with closing(env.pageserver.connect()) as psconn:
|
||||||
# it won't appear on the new pageserver
|
with psconn.cursor() as pscur:
|
||||||
with pg_cur(env.pageserver) as cur:
|
pscur.execute(f"checkpoint {tenant.hex} {timeline.hex}")
|
||||||
ensure_checkpoint(
|
|
||||||
cur,
|
|
||||||
pageserver_http=pageserver_http,
|
|
||||||
tenant_id=tenant_id,
|
|
||||||
timeline_id=timeline_id_main,
|
|
||||||
current_lsn=current_lsn_main,
|
|
||||||
)
|
|
||||||
|
|
||||||
ensure_checkpoint(
|
# wait until pageserver successfully uploaded a checkpoint to remote storage
|
||||||
cur,
|
wait_for_upload(pageserver_http, tenant, timeline, current_lsn)
|
||||||
pageserver_http=pageserver_http,
|
|
||||||
tenant_id=tenant_id,
|
|
||||||
timeline_id=timeline_id_second,
|
|
||||||
current_lsn=current_lsn_second,
|
|
||||||
)
|
|
||||||
|
|
||||||
log.info("inititalizing new pageserver")
|
log.info("inititalizing new pageserver")
|
||||||
# bootstrap second pageserver
|
# bootstrap second pageserver
|
||||||
@@ -341,60 +184,40 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
|||||||
new_pageserver_http_port,
|
new_pageserver_http_port,
|
||||||
neon_env_builder.broker):
|
neon_env_builder.broker):
|
||||||
|
|
||||||
# Migrate either by attaching from s3 or import/export basebackup
|
# Migrate either by attaching from s3 or import/export basebackup
|
||||||
if method == "major":
|
relocation_method = "import"
|
||||||
|
if relocation_method == "import":
|
||||||
|
scripts_dir = "/home/bojan/src/neondatabase/neon/scripts/"
|
||||||
cmd = [
|
cmd = [
|
||||||
"python",
|
"python",
|
||||||
os.path.join(base_dir, "scripts/export_import_between_pageservers.py"),
|
os.path.join(scripts_dir, "export_import_betwen_pageservers.py"),
|
||||||
"--tenant-id",
|
"--tenant-id", tenant.hex,
|
||||||
tenant_id.hex,
|
"--from-host", "localhost",
|
||||||
"--from-host",
|
"--from-http-port", str(pageserver_http.port),
|
||||||
"localhost",
|
"--from-pg-port", str(env.pageserver.service_port.pg),
|
||||||
"--from-http-port",
|
"--to-host", "localhost",
|
||||||
str(pageserver_http.port),
|
"--to-http-port", str(new_pageserver_http_port),
|
||||||
"--from-pg-port",
|
"--to-pg-port", str(new_pageserver_pg_port),
|
||||||
str(env.pageserver.service_port.pg),
|
"--psql-path", os.path.join(pg_distrib_dir, "bin", "psql"),
|
||||||
"--to-host",
|
|
||||||
"localhost",
|
|
||||||
"--to-http-port",
|
|
||||||
str(new_pageserver_http_port),
|
|
||||||
"--to-pg-port",
|
|
||||||
str(new_pageserver_pg_port),
|
|
||||||
"--psql-path",
|
|
||||||
os.path.join(pg_distrib_dir, "bin", "psql"),
|
|
||||||
"--work-dir",
|
|
||||||
os.path.join(test_output_dir),
|
|
||||||
]
|
]
|
||||||
subprocess_capture(str(env.repo_dir), cmd, check=True)
|
subprocess_capture(env.repo_dir, cmd, check=True)
|
||||||
elif method == "minor":
|
elif relocation_method == "attach":
|
||||||
# call to attach timeline to new pageserver
|
# call to attach timeline to new pageserver
|
||||||
new_pageserver_http.tenant_attach(tenant_id)
|
new_pageserver_http.timeline_attach(tenant, timeline)
|
||||||
|
|
||||||
# check that it shows that download is in progress
|
# new pageserver should be in sync (modulo wal tail or vacuum activity) with the old one because there were no new writes since checkpoint
|
||||||
tenant_status = new_pageserver_http.tenant_status(tenant_id=tenant_id)
|
new_timeline_detail = wait_until(
|
||||||
assert tenant_status.get('has_in_progress_downloads'), tenant_status
|
number_of_iterations=5,
|
||||||
|
interval=1,
|
||||||
|
func=lambda: assert_local(new_pageserver_http, tenant, timeline))
|
||||||
|
|
||||||
# wait until tenant is downloaded
|
# when load is active these checks can break because lsns are not static
|
||||||
wait_until(number_of_iterations=10,
|
# so let's check with some margin
|
||||||
interval=1,
|
assert_abs_margin_ratio(lsn_from_hex(new_timeline_detail['local']['disk_consistent_lsn']),
|
||||||
func=lambda: assert_no_in_progress_downloads_for_tenant(
|
lsn_from_hex(timeline_detail['local']['disk_consistent_lsn']),
|
||||||
new_pageserver_http, tenant_id))
|
0.03)
|
||||||
|
|
||||||
check_timeline_attached(
|
tenant_pg.stop()
|
||||||
new_pageserver_http,
|
|
||||||
tenant_id,
|
|
||||||
timeline_id_main,
|
|
||||||
timeline_detail_main,
|
|
||||||
current_lsn_main,
|
|
||||||
)
|
|
||||||
|
|
||||||
check_timeline_attached(
|
|
||||||
new_pageserver_http,
|
|
||||||
tenant_id,
|
|
||||||
timeline_id_second,
|
|
||||||
timeline_detail_second,
|
|
||||||
current_lsn_second,
|
|
||||||
)
|
|
||||||
|
|
||||||
# rewrite neon cli config to use new pageserver for basebackup to start new compute
|
# rewrite neon cli config to use new pageserver for basebackup to start new compute
|
||||||
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
|
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
|
||||||
@@ -402,29 +225,33 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
|||||||
cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
|
cli_config_lines[-1] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
|
||||||
(env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
|
(env.repo_dir / 'config').write_text('\n'.join(cli_config_lines))
|
||||||
|
|
||||||
old_local_path_main = switch_pg_to_new_pageserver(
|
tenant_pg_config_file_path = pathlib.Path(tenant_pg.config_file_path())
|
||||||
env,
|
tenant_pg_config_file_path.open('a').write(
|
||||||
pg_main,
|
f"\nneon.pageserver_connstring = 'postgresql://no_user:@localhost:{new_pageserver_pg_port}'"
|
||||||
new_pageserver_pg_port,
|
|
||||||
tenant_id,
|
|
||||||
timeline_id_main,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
old_local_path_second = switch_pg_to_new_pageserver(
|
tenant_pg.start()
|
||||||
env,
|
|
||||||
pg_second,
|
timeline_to_detach_local_path = env.repo_dir / 'tenants' / tenant.hex / 'timelines' / timeline.hex
|
||||||
new_pageserver_pg_port,
|
files_before_detach = os.listdir(timeline_to_detach_local_path)
|
||||||
tenant_id,
|
assert 'metadata' in files_before_detach, f'Regular timeline {timeline_to_detach_local_path} should have the metadata file,\
|
||||||
timeline_id_second,
|
but got: {files_before_detach}'
|
||||||
)
|
assert len(files_before_detach) > 2, f'Regular timeline {timeline_to_detach_local_path} should have at least one layer file,\
|
||||||
|
but got {files_before_detach}'
|
||||||
|
|
||||||
# detach tenant from old pageserver before we check
|
# detach tenant from old pageserver before we check
|
||||||
# that all the data is there to be sure that old pageserver
|
# that all the data is there to be sure that old pageserver
|
||||||
# is no longer involved, and if it is, we will see the errors
|
# is no longer involved, and if it is, we will see the errors
|
||||||
pageserver_http.tenant_detach(tenant_id)
|
pageserver_http.timeline_detach(tenant, timeline)
|
||||||
|
|
||||||
post_migration_check(pg_main, 500500, old_local_path_main)
|
with pg_cur(tenant_pg) as cur:
|
||||||
post_migration_check(pg_second, 1001000, old_local_path_second)
|
# check that data is still there
|
||||||
|
cur.execute("SELECT sum(key) FROM t")
|
||||||
|
assert cur.fetchone() == (500500, )
|
||||||
|
# check that we can write new data
|
||||||
|
cur.execute("INSERT INTO t SELECT generate_series(1001,2000), 'some payload'")
|
||||||
|
cur.execute("SELECT sum(key) FROM t")
|
||||||
|
assert cur.fetchone() == (2001000, )
|
||||||
|
|
||||||
if with_load == 'with_load':
|
if with_load == 'with_load':
|
||||||
assert load_ok_event.wait(3)
|
assert load_ok_event.wait(3)
|
||||||
@@ -433,6 +260,8 @@ def test_tenant_relocation(neon_env_builder: NeonEnvBuilder,
|
|||||||
load_thread.join(timeout=10)
|
load_thread.join(timeout=10)
|
||||||
log.info('load thread stopped')
|
log.info('load thread stopped')
|
||||||
|
|
||||||
|
assert not os.path.exists(timeline_to_detach_local_path), f'After detach, local timeline dir {timeline_to_detach_local_path} should be removed'
|
||||||
|
|
||||||
# bring old pageserver back for clean shutdown via neon cli
|
# bring old pageserver back for clean shutdown via neon cli
|
||||||
# new pageserver will be shut down by the context manager
|
# new pageserver will be shut down by the context manager
|
||||||
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
|
cli_config_lines = (env.repo_dir / 'config').read_text().splitlines()
|
||||||
|
|||||||
@@ -35,10 +35,10 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
|
|||||||
value = line.lstrip(name).strip()
|
value = line.lstrip(name).strip()
|
||||||
return int(value)
|
return int(value)
|
||||||
|
|
||||||
def delete_all_timelines(tenant):
|
def detach_all_timelines(tenant):
|
||||||
timelines = [UUID(t["timeline_id"]) for t in client.timeline_list(tenant)]
|
timelines = [UUID(t["timeline_id"]) for t in client.timeline_list(tenant)]
|
||||||
for t in timelines:
|
for t in timelines:
|
||||||
client.timeline_delete(tenant, t)
|
client.timeline_detach(tenant, t)
|
||||||
|
|
||||||
def assert_idle(tenant):
|
def assert_idle(tenant):
|
||||||
assert get_state(tenant) == "Idle"
|
assert get_state(tenant) == "Idle"
|
||||||
@@ -56,7 +56,7 @@ def test_tenant_tasks(neon_env_builder: NeonEnvBuilder):
|
|||||||
# TODO they should be already idle since there are no active computes
|
# TODO they should be already idle since there are no active computes
|
||||||
for tenant_info in client.tenant_list():
|
for tenant_info in client.tenant_list():
|
||||||
tenant_id = UUID(tenant_info["id"])
|
tenant_id = UUID(tenant_info["id"])
|
||||||
delete_all_timelines(tenant_id)
|
detach_all_timelines(tenant_id)
|
||||||
wait_until(10, 0.2, lambda: assert_idle(tenant_id))
|
wait_until(10, 0.2, lambda: assert_idle(tenant_id))
|
||||||
|
|
||||||
# Assert that all tasks finish quickly after tenants go idle
|
# Assert that all tasks finish quickly after tenants go idle
|
||||||
|
|||||||
@@ -1,60 +0,0 @@
|
|||||||
from uuid import uuid4
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from fixtures.neon_fixtures import NeonEnv, NeonPageserverApiException, wait_until
|
|
||||||
|
|
||||||
|
|
||||||
def test_timeline_delete(neon_simple_env: NeonEnv):
|
|
||||||
env = neon_simple_env
|
|
||||||
|
|
||||||
ps_http = env.pageserver.http_client()
|
|
||||||
|
|
||||||
# first try to delete non existing timeline
|
|
||||||
# for existing tenant:
|
|
||||||
invalid_timeline_id = uuid4()
|
|
||||||
with pytest.raises(NeonPageserverApiException, match="timeline not found"):
|
|
||||||
ps_http.timeline_delete(tenant_id=env.initial_tenant, timeline_id=invalid_timeline_id)
|
|
||||||
|
|
||||||
# for non existing tenant:
|
|
||||||
invalid_tenant_id = uuid4()
|
|
||||||
with pytest.raises(NeonPageserverApiException,
|
|
||||||
match=f"Tenant {invalid_tenant_id.hex} not found in local tenant state"):
|
|
||||||
ps_http.timeline_delete(tenant_id=invalid_tenant_id, timeline_id=invalid_timeline_id)
|
|
||||||
|
|
||||||
# construct pair of branches to validate that pageserver prohibits
|
|
||||||
# deletion of ancestor timelines when they have child branches
|
|
||||||
parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty")
|
|
||||||
|
|
||||||
leaf_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_branch1",
|
|
||||||
"test_ancestor_branch_delete_parent")
|
|
||||||
|
|
||||||
ps_http = env.pageserver.http_client()
|
|
||||||
with pytest.raises(NeonPageserverApiException,
|
|
||||||
match="Cannot detach timeline which has child timelines"):
|
|
||||||
|
|
||||||
timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / parent_timeline_id.hex
|
|
||||||
assert timeline_path.exists()
|
|
||||||
|
|
||||||
ps_http.timeline_delete(env.initial_tenant, parent_timeline_id)
|
|
||||||
|
|
||||||
assert not timeline_path.exists()
|
|
||||||
|
|
||||||
timeline_path = env.repo_dir / "tenants" / env.initial_tenant.hex / "timelines" / leaf_timeline_id.hex
|
|
||||||
assert timeline_path.exists()
|
|
||||||
|
|
||||||
# retry deletes when compaction or gc is running in pageserver
|
|
||||||
wait_until(number_of_iterations=3,
|
|
||||||
interval=0.2,
|
|
||||||
func=lambda: ps_http.timeline_delete(env.initial_tenant, leaf_timeline_id))
|
|
||||||
|
|
||||||
assert not timeline_path.exists()
|
|
||||||
|
|
||||||
# check 404
|
|
||||||
with pytest.raises(NeonPageserverApiException,
|
|
||||||
match="is not found neither locally nor remotely"):
|
|
||||||
ps_http.timeline_detail(env.initial_tenant, leaf_timeline_id)
|
|
||||||
|
|
||||||
# FIXME leaves tenant without timelines, should we prevent deletion of root timeline?
|
|
||||||
wait_until(number_of_iterations=3,
|
|
||||||
interval=0.2,
|
|
||||||
func=lambda: ps_http.timeline_delete(env.initial_tenant, parent_timeline_id))
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
import psycopg2.extras
|
import psycopg2.extras
|
||||||
import psycopg2.errors
|
import psycopg2.errors
|
||||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_timeline_local
|
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, assert_local
|
||||||
from fixtures.log_helper import log
|
from fixtures.log_helper import log
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@@ -11,7 +11,7 @@ def test_timeline_size(neon_simple_env: NeonEnv):
|
|||||||
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
|
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
|
||||||
|
|
||||||
client = env.pageserver.http_client()
|
client = env.pageserver.http_client()
|
||||||
timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
|
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||||
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
|
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
|
||||||
'current_logical_size_non_incremental']
|
'current_logical_size_non_incremental']
|
||||||
|
|
||||||
@@ -29,13 +29,13 @@ def test_timeline_size(neon_simple_env: NeonEnv):
|
|||||||
FROM generate_series(1, 10) g
|
FROM generate_series(1, 10) g
|
||||||
""")
|
""")
|
||||||
|
|
||||||
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
|
res = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||||
local_details = res['local']
|
local_details = res['local']
|
||||||
assert local_details["current_logical_size"] == local_details[
|
assert local_details["current_logical_size"] == local_details[
|
||||||
"current_logical_size_non_incremental"]
|
"current_logical_size_non_incremental"]
|
||||||
cur.execute("TRUNCATE foo")
|
cur.execute("TRUNCATE foo")
|
||||||
|
|
||||||
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
|
res = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||||
local_details = res['local']
|
local_details = res['local']
|
||||||
assert local_details["current_logical_size"] == local_details[
|
assert local_details["current_logical_size"] == local_details[
|
||||||
"current_logical_size_non_incremental"]
|
"current_logical_size_non_incremental"]
|
||||||
@@ -46,7 +46,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
|
|||||||
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
|
new_timeline_id = env.neon_cli.create_branch('test_timeline_size', 'empty')
|
||||||
|
|
||||||
client = env.pageserver.http_client()
|
client = env.pageserver.http_client()
|
||||||
timeline_details = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
|
timeline_details = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||||
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
|
assert timeline_details['local']['current_logical_size'] == timeline_details['local'][
|
||||||
'current_logical_size_non_incremental']
|
'current_logical_size_non_incremental']
|
||||||
|
|
||||||
@@ -57,7 +57,7 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
|
|||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("SHOW neon.timeline_id")
|
cur.execute("SHOW neon.timeline_id")
|
||||||
|
|
||||||
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
|
res = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||||
local_details = res['local']
|
local_details = res['local']
|
||||||
assert local_details["current_logical_size"] == local_details[
|
assert local_details["current_logical_size"] == local_details[
|
||||||
"current_logical_size_non_incremental"]
|
"current_logical_size_non_incremental"]
|
||||||
@@ -73,14 +73,14 @@ def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
|
|||||||
FROM generate_series(1, 10) g
|
FROM generate_series(1, 10) g
|
||||||
""")
|
""")
|
||||||
|
|
||||||
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
|
res = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||||
local_details = res['local']
|
local_details = res['local']
|
||||||
assert local_details["current_logical_size"] == local_details[
|
assert local_details["current_logical_size"] == local_details[
|
||||||
"current_logical_size_non_incremental"]
|
"current_logical_size_non_incremental"]
|
||||||
|
|
||||||
cur.execute('DROP DATABASE foodb')
|
cur.execute('DROP DATABASE foodb')
|
||||||
|
|
||||||
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
|
res = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||||
local_details = res['local']
|
local_details = res['local']
|
||||||
assert local_details["current_logical_size"] == local_details[
|
assert local_details["current_logical_size"] == local_details[
|
||||||
"current_logical_size_non_incremental"]
|
"current_logical_size_non_incremental"]
|
||||||
@@ -117,7 +117,7 @@ def test_timeline_size_quota(neon_env_builder: NeonEnvBuilder):
|
|||||||
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota')
|
new_timeline_id = env.neon_cli.create_branch('test_timeline_size_quota')
|
||||||
|
|
||||||
client = env.pageserver.http_client()
|
client = env.pageserver.http_client()
|
||||||
res = assert_timeline_local(client, env.initial_tenant, new_timeline_id)
|
res = assert_local(client, env.initial_tenant, new_timeline_id)
|
||||||
assert res['local']["current_logical_size"] == res['local'][
|
assert res['local']["current_logical_size"] == res['local'][
|
||||||
"current_logical_size_non_incremental"]
|
"current_logical_size_non_incremental"]
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
import asyncpg
|
import asyncpg
|
||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
@@ -8,7 +7,7 @@ import time
|
|||||||
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
|
from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, Postgres, Safekeeper
|
||||||
from fixtures.log_helper import getLogger
|
from fixtures.log_helper import getLogger
|
||||||
from fixtures.utils import lsn_from_hex, lsn_to_hex
|
from fixtures.utils import lsn_from_hex, lsn_to_hex
|
||||||
from typing import List, Optional
|
from typing import List
|
||||||
|
|
||||||
log = getLogger('root.safekeeper_async')
|
log = getLogger('root.safekeeper_async')
|
||||||
|
|
||||||
@@ -235,156 +234,3 @@ def test_restarts_frequent_checkpoints(neon_env_builder: NeonEnvBuilder):
|
|||||||
# we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments
|
# we try to simulate large (flush_lsn - truncate_lsn) lag, to test that WAL segments
|
||||||
# are not removed before broadcasted to all safekeepers, with the help of replication slot
|
# are not removed before broadcasted to all safekeepers, with the help of replication slot
|
||||||
asyncio.run(run_restarts_under_load(env, pg, env.safekeepers, period_time=15, iterations=5))
|
asyncio.run(run_restarts_under_load(env, pg, env.safekeepers, period_time=15, iterations=5))
|
||||||
|
|
||||||
|
|
||||||
def postgres_create_start(env: NeonEnv, branch: str, pgdir_name: Optional[str]):
|
|
||||||
pg = Postgres(
|
|
||||||
env,
|
|
||||||
tenant_id=env.initial_tenant,
|
|
||||||
port=env.port_distributor.get_port(),
|
|
||||||
# In these tests compute has high probability of terminating on its own
|
|
||||||
# before our stop() due to lost consensus leadership.
|
|
||||||
check_stop_result=False)
|
|
||||||
|
|
||||||
# embed current time in node name
|
|
||||||
node_name = pgdir_name or f'pg_node_{time.time()}'
|
|
||||||
return pg.create_start(branch_name=branch,
|
|
||||||
node_name=node_name,
|
|
||||||
config_lines=['log_statement=all'])
|
|
||||||
|
|
||||||
|
|
||||||
async def exec_compute_query(env: NeonEnv,
|
|
||||||
branch: str,
|
|
||||||
query: str,
|
|
||||||
pgdir_name: Optional[str] = None):
|
|
||||||
with postgres_create_start(env, branch=branch, pgdir_name=pgdir_name) as pg:
|
|
||||||
before_conn = time.time()
|
|
||||||
conn = await pg.connect_async()
|
|
||||||
res = await conn.fetch(query)
|
|
||||||
await conn.close()
|
|
||||||
after_conn = time.time()
|
|
||||||
log.info(f'{query} took {after_conn - before_conn}s')
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
async def run_compute_restarts(env: NeonEnv,
|
|
||||||
queries=16,
|
|
||||||
batch_insert=10000,
|
|
||||||
branch='test_compute_restarts'):
|
|
||||||
cnt = 0
|
|
||||||
sum = 0
|
|
||||||
|
|
||||||
await exec_compute_query(env, branch, 'CREATE TABLE t (i int)')
|
|
||||||
|
|
||||||
for i in range(queries):
|
|
||||||
if i % 4 == 0:
|
|
||||||
await exec_compute_query(
|
|
||||||
env, branch, f'INSERT INTO t SELECT 1 FROM generate_series(1, {batch_insert})')
|
|
||||||
sum += batch_insert
|
|
||||||
cnt += batch_insert
|
|
||||||
elif (i % 4 == 1) or (i % 4 == 3):
|
|
||||||
# Note that select causes lots of FPI's and increases probability of safekeepers
|
|
||||||
# standing at different LSNs after compute termination.
|
|
||||||
actual_sum = (await exec_compute_query(env, branch, 'SELECT SUM(i) FROM t'))[0][0]
|
|
||||||
assert actual_sum == sum, f'Expected sum={sum}, actual={actual_sum}'
|
|
||||||
elif i % 4 == 2:
|
|
||||||
await exec_compute_query(env, branch, 'UPDATE t SET i = i + 1')
|
|
||||||
sum += cnt
|
|
||||||
|
|
||||||
|
|
||||||
# Add a test which creates compute for every query, and then destroys it right after.
|
|
||||||
def test_compute_restarts(neon_env_builder: NeonEnvBuilder):
|
|
||||||
neon_env_builder.num_safekeepers = 3
|
|
||||||
env = neon_env_builder.init_start()
|
|
||||||
|
|
||||||
env.neon_cli.create_branch('test_compute_restarts')
|
|
||||||
asyncio.run(run_compute_restarts(env))
|
|
||||||
|
|
||||||
|
|
||||||
class BackgroundCompute(object):
|
|
||||||
def __init__(self, index: int, env: NeonEnv, branch: str):
|
|
||||||
self.index = index
|
|
||||||
self.env = env
|
|
||||||
self.branch = branch
|
|
||||||
self.running = False
|
|
||||||
self.stopped = False
|
|
||||||
self.total_tries = 0
|
|
||||||
self.successful_queries: List[int] = []
|
|
||||||
|
|
||||||
async def run(self):
|
|
||||||
if self.running:
|
|
||||||
raise Exception('BackgroundCompute is already running')
|
|
||||||
|
|
||||||
self.running = True
|
|
||||||
i = 0
|
|
||||||
while not self.stopped:
|
|
||||||
try:
|
|
||||||
verify_key = (self.index << 16) + i
|
|
||||||
i += 1
|
|
||||||
self.total_tries += 1
|
|
||||||
res = await exec_compute_query(
|
|
||||||
self.env,
|
|
||||||
self.branch,
|
|
||||||
f'INSERT INTO query_log(index, verify_key) VALUES ({self.index}, {verify_key}) RETURNING verify_key',
|
|
||||||
pgdir_name=f'bgcompute{self.index}_key{verify_key}',
|
|
||||||
)
|
|
||||||
log.info(f'result: {res}')
|
|
||||||
if len(res) != 1:
|
|
||||||
raise Exception('No result returned')
|
|
||||||
if res[0][0] != verify_key:
|
|
||||||
raise Exception('Wrong result returned')
|
|
||||||
self.successful_queries.append(verify_key)
|
|
||||||
except Exception as e:
|
|
||||||
log.info(f'BackgroundCompute {self.index} query failed: {e}')
|
|
||||||
|
|
||||||
# With less sleep, there is a very big chance of not committing
|
|
||||||
# anything or only 1 xact during test run.
|
|
||||||
await asyncio.sleep(2 * random.random())
|
|
||||||
self.running = False
|
|
||||||
|
|
||||||
|
|
||||||
async def run_concurrent_computes(env: NeonEnv,
|
|
||||||
num_computes=10,
|
|
||||||
run_seconds=20,
|
|
||||||
branch='test_concurrent_computes'):
|
|
||||||
await exec_compute_query(
|
|
||||||
env,
|
|
||||||
branch,
|
|
||||||
'CREATE TABLE query_log (t timestamp default now(), index int, verify_key int)')
|
|
||||||
|
|
||||||
computes = [BackgroundCompute(i, env, branch) for i in range(num_computes)]
|
|
||||||
background_tasks = [asyncio.create_task(compute.run()) for compute in computes]
|
|
||||||
|
|
||||||
await asyncio.sleep(run_seconds)
|
|
||||||
for compute in computes[1:]:
|
|
||||||
compute.stopped = True
|
|
||||||
log.info("stopped all tasks but one")
|
|
||||||
|
|
||||||
# work for some time with only one compute -- it should be able to make some xacts
|
|
||||||
await asyncio.sleep(8)
|
|
||||||
computes[0].stopped = True
|
|
||||||
|
|
||||||
await asyncio.gather(*background_tasks)
|
|
||||||
|
|
||||||
result = await exec_compute_query(env, branch, 'SELECT * FROM query_log')
|
|
||||||
# we should have inserted something while single compute was running
|
|
||||||
assert len(result) >= 4
|
|
||||||
log.info(f'Executed {len(result)} queries')
|
|
||||||
for row in result:
|
|
||||||
log.info(f'{row[0]} {row[1]} {row[2]}')
|
|
||||||
|
|
||||||
# ensure everything reported as committed wasn't lost
|
|
||||||
for compute in computes:
|
|
||||||
for verify_key in compute.successful_queries:
|
|
||||||
assert verify_key in [row[2] for row in result]
|
|
||||||
|
|
||||||
|
|
||||||
# Run multiple computes concurrently, creating-destroying them after single
|
|
||||||
# query. Ensure we don't lose any xacts reported as committed and be able to
|
|
||||||
# progress once only one compute remains.
|
|
||||||
def test_concurrent_computes(neon_env_builder: NeonEnvBuilder):
|
|
||||||
neon_env_builder.num_safekeepers = 3
|
|
||||||
env = neon_env_builder.init_start()
|
|
||||||
|
|
||||||
env.neon_cli.create_branch('test_concurrent_computes')
|
|
||||||
asyncio.run(run_concurrent_computes(env))
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ from dataclasses import field
|
|||||||
from enum import Flag, auto
|
from enum import Flag, auto
|
||||||
import textwrap
|
import textwrap
|
||||||
from cached_property import cached_property
|
from cached_property import cached_property
|
||||||
import abc
|
|
||||||
import asyncpg
|
import asyncpg
|
||||||
import os
|
import os
|
||||||
import boto3
|
import boto3
|
||||||
@@ -30,7 +29,7 @@ from dataclasses import dataclass
|
|||||||
# Type-related stuff
|
# Type-related stuff
|
||||||
from psycopg2.extensions import connection as PgConnection
|
from psycopg2.extensions import connection as PgConnection
|
||||||
from psycopg2.extensions import make_dsn, parse_dsn
|
from psycopg2.extensions import make_dsn, parse_dsn
|
||||||
from typing import Any, Callable, Dict, Iterator, List, Optional, Type, TypeVar, cast, Union, Tuple
|
from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast, Union, Tuple
|
||||||
from typing_extensions import Literal
|
from typing_extensions import Literal
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -325,7 +324,7 @@ class PgProtocol:
|
|||||||
# Convert options='-c<key>=<val>' to server_settings
|
# Convert options='-c<key>=<val>' to server_settings
|
||||||
if 'options' in conn_options:
|
if 'options' in conn_options:
|
||||||
options = conn_options.pop('options')
|
options = conn_options.pop('options')
|
||||||
for match in re.finditer(r'-c(\w*)=(\w*)', options):
|
for match in re.finditer('-c(\w*)=(\w*)', options):
|
||||||
key = match.group(1)
|
key = match.group(1)
|
||||||
val = match.group(2)
|
val = match.group(2)
|
||||||
if 'server_options' in conn_options:
|
if 'server_options' in conn_options:
|
||||||
@@ -796,49 +795,18 @@ class NeonPageserverHttpClient(requests.Session):
|
|||||||
def check_status(self):
|
def check_status(self):
|
||||||
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
|
self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()
|
||||||
|
|
||||||
def tenant_list(self) -> List[Dict[Any, Any]]:
|
def timeline_attach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
|
||||||
res = self.get(f"http://localhost:{self.port}/v1/tenant")
|
|
||||||
self.verbose_error(res)
|
|
||||||
res_json = res.json()
|
|
||||||
assert isinstance(res_json, list)
|
|
||||||
return res_json
|
|
||||||
|
|
||||||
def tenant_create(self, new_tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
|
|
||||||
res = self.post(
|
res = self.post(
|
||||||
f"http://localhost:{self.port}/v1/tenant",
|
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/attach",
|
||||||
json={
|
|
||||||
'new_tenant_id': new_tenant_id.hex if new_tenant_id else None,
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
self.verbose_error(res)
|
self.verbose_error(res)
|
||||||
if res.status_code == 409:
|
|
||||||
raise Exception(f'could not create tenant: already exists for id {new_tenant_id}')
|
|
||||||
new_tenant_id = res.json()
|
|
||||||
assert isinstance(new_tenant_id, str)
|
|
||||||
return uuid.UUID(new_tenant_id)
|
|
||||||
|
|
||||||
def tenant_attach(self, tenant_id: uuid.UUID):
|
def timeline_detach(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
|
||||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/attach")
|
res = self.post(
|
||||||
|
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/detach",
|
||||||
|
)
|
||||||
self.verbose_error(res)
|
self.verbose_error(res)
|
||||||
|
|
||||||
def tenant_detach(self, tenant_id: uuid.UUID):
|
|
||||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/detach")
|
|
||||||
self.verbose_error(res)
|
|
||||||
|
|
||||||
def tenant_status(self, tenant_id: uuid.UUID) -> Dict[Any, Any]:
|
|
||||||
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}")
|
|
||||||
self.verbose_error(res)
|
|
||||||
res_json = res.json()
|
|
||||||
assert isinstance(res_json, dict)
|
|
||||||
return res_json
|
|
||||||
|
|
||||||
def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[str, Any]]:
|
|
||||||
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
|
|
||||||
self.verbose_error(res)
|
|
||||||
res_json = res.json()
|
|
||||||
assert isinstance(res_json, list)
|
|
||||||
return res_json
|
|
||||||
|
|
||||||
def timeline_create(
|
def timeline_create(
|
||||||
self,
|
self,
|
||||||
tenant_id: uuid.UUID,
|
tenant_id: uuid.UUID,
|
||||||
@@ -863,6 +831,34 @@ class NeonPageserverHttpClient(requests.Session):
|
|||||||
assert isinstance(res_json, dict)
|
assert isinstance(res_json, dict)
|
||||||
return res_json
|
return res_json
|
||||||
|
|
||||||
|
def tenant_list(self) -> List[Dict[Any, Any]]:
|
||||||
|
res = self.get(f"http://localhost:{self.port}/v1/tenant")
|
||||||
|
self.verbose_error(res)
|
||||||
|
res_json = res.json()
|
||||||
|
assert isinstance(res_json, list)
|
||||||
|
return res_json
|
||||||
|
|
||||||
|
def tenant_create(self, new_tenant_id: Optional[uuid.UUID] = None) -> uuid.UUID:
|
||||||
|
res = self.post(
|
||||||
|
f"http://localhost:{self.port}/v1/tenant",
|
||||||
|
json={
|
||||||
|
'new_tenant_id': new_tenant_id.hex if new_tenant_id else None,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
self.verbose_error(res)
|
||||||
|
if res.status_code == 409:
|
||||||
|
raise Exception(f'could not create tenant: already exists for id {new_tenant_id}')
|
||||||
|
new_tenant_id = res.json()
|
||||||
|
assert isinstance(new_tenant_id, str)
|
||||||
|
return uuid.UUID(new_tenant_id)
|
||||||
|
|
||||||
|
def timeline_list(self, tenant_id: uuid.UUID) -> List[Dict[Any, Any]]:
|
||||||
|
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline")
|
||||||
|
self.verbose_error(res)
|
||||||
|
res_json = res.json()
|
||||||
|
assert isinstance(res_json, list)
|
||||||
|
return res_json
|
||||||
|
|
||||||
def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
|
def timeline_detail(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
|
||||||
res = self.get(
|
res = self.get(
|
||||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}?include-non-incremental-logical-size=1"
|
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}?include-non-incremental-logical-size=1"
|
||||||
@@ -872,14 +868,6 @@ class NeonPageserverHttpClient(requests.Session):
|
|||||||
assert isinstance(res_json, dict)
|
assert isinstance(res_json, dict)
|
||||||
return res_json
|
return res_json
|
||||||
|
|
||||||
def timeline_delete(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID):
|
|
||||||
res = self.delete(
|
|
||||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}")
|
|
||||||
self.verbose_error(res)
|
|
||||||
res_json = res.json()
|
|
||||||
assert res_json is None
|
|
||||||
return res_json
|
|
||||||
|
|
||||||
def wal_receiver_get(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
|
def wal_receiver_get(self, tenant_id: uuid.UUID, timeline_id: uuid.UUID) -> Dict[Any, Any]:
|
||||||
res = self.get(
|
res = self.get(
|
||||||
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/wal_receiver"
|
f"http://localhost:{self.port}/v1/tenant/{tenant_id.hex}/timeline/{timeline_id.hex}/wal_receiver"
|
||||||
@@ -909,89 +897,14 @@ TIMELINE_DATA_EXTRACTOR = re.compile(r"\s(?P<branch_name>[^\s]+)\s\[(?P<timeline
|
|||||||
re.MULTILINE)
|
re.MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
class AbstractNeonCli(abc.ABC):
|
class NeonCli:
|
||||||
"""
|
|
||||||
A typed wrapper around an arbitrary Neon CLI tool.
|
|
||||||
Supports a way to run arbitrary command directly via CLI.
|
|
||||||
Do not use directly, use specific subclasses instead.
|
|
||||||
"""
|
|
||||||
def __init__(self, env: NeonEnv):
|
|
||||||
self.env = env
|
|
||||||
|
|
||||||
COMMAND: str = cast(str, None) # To be overwritten by the derived class.
|
|
||||||
|
|
||||||
def raw_cli(self,
|
|
||||||
arguments: List[str],
|
|
||||||
extra_env_vars: Optional[Dict[str, str]] = None,
|
|
||||||
check_return_code=True) -> 'subprocess.CompletedProcess[str]':
|
|
||||||
"""
|
|
||||||
Run the command with the specified arguments.
|
|
||||||
|
|
||||||
Arguments must be in list form, e.g. ['pg', 'create']
|
|
||||||
|
|
||||||
Return both stdout and stderr, which can be accessed as
|
|
||||||
|
|
||||||
>>> result = env.neon_cli.raw_cli(...)
|
|
||||||
>>> assert result.stderr == ""
|
|
||||||
>>> log.info(result.stdout)
|
|
||||||
|
|
||||||
If `check_return_code`, on non-zero exit code logs failure and raises.
|
|
||||||
"""
|
|
||||||
|
|
||||||
assert type(arguments) == list
|
|
||||||
assert type(self.COMMAND) == str
|
|
||||||
|
|
||||||
bin_neon = os.path.join(str(neon_binpath), self.COMMAND)
|
|
||||||
|
|
||||||
args = [bin_neon] + arguments
|
|
||||||
log.info('Running command "{}"'.format(' '.join(args)))
|
|
||||||
log.info(f'Running in "{self.env.repo_dir}"')
|
|
||||||
|
|
||||||
env_vars = os.environ.copy()
|
|
||||||
env_vars['NEON_REPO_DIR'] = str(self.env.repo_dir)
|
|
||||||
env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)
|
|
||||||
if self.env.rust_log_override is not None:
|
|
||||||
env_vars['RUST_LOG'] = self.env.rust_log_override
|
|
||||||
for (extra_env_key, extra_env_value) in (extra_env_vars or {}).items():
|
|
||||||
env_vars[extra_env_key] = extra_env_value
|
|
||||||
|
|
||||||
# Pass coverage settings
|
|
||||||
var = 'LLVM_PROFILE_FILE'
|
|
||||||
val = os.environ.get(var)
|
|
||||||
if val:
|
|
||||||
env_vars[var] = val
|
|
||||||
|
|
||||||
# Intercept CalledProcessError and print more info
|
|
||||||
res = subprocess.run(args,
|
|
||||||
env=env_vars,
|
|
||||||
check=False,
|
|
||||||
universal_newlines=True,
|
|
||||||
stdout=subprocess.PIPE,
|
|
||||||
stderr=subprocess.PIPE)
|
|
||||||
if not res.returncode:
|
|
||||||
log.info(f"Run success: {res.stdout}")
|
|
||||||
elif check_return_code:
|
|
||||||
# this way command output will be in recorded and shown in CI in failure message
|
|
||||||
msg = f"""\
|
|
||||||
Run {res.args} failed:
|
|
||||||
stdout: {res.stdout}
|
|
||||||
stderr: {res.stderr}
|
|
||||||
"""
|
|
||||||
log.info(msg)
|
|
||||||
raise Exception(msg) from subprocess.CalledProcessError(res.returncode,
|
|
||||||
res.args,
|
|
||||||
res.stdout,
|
|
||||||
res.stderr)
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|
||||||
class NeonCli(AbstractNeonCli):
|
|
||||||
"""
|
"""
|
||||||
A typed wrapper around the `neon` CLI tool.
|
A typed wrapper around the `neon` CLI tool.
|
||||||
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
||||||
"""
|
"""
|
||||||
|
def __init__(self, env: NeonEnv):
|
||||||
COMMAND = 'neon_local'
|
self.env = env
|
||||||
|
pass
|
||||||
|
|
||||||
def create_tenant(self,
|
def create_tenant(self,
|
||||||
tenant_id: Optional[uuid.UUID] = None,
|
tenant_id: Optional[uuid.UUID] = None,
|
||||||
@@ -1247,7 +1160,6 @@ class NeonCli(AbstractNeonCli):
|
|||||||
node_name: str,
|
node_name: str,
|
||||||
tenant_id: Optional[uuid.UUID] = None,
|
tenant_id: Optional[uuid.UUID] = None,
|
||||||
destroy=False,
|
destroy=False,
|
||||||
check_return_code=True,
|
|
||||||
) -> 'subprocess.CompletedProcess[str]':
|
) -> 'subprocess.CompletedProcess[str]':
|
||||||
args = [
|
args = [
|
||||||
'pg',
|
'pg',
|
||||||
@@ -1260,28 +1172,69 @@ class NeonCli(AbstractNeonCli):
|
|||||||
if node_name is not None:
|
if node_name is not None:
|
||||||
args.append(node_name)
|
args.append(node_name)
|
||||||
|
|
||||||
return self.raw_cli(args, check_return_code=check_return_code)
|
return self.raw_cli(args)
|
||||||
|
|
||||||
|
def raw_cli(self,
|
||||||
|
arguments: List[str],
|
||||||
|
extra_env_vars: Optional[Dict[str, str]] = None,
|
||||||
|
check_return_code=True) -> 'subprocess.CompletedProcess[str]':
|
||||||
|
"""
|
||||||
|
Run "neon" with the specified arguments.
|
||||||
|
|
||||||
class WalCraft(AbstractNeonCli):
|
Arguments must be in list form, e.g. ['pg', 'create']
|
||||||
"""
|
|
||||||
A typed wrapper around the `wal_craft` CLI tool.
|
|
||||||
Supports main commands via typed methods and a way to run arbitrary command directly via CLI.
|
|
||||||
"""
|
|
||||||
|
|
||||||
COMMAND = 'wal_craft'
|
Return both stdout and stderr, which can be accessed as
|
||||||
|
|
||||||
def postgres_config(self) -> List[str]:
|
>>> result = env.neon_cli.raw_cli(...)
|
||||||
res = self.raw_cli(["print-postgres-config"])
|
>>> assert result.stderr == ""
|
||||||
res.check_returncode()
|
>>> log.info(result.stdout)
|
||||||
return res.stdout.split('\n')
|
"""
|
||||||
|
|
||||||
def in_existing(self, type: str, connection: str) -> int:
|
assert type(arguments) == list
|
||||||
res = self.raw_cli(["in-existing", type, connection])
|
|
||||||
res.check_returncode()
|
bin_neon = os.path.join(str(neon_binpath), 'neon_local')
|
||||||
m = re.fullmatch(r'end_of_wal = (.*)\n', res.stdout)
|
|
||||||
assert m
|
args = [bin_neon] + arguments
|
||||||
return lsn_from_hex(m.group(1))
|
log.info('Running command "{}"'.format(' '.join(args)))
|
||||||
|
log.info(f'Running in "{self.env.repo_dir}"')
|
||||||
|
|
||||||
|
env_vars = os.environ.copy()
|
||||||
|
env_vars['NEON_REPO_DIR'] = str(self.env.repo_dir)
|
||||||
|
env_vars['POSTGRES_DISTRIB_DIR'] = str(pg_distrib_dir)
|
||||||
|
if self.env.rust_log_override is not None:
|
||||||
|
env_vars['RUST_LOG'] = self.env.rust_log_override
|
||||||
|
for (extra_env_key, extra_env_value) in (extra_env_vars or {}).items():
|
||||||
|
env_vars[extra_env_key] = extra_env_value
|
||||||
|
|
||||||
|
# Pass coverage settings
|
||||||
|
var = 'LLVM_PROFILE_FILE'
|
||||||
|
val = os.environ.get(var)
|
||||||
|
if val:
|
||||||
|
env_vars[var] = val
|
||||||
|
|
||||||
|
# Intercept CalledProcessError and print more info
|
||||||
|
try:
|
||||||
|
res = subprocess.run(args,
|
||||||
|
env=env_vars,
|
||||||
|
check=True,
|
||||||
|
universal_newlines=True,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE)
|
||||||
|
log.info(f"Run success: {res.stdout}")
|
||||||
|
except subprocess.CalledProcessError as exc:
|
||||||
|
# this way command output will be in recorded and shown in CI in failure message
|
||||||
|
msg = f"""\
|
||||||
|
Run failed: {exc}
|
||||||
|
stdout: {exc.stdout}
|
||||||
|
stderr: {exc.stderr}
|
||||||
|
"""
|
||||||
|
log.info(msg)
|
||||||
|
|
||||||
|
raise Exception(msg) from exc
|
||||||
|
|
||||||
|
if check_return_code:
|
||||||
|
res.check_returncode()
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
class NeonPageserver(PgProtocol):
|
class NeonPageserver(PgProtocol):
|
||||||
@@ -1573,11 +1526,7 @@ def static_proxy(vanilla_pg, port_distributor) -> Iterator[NeonProxy]:
|
|||||||
|
|
||||||
class Postgres(PgProtocol):
|
class Postgres(PgProtocol):
|
||||||
""" An object representing a running postgres daemon. """
|
""" An object representing a running postgres daemon. """
|
||||||
def __init__(self,
|
def __init__(self, env: NeonEnv, tenant_id: uuid.UUID, port: int):
|
||||||
env: NeonEnv,
|
|
||||||
tenant_id: uuid.UUID,
|
|
||||||
port: int,
|
|
||||||
check_stop_result: bool = True):
|
|
||||||
super().__init__(host='localhost', port=port, user='cloud_admin', dbname='postgres')
|
super().__init__(host='localhost', port=port, user='cloud_admin', dbname='postgres')
|
||||||
self.env = env
|
self.env = env
|
||||||
self.running = False
|
self.running = False
|
||||||
@@ -1585,7 +1534,6 @@ class Postgres(PgProtocol):
|
|||||||
self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
|
self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA
|
||||||
self.tenant_id = tenant_id
|
self.tenant_id = tenant_id
|
||||||
self.port = port
|
self.port = port
|
||||||
self.check_stop_result = check_stop_result
|
|
||||||
# path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<node_name>/postgresql.conf
|
# path to conf is <repo_dir>/pgdatadirs/tenants/<tenant_id>/<node_name>/postgresql.conf
|
||||||
|
|
||||||
def create(
|
def create(
|
||||||
@@ -1637,6 +1585,8 @@ class Postgres(PgProtocol):
|
|||||||
port=self.port)
|
port=self.port)
|
||||||
self.running = True
|
self.running = True
|
||||||
|
|
||||||
|
log.info(f"stdout: {run_result.stdout}")
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def pg_data_dir_path(self) -> str:
|
def pg_data_dir_path(self) -> str:
|
||||||
@@ -1700,9 +1650,7 @@ class Postgres(PgProtocol):
|
|||||||
|
|
||||||
if self.running:
|
if self.running:
|
||||||
assert self.node_name is not None
|
assert self.node_name is not None
|
||||||
self.env.neon_cli.pg_stop(self.node_name,
|
self.env.neon_cli.pg_stop(self.node_name, self.tenant_id)
|
||||||
self.tenant_id,
|
|
||||||
check_return_code=self.check_stop_result)
|
|
||||||
self.running = False
|
self.running = False
|
||||||
|
|
||||||
return self
|
return self
|
||||||
@@ -1714,10 +1662,7 @@ class Postgres(PgProtocol):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
assert self.node_name is not None
|
assert self.node_name is not None
|
||||||
self.env.neon_cli.pg_stop(self.node_name,
|
self.env.neon_cli.pg_stop(self.node_name, self.tenant_id, True)
|
||||||
self.tenant_id,
|
|
||||||
True,
|
|
||||||
check_return_code=self.check_stop_result)
|
|
||||||
self.node_name = None
|
self.node_name = None
|
||||||
self.running = False
|
self.running = False
|
||||||
|
|
||||||
@@ -1736,8 +1681,6 @@ class Postgres(PgProtocol):
|
|||||||
Returns self.
|
Returns self.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
started_at = time.time()
|
|
||||||
|
|
||||||
self.create(
|
self.create(
|
||||||
branch_name=branch_name,
|
branch_name=branch_name,
|
||||||
node_name=node_name,
|
node_name=node_name,
|
||||||
@@ -1745,8 +1688,6 @@ class Postgres(PgProtocol):
|
|||||||
lsn=lsn,
|
lsn=lsn,
|
||||||
).start()
|
).start()
|
||||||
|
|
||||||
log.info(f"Postgres startup took {time.time() - started_at} seconds")
|
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
@@ -2223,22 +2164,14 @@ def wait_until(number_of_iterations: int, interval: float, func):
|
|||||||
raise Exception("timed out while waiting for %s" % func) from last_exception
|
raise Exception("timed out while waiting for %s" % func) from last_exception
|
||||||
|
|
||||||
|
|
||||||
def assert_timeline_local(pageserver_http_client: NeonPageserverHttpClient,
|
def assert_local(pageserver_http_client: NeonPageserverHttpClient,
|
||||||
tenant: uuid.UUID,
|
tenant: uuid.UUID,
|
||||||
timeline: uuid.UUID):
|
timeline: uuid.UUID):
|
||||||
timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline)
|
timeline_detail = pageserver_http_client.timeline_detail(tenant, timeline)
|
||||||
assert timeline_detail.get('local', {}).get("disk_consistent_lsn"), timeline_detail
|
assert timeline_detail.get('local', {}).get("disk_consistent_lsn"), timeline_detail
|
||||||
return timeline_detail
|
return timeline_detail
|
||||||
|
|
||||||
|
|
||||||
def assert_no_in_progress_downloads_for_tenant(
|
|
||||||
pageserver_http_client: NeonPageserverHttpClient,
|
|
||||||
tenant: uuid.UUID,
|
|
||||||
):
|
|
||||||
tenant_status = pageserver_http_client.tenant_status(tenant)
|
|
||||||
assert tenant_status['has_in_progress_downloads'] is False, tenant_status
|
|
||||||
|
|
||||||
|
|
||||||
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
|
def remote_consistent_lsn(pageserver_http_client: NeonPageserverHttpClient,
|
||||||
tenant: uuid.UUID,
|
tenant: uuid.UUID,
|
||||||
timeline: uuid.UUID) -> int:
|
timeline: uuid.UUID) -> int:
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ def test_compare_pg_stats_rw_with_pgbench_default(neon_with_baseline: PgCompare,
|
|||||||
|
|
||||||
with env.record_pg_stats(pg_stats_rw):
|
with env.record_pg_stats(pg_stats_rw):
|
||||||
env.pg_bin.run_capture(
|
env.pg_bin.run_capture(
|
||||||
['pgbench', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()])
|
['pgbench', f'-T{duration}', f'--random-seed={seed}', '-Mprepared', env.pg.connstr()])
|
||||||
env.flush()
|
env.flush()
|
||||||
|
|
||||||
|
|
||||||
@@ -46,8 +46,14 @@ def test_compare_pg_stats_wo_with_pgbench_simple_update(neon_with_baseline: PgCo
|
|||||||
env.flush()
|
env.flush()
|
||||||
|
|
||||||
with env.record_pg_stats(pg_stats_wo):
|
with env.record_pg_stats(pg_stats_wo):
|
||||||
env.pg_bin.run_capture(
|
env.pg_bin.run_capture([
|
||||||
['pgbench', '-N', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()])
|
'pgbench',
|
||||||
|
'-N',
|
||||||
|
f'-T{duration}',
|
||||||
|
f'--random-seed={seed}',
|
||||||
|
'-Mprepared',
|
||||||
|
env.pg.connstr()
|
||||||
|
])
|
||||||
env.flush()
|
env.flush()
|
||||||
|
|
||||||
|
|
||||||
@@ -65,8 +71,14 @@ def test_compare_pg_stats_ro_with_pgbench_select_only(neon_with_baseline: PgComp
|
|||||||
env.flush()
|
env.flush()
|
||||||
|
|
||||||
with env.record_pg_stats(pg_stats_ro):
|
with env.record_pg_stats(pg_stats_ro):
|
||||||
env.pg_bin.run_capture(
|
env.pg_bin.run_capture([
|
||||||
['pgbench', '-S', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()])
|
'pgbench',
|
||||||
|
'-S',
|
||||||
|
f'-T{duration}',
|
||||||
|
f'--random-seed={seed}',
|
||||||
|
'-Mprepared',
|
||||||
|
env.pg.connstr()
|
||||||
|
])
|
||||||
env.flush()
|
env.flush()
|
||||||
|
|
||||||
|
|
||||||
@@ -85,5 +97,5 @@ def test_compare_pg_stats_wal_with_pgbench_default(neon_with_baseline: PgCompare
|
|||||||
|
|
||||||
with env.record_pg_stats(pg_stats_wal):
|
with env.record_pg_stats(pg_stats_wal):
|
||||||
env.pg_bin.run_capture(
|
env.pg_bin.run_capture(
|
||||||
['pgbench', f'-T{duration}', f'--random-seed={seed}', env.pg.connstr()])
|
['pgbench', f'-T{duration}', f'--random-seed={seed}', '-Mprepared', env.pg.connstr()])
|
||||||
env.flush()
|
env.flush()
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ def test_hot_page(env: PgCompare):
|
|||||||
|
|
||||||
with closing(env.pg.connect()) as conn:
|
with closing(env.pg.connect()) as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute('drop table if exists t, f;')
|
|
||||||
|
|
||||||
# Write many updates to the same row
|
# Write many updates to the same row
|
||||||
with env.record_duration('write'):
|
with env.record_duration('write'):
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ def test_hot_table(env: PgCompare):
|
|||||||
|
|
||||||
with closing(env.pg.connect()) as conn:
|
with closing(env.pg.connect()) as conn:
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute('drop table if exists t;')
|
|
||||||
|
|
||||||
# Write many updates to a small table
|
# Write many updates to a small table
|
||||||
with env.record_duration('write'):
|
with env.record_duration('write'):
|
||||||
|
|||||||
@@ -78,11 +78,13 @@ def run_test_pgbench(env: PgCompare, scale: int, duration: int):
|
|||||||
|
|
||||||
# Run simple-update workload
|
# Run simple-update workload
|
||||||
run_pgbench(env,
|
run_pgbench(env,
|
||||||
"simple-update", ['pgbench', '-N', '-c4', f'-T{duration}', '-P2', env.pg.connstr()])
|
"simple-update",
|
||||||
|
['pgbench', '-N', '-c4', f'-T{duration}', '-P2', '-Mprepared', env.pg.connstr()])
|
||||||
|
|
||||||
# Run SELECT workload
|
# Run SELECT workload
|
||||||
run_pgbench(env,
|
run_pgbench(env,
|
||||||
"select-only", ['pgbench', '-S', '-c4', f'-T{duration}', '-P2', env.pg.connstr()])
|
"select-only",
|
||||||
|
['pgbench', '-S', '-c4', f'-T{duration}', '-P2', '-Mprepared', env.pg.connstr()])
|
||||||
|
|
||||||
env.report_size()
|
env.report_size()
|
||||||
|
|
||||||
|
|||||||
@@ -116,6 +116,7 @@ def start_pgbench_simple_update_workload(env: PgCompare, duration: int):
|
|||||||
'-c10',
|
'-c10',
|
||||||
'-N',
|
'-N',
|
||||||
f'-T{duration}',
|
f'-T{duration}',
|
||||||
|
'-Mprepared',
|
||||||
env.pg.connstr(options="-csynchronous_commit=off")
|
env.pg.connstr(options="-csynchronous_commit=off")
|
||||||
])
|
])
|
||||||
env.flush()
|
env.flush()
|
||||||
|
|||||||
2
vendor/postgres
vendored
2
vendor/postgres
vendored
Submodule vendor/postgres updated: 9c99008445...35ad142301
Reference in New Issue
Block a user