Mirror of https://github.com/neondatabase/neon.git (synced 2026-01-24 13:50:37 +00:00)

Compare commits: enable-imp...fixture-re (4 commits)
Commits in this comparison:

- 0705c99fdb
- 21089d5217
- bd33ea9fae
- 414279726d
.github/actions/upload/action.yml (vendored, 6 lines changed)
@@ -29,12 +29,8 @@ runs:
time tar -C ${SOURCE} -cf ${ARCHIVE} --zstd .
elif [ -f ${SOURCE} ]; then
time tar -cf ${ARCHIVE} --zstd ${SOURCE}
elif ! ls ${SOURCE} > /dev/null 2>&1; then
echo 2>&1 "${SOURCE} does not exist"
exit 2
else
echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
exit 3
echo 2>&1 "${SOURCE} neither directory nor file, don't know how to handle it"
fi

- name: Upload artifact
.github/ansible/get_binaries.sh (vendored, 30 lines changed)
@@ -2,14 +2,30 @@

set -e

if [ -n "${DOCKER_TAG}" ]; then
# Verson is DOCKER_TAG but without prefix
VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
RELEASE=${RELEASE:-false}

# look at docker hub for latest tag for neon docker image
if [ "${RELEASE}" = "true" ]; then
echo "search latest release tag"
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
if [ -z "${VERSION}" ]; then
echo "no any docker tags found, exiting..."
exit 1
else
TAG="release-${VERSION}"
fi
else
echo "Please set DOCKER_TAG environment variable"
exit 1
echo "search latest dev tag"
VERSION=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags |jq -r -S '.[].name' | grep -E '^[0-9]+$' | sort -n | tail -1)
if [ -z "${VERSION}" ]; then
echo "no any docker tags found, exiting..."
exit 1
else
TAG="${VERSION}"
fi
fi

echo "found ${VERSION}"

# do initial cleanup
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
@@ -17,8 +33,8 @@ mkdir neon_install

# retrieve binaries from docker image
echo "getting binaries from docker image"
docker pull --quiet neondatabase/neon:${DOCKER_TAG}
ID=$(docker create neondatabase/neon:${DOCKER_TAG})
docker pull --quiet neondatabase/neon:${TAG}
ID=$(docker create neondatabase/neon:${TAG})
docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C neon_install
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
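The tag-resolution logic in this hunk is easier to follow outside the diff gutter. The sketch below restates the pattern the script uses (list the published tags, keep the highest numeric one, and prefix it with "release-" when a release is requested). It is a simplified illustration only: the registry endpoint and the jq/grep/sort pipeline mirror the script above, while the helper name resolve_neon_tag is made up for this example and is not part of the repository.

```bash
#!/usr/bin/env bash
# Simplified sketch of the tag-resolution pattern in get_binaries.sh.
# The endpoint and the filtering pipeline mirror the script above;
# the function name resolve_neon_tag is hypothetical.

resolve_neon_tag() {
    local release="${1:-false}"
    local names version
    # List every published tag of the neondatabase/neon image.
    names=$(curl -s https://registry.hub.docker.com/v1/repositories/neondatabase/neon/tags | jq -r -S '.[].name')

    if [ "${release}" = "true" ]; then
        # Release tags look like "release-NNNN": strip the prefix, keep the highest number.
        version=$(echo "${names}" | grep release | sed 's/release-//g' | grep -E '^[0-9]+$' | sort -n | tail -1)
        if [ -n "${version}" ]; then
            echo "release-${version}"
        fi
    else
        # Dev tags are bare build numbers: keep the highest one.
        echo "${names}" | grep -E '^[0-9]+$' | sort -n | tail -1
    fi
}

TAG=$(resolve_neon_tag "${RELEASE:-false}")
if [ -z "${TAG}" ]; then
    echo "no docker tags found, exiting..." >&2
    exit 1
fi
docker pull --quiet "neondatabase/neon:${TAG}"
```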
.github/workflows/benchmarking.yml (vendored, 119 lines changed)
@@ -1,4 +1,4 @@
name: Benchmarking
name: benchmarking

on:
# uncomment to run on push for debugging your PR
@@ -15,15 +15,6 @@ on:

workflow_dispatch: # adds ability to run this manually

defaults:
run:
shell: bash -euxo pipefail {0}

concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true

jobs:
bench:
# this workflow runs on self hosteed runner
@@ -69,6 +60,7 @@ jobs:
- name: Setup cluster
env:
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"
shell: bash -euxo pipefail {0}
run: |
set -e

@@ -104,9 +96,7 @@ jobs:
# since it might generate duplicates when calling ingest_perf_test_result.py
rm -rf perf-report-staging
mkdir -p perf-report-staging
# Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
# it's important for test_perf_pgbench.py::test_pgbench_remote_* tests
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --sparse-ordering --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600
./scripts/pytest test_runner/performance/ -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-staging --timeout 3600

- name: Submit result
env:
@@ -123,106 +113,3 @@ jobs:
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

pgbench-compare:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: "10gb"
REMOTE_ENV: "1"
POSTGRES_DISTRIB_DIR: /usr
TEST_OUTPUT: /tmp/test_output

strategy:
fail-fast: false
matrix:
connstr: [ BENCHMARK_CAPTEST_CONNSTR, BENCHMARK_RDS_CONNSTR ]

runs-on: dev
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2817580636

timeout-minutes: 360 # 6h

steps:
- uses: actions/checkout@v3

- name: Cache poetry deps
id: cache_poetry
uses: actions/cache@v3
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}

- name: Install Python deps
run: ./scripts/pysync

- name: Calculate platform
id: calculate-platform
env:
CONNSTR: ${{ matrix.connstr }}
run: |
if [ "${CONNSTR}" = "BENCHMARK_CAPTEST_CONNSTR" ]; then
PLATFORM=neon-captest
elif [ "${CONNSTR}" = "BENCHMARK_RDS_CONNSTR" ]; then
PLATFORM=rds-aurora
else
echo 2>&1 "Unknown CONNSTR=${CONNSTR}. Allowed are BENCHMARK_CAPTEST_CONNSTR, and BENCHMARK_RDS_CONNSTR only"
exit 1
fi

echo "::set-output name=PLATFORM::${PLATFORM}"

- name: Install Deps
run: |
echo "deb http://apt.postgresql.org/pub/repos/apt focal-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo apt -y update
sudo apt install -y postgresql-14 postgresql-client-14

- name: Benchmark init
env:
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
mkdir -p perf-report-captest

psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_init -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600

- name: Benchmark simple-update
env:
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_simple_update -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600

- name: Benchmark select-only
env:
PLATFORM: ${{ steps.calculate-platform.outputs.PLATFORM }}
BENCHMARK_CONNSTR: ${{ secrets[matrix.connstr] }}
run: |
psql $BENCHMARK_CONNSTR -c "SELECT 1;"
./scripts/pytest test_runner/performance/test_perf_pgbench.py::test_pgbench_remote_select_only -v -m "remote_cluster" --skip-interfering-proc-check --out-dir perf-report-captest --timeout 21600

- name: Submit result
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
run: |
REPORT_FROM=$(realpath perf-report-captest) REPORT_TO=staging scripts/generate_and_push_perf_report.sh

- name: Upload logs
if: always()
uses: ./.github/actions/upload
with:
name: bench-captest-${{ steps.calculate-platform.outputs.PLATFORM }}
path: /tmp/test_output/

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
.github/workflows/build_and_test.yml (vendored, 357 lines changed)
@@ -7,6 +7,10 @@ on:
|
||||
- release
|
||||
pull_request:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow per any non-`main` branch.
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
|
||||
@@ -17,39 +21,9 @@ env:
|
||||
COPT: '-Werror'
|
||||
|
||||
jobs:
|
||||
tag:
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get build tag
|
||||
run: |
|
||||
echo run:$GITHUB_RUN_ID
|
||||
echo ref:$GITHUB_REF_NAME
|
||||
echo rev:$(git rev-list --count HEAD)
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
echo "::set-output name=tag::$GITHUB_RUN_ID"
|
||||
fi
|
||||
shell: bash
|
||||
id: build-tag
|
||||
|
||||
build-neon:
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -61,7 +35,7 @@ jobs:
|
||||
GIT_VERSION: ${{ github.sha }}
|
||||
|
||||
steps:
|
||||
- name: Fix git ownership
|
||||
- name: Fix git ownerwhip
|
||||
run: |
|
||||
# Workaround for `fatal: detected dubious ownership in repository at ...`
|
||||
#
|
||||
@@ -80,7 +54,6 @@ jobs:
|
||||
- name: Set pg revision for caching
|
||||
id: pg_ver
|
||||
run: echo ::set-output name=pg_rev::$(git rev-parse HEAD:vendor/postgres)
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
# Set some environment variables used by all the steps.
|
||||
#
|
||||
@@ -104,7 +77,6 @@ jobs:
|
||||
echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
|
||||
echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
|
||||
echo "CARGO_FLAGS=${CARGO_FLAGS}" >> $GITHUB_ENV
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
# Don't include the ~/.cargo/registry/src directory. It contains just
|
||||
# uncompressed versions of the crates in ~/.cargo/registry/cache
|
||||
@@ -121,8 +93,8 @@ jobs:
|
||||
target/
|
||||
# Fall back to older versions of the key, if no cache for current Cargo.lock was found
|
||||
key: |
|
||||
v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
v6-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
|
||||
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-
|
||||
|
||||
- name: Cache postgres build
|
||||
id: cache_pg
|
||||
@@ -134,17 +106,14 @@ jobs:
|
||||
- name: Build postgres
|
||||
if: steps.cache_pg.outputs.cache-hit != 'true'
|
||||
run: mold -run make postgres -j$(nproc)
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Run cargo build
|
||||
run: |
|
||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Run cargo test
|
||||
run: |
|
||||
${cov_prefix} cargo test $CARGO_FLAGS
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Install rust binaries
|
||||
run: |
|
||||
@@ -185,11 +154,9 @@ jobs:
|
||||
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
|
||||
done
|
||||
fi
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Install postgres binaries
|
||||
run: cp -a tmp_install /tmp/neon/pg_install
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Upload Neon artifact
|
||||
uses: ./.github/actions/upload
|
||||
@@ -204,9 +171,7 @@ jobs:
|
||||
|
||||
pg_regress-tests:
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
|
||||
needs: [ build-neon ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -234,9 +199,7 @@ jobs:
|
||||
|
||||
other-tests:
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
|
||||
needs: [ build-neon ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -267,9 +230,7 @@ jobs:
|
||||
|
||||
benchmarks:
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
|
||||
needs: [ build-neon ]
|
||||
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
|
||||
strategy:
|
||||
@@ -300,9 +261,7 @@ jobs:
|
||||
|
||||
coverage-report:
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:2746987948
|
||||
needs: [ other-tests, pg_regress-tests ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -325,7 +284,7 @@ jobs:
|
||||
!~/.cargo/registry/src
|
||||
~/.cargo/git/
|
||||
target/
|
||||
key: v5-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
key: v3-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
|
||||
|
||||
- name: Get Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
@@ -341,7 +300,6 @@ jobs:
|
||||
|
||||
- name: Merge coverage data
|
||||
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
- name: Build and upload coverage report
|
||||
run: |
|
||||
@@ -374,13 +332,9 @@ jobs:
|
||||
\"description\": \"Coverage report is ready\",
|
||||
\"target_url\": \"$REPORT_URL\"
|
||||
}"
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
trigger-e2e-tests:
|
||||
runs-on: dev
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ build-neon ]
|
||||
steps:
|
||||
- name: Set PR's status to pending and request a remote CI test
|
||||
@@ -415,160 +369,150 @@ jobs:
|
||||
}
|
||||
}"
|
||||
|
||||
dockerfile-check:
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
docker-image:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ pg_regress-tests, other-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
outputs:
|
||||
value: ${{ steps.dockerfile-check.outputs.any_changed }}
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Get specific changed files
|
||||
id: dockerfile-check
|
||||
uses: tj-actions/changed-files@802732316a11c01531ea72773ec7998155238e31 # v25
|
||||
with:
|
||||
files: |
|
||||
Dockerfile
|
||||
Dockerfile.compute-tools
|
||||
./vendor/postgres/Dockerfile
|
||||
|
||||
neon-image:
|
||||
# force building for all 3 images
|
||||
if: needs.dockerfile-check.outputs.value != 'true'
|
||||
runs-on: dev
|
||||
needs: [ dockerfile-check ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
environment: dev
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure ECR login
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Kaniko build console
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:$GITHUB_RUN_ID
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
with:
|
||||
driver: docker
|
||||
|
||||
compute-tools-image:
|
||||
if: needs.dockerfile-check.outputs.value != 'true'
|
||||
runs-on: dev
|
||||
needs: [ dockerfile-check ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
environment: dev
|
||||
- name: Get build tag
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
id: build-tag
|
||||
|
||||
- name: Get legacy build tag
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::latest"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
id: legacy-build-tag
|
||||
|
||||
- name: Build neon Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
build-args: |
|
||||
GIT_VERSION="${{github.sha}}"
|
||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
||||
pull: true
|
||||
push: true
|
||||
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
|
||||
|
||||
docker-image-compute:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
needs: [ pg_regress-tests, other-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
|
||||
- name: Configure ECR login
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
|
||||
- name: Kaniko build console
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:$GITHUB_RUN_ID
|
||||
|
||||
compute-node-image:
|
||||
if: needs.dockerfile-check.outputs.value != 'true'
|
||||
runs-on: dev
|
||||
needs: [ dockerfile-check ]
|
||||
container: gcr.io/kaniko-project/executor:v1.9.0-debug
|
||||
environment: dev
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v1 # v3 won't work with kaniko
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure ECR login
|
||||
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Kaniko build console
|
||||
working-directory: ./vendor/postgres/
|
||||
run: /kaniko/executor --snapshotMode=redo --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --snapshotMode=redo --context . --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:$GITHUB_RUN_ID
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
with:
|
||||
driver: docker
|
||||
|
||||
promote-images:
|
||||
runs-on: dev
|
||||
needs: [ neon-image, compute-tools-image, compute-node-image ]
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
container: amazon/aws-cli
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [ neon, compute-tools, compute-node ]
|
||||
|
||||
steps:
|
||||
- name: Promote image to latest
|
||||
run:
|
||||
MANIFEST=$(aws ecr batch-get-image --repository-name ${{ matrix.name }} --image-ids imageTag=$GITHUB_RUN_ID --query 'images[].imageManifest' --output text) && aws ecr put-image --repository-name ${{ matrix.name }} --image-tag latest --image-manifest "$MANIFEST"
|
||||
|
||||
push-docker-hub:
|
||||
runs-on: dev
|
||||
needs: [ promote-images, tag ]
|
||||
container: golang:1.19-bullseye
|
||||
environment: dev
|
||||
|
||||
steps:
|
||||
- name: Install Crane & ECR helper
|
||||
- name: Get build tag
|
||||
run: |
|
||||
go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
|
||||
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
|
||||
|
||||
# - name: Get build tag
|
||||
# run: |
|
||||
# if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
# echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
||||
# elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
# echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
||||
# else
|
||||
# echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release' "
|
||||
# echo "::set-output name=tag::$GITHUB_RUN_ID"
|
||||
# fi
|
||||
# id: build-tag
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::$(git rev-list --count HEAD)"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release-$(git rev-list --count HEAD)"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
id: build-tag
|
||||
|
||||
- name: Configure ECR login
|
||||
- name: Get legacy build tag
|
||||
run: |
|
||||
mkdir /github/home/.docker/
|
||||
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "::set-output name=tag::latest"
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "::set-output name=tag::release"
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
exit 1
|
||||
fi
|
||||
id: legacy-build-tag
|
||||
|
||||
- name: Pull neon image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:latest neon
|
||||
- name: Build compute-tools Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
build-args: |
|
||||
GIT_VERSION="${{github.sha}}"
|
||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
||||
push: false
|
||||
file: Dockerfile.compute-tools
|
||||
tags: neondatabase/compute-tools:local
|
||||
|
||||
- name: Pull compute tools image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest compute-tools
|
||||
- name: Push compute-tools Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
build-args: |
|
||||
GIT_VERSION="${{github.sha}}"
|
||||
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
|
||||
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
|
||||
push: true
|
||||
file: Dockerfile.compute-tools
|
||||
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
|
||||
|
||||
- name: Pull compute node image from ECR
|
||||
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest compute-node
|
||||
|
||||
- name: Configure docker login
|
||||
run: |
|
||||
# ECR Credential Helper & Docker Hub don't work together in config, hence reset
|
||||
echo "" > /github/home/.docker/config.json
|
||||
crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io
|
||||
|
||||
- name: Push neon image to Docker Hub
|
||||
run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Push compute tools image to Docker Hub
|
||||
run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Push compute node image to Docker Hub
|
||||
run: crane push compute-node neondatabase/compute-node:${{needs.tag.outputs.build-tag}}
|
||||
|
||||
- name: Add latest tag to images
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
run: |
|
||||
crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
|
||||
crane tag neondatabase/compute-node:${{needs.tag.outputs.build-tag}} latest
|
||||
- name: Build compute-node Docker image
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: ./vendor/postgres/
|
||||
build-args:
|
||||
COMPUTE_TOOLS_TAG=local
|
||||
push: true
|
||||
tags: neondatabase/compute-node:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/compute-node:${{steps.build-tag.outputs.tag}}
|
||||
|
||||
calculate-deploy-targets:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
@@ -594,16 +538,14 @@ jobs:
|
||||
|
||||
deploy:
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
#container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
|
||||
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag ]
|
||||
# We need both storage **and** compute images for deploy, because control plane
|
||||
# picks the compute version based on the storage version. If it notices a fresh
|
||||
# storage it may bump the compute version. And if compute image failed to build
|
||||
# it may break things badly.
|
||||
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
|
||||
@@ -614,19 +556,12 @@ jobs:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Setup python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.10'
|
||||
|
||||
- name: Setup ansible
|
||||
run: |
|
||||
export PATH="/root/.local/bin:$PATH"
|
||||
pip install --progress-bar off --user ansible boto3
|
||||
|
||||
- name: Redeploy
|
||||
run: |
|
||||
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
cd "$(pwd)/.github/ansible"
|
||||
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
@@ -649,16 +584,13 @@ jobs:
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-proxy:
|
||||
runs-on: dev
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag ]
|
||||
runs-on: [ self-hosted, Linux, k8s-runner ]
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
|
||||
# to run all deploy jobs consistently.
|
||||
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
|
||||
@@ -671,9 +603,6 @@ jobs:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Add curl
|
||||
run: apt update && apt install curl -y
|
||||
|
||||
- name: Store kubeconfig file
|
||||
run: |
|
||||
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
|
||||
@@ -686,6 +615,6 @@ jobs:
|
||||
|
||||
- name: Re-deploy proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
DOCKER_TAG=${{needs.docker-image.outputs.build-tag}}
|
||||
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace default --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
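Several jobs in the build_and_test.yml diff above derive the same build tag from the branch name and the commit count. The standalone sketch below restates that step's logic so it can be read in one place; it assumes the GITHUB_REF_NAME and GITHUB_RUN_ID variables that GitHub Actions provides, and the file name compute_build_tag.sh is illustrative rather than a file in the repository.

```bash
#!/usr/bin/env bash
# compute_build_tag.sh (illustrative name): restates the "Get build tag" step
# from build_and_test.yml. Assumes GITHUB_REF_NAME and GITHUB_RUN_ID are set,
# as they are inside GitHub Actions runners.
set -euo pipefail

REV_COUNT=$(git rev-list --count HEAD)

if [[ "${GITHUB_REF_NAME}" == "main" ]]; then
    # Builds from main are tagged with the bare commit count.
    TAG="${REV_COUNT}"
elif [[ "${GITHUB_REF_NAME}" == "release" ]]; then
    # Release builds get a "release-" prefix so they sort separately on Docker Hub.
    TAG="release-${REV_COUNT}"
else
    # Branch builds fall back to the workflow run id, as in the tag job above.
    echo "GITHUB_REF_NAME (value '${GITHUB_REF_NAME}') is not set to either 'main' or 'release'"
    TAG="${GITHUB_RUN_ID}"
fi

echo "::set-output name=tag::${TAG}"
```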
.github/workflows/codestyle.yml (vendored, 2 lines changed)
@@ -101,7 +101,7 @@ jobs:
!~/.cargo/registry/src
~/.cargo/git
target
key: v2-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}
key: v1-${{ runner.os }}-cargo-${{ hashFiles('./Cargo.lock') }}-rust-${{ matrix.rust_toolchain }}

- name: Run cargo clippy
run: ./run_clippy.sh
.github/workflows/pg_clients.yml (vendored, 19 lines changed)
@@ -19,12 +19,8 @@ concurrency:

jobs:
test-postgres-client-libs:
# TODO: switch to gen2 runner, requires docker
runs-on: [ ubuntu-latest ]

env:
TEST_OUTPUT: /tmp/test_output

steps:
- name: Checkout
uses: actions/checkout@v3
@@ -51,7 +47,7 @@ jobs:
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: "${{ secrets.BENCHMARK_STAGING_CONNSTR }}"

TEST_OUTPUT: /tmp/test_output
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
run: |
@@ -65,18 +61,9 @@ jobs:
-m "remote_cluster" \
-rA "test_runner/pg_clients"

# We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
# It will be fixed after switching to gen2 runner
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v3
with:
retention-days: 7
name: python-test-pg_clients-${{ runner.os }}-stage-logs
path: ${{ env.TEST_OUTPUT }}

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
if: failure()
id: slack
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
Cargo.lock (generated, 1 line changed)
@@ -2269,7 +2269,6 @@ dependencies = [
"anyhow",
"async-trait",
"base64",
"bstr",
"bytes",
"clap 3.2.12",
"futures",
@@ -1,4 +1,5 @@
|
||||
use std::io::Write;
|
||||
use std::net::TcpStream;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
use std::sync::Arc;
|
||||
@@ -240,23 +241,37 @@ impl SafekeeperNode {
|
||||
),
|
||||
}
|
||||
|
||||
// Wait until process is gone
|
||||
for i in 0..600 {
|
||||
let signal = None; // Send no signal, just get the error code
|
||||
match kill(pid, signal) {
|
||||
Ok(_) => (), // Process exists, keep waiting
|
||||
Err(Errno::ESRCH) => {
|
||||
// Process not found, we're done
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => bail!(
|
||||
"Failed to send signal to pageserver with pid {}: {}",
|
||||
pid,
|
||||
err.desc()
|
||||
),
|
||||
};
|
||||
let address = connection_address(&self.pg_connection_config);
|
||||
|
||||
// TODO Remove this "timeout" and handle it on caller side instead.
|
||||
// Shutting down may take a long time,
|
||||
// if safekeeper flushes a lot of data
|
||||
let mut tcp_stopped = false;
|
||||
for i in 0..600 {
|
||||
if !tcp_stopped {
|
||||
if let Err(err) = TcpStream::connect(&address) {
|
||||
tcp_stopped = true;
|
||||
if err.kind() != io::ErrorKind::ConnectionRefused {
|
||||
eprintln!("\nSafekeeper connection failed with error: {err}");
|
||||
}
|
||||
}
|
||||
}
|
||||
if tcp_stopped {
|
||||
// Also check status on the HTTP port
|
||||
match self.check_status() {
|
||||
Err(SafekeeperHttpError::Transport(err)) if err.is_connect() => {
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("\nSafekeeper status check failed with error: {err}");
|
||||
return Ok(());
|
||||
}
|
||||
Ok(()) => {
|
||||
// keep waiting
|
||||
}
|
||||
}
|
||||
}
|
||||
if i % 10 == 0 {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Write};
|
||||
use std::net::TcpStream;
|
||||
use std::num::NonZeroU64;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
@@ -311,23 +312,38 @@ impl PageServerNode {
|
||||
),
|
||||
}
|
||||
|
||||
// Wait until process is gone
|
||||
for i in 0..600 {
|
||||
let signal = None; // Send no signal, just get the error code
|
||||
match kill(pid, signal) {
|
||||
Ok(_) => (), // Process exists, keep waiting
|
||||
Err(Errno::ESRCH) => {
|
||||
// Process not found, we're done
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => bail!(
|
||||
"Failed to send signal to pageserver with pid {}: {}",
|
||||
pid,
|
||||
err.desc()
|
||||
),
|
||||
};
|
||||
let address = connection_address(&self.pg_connection_config);
|
||||
|
||||
// TODO Remove this "timeout" and handle it on caller side instead.
|
||||
// Shutting down may take a long time,
|
||||
// if pageserver checkpoints a lot of data
|
||||
let mut tcp_stopped = false;
|
||||
for i in 0..600 {
|
||||
if !tcp_stopped {
|
||||
if let Err(err) = TcpStream::connect(&address) {
|
||||
tcp_stopped = true;
|
||||
if err.kind() != io::ErrorKind::ConnectionRefused {
|
||||
eprintln!("\nPageserver connection failed with error: {err}");
|
||||
}
|
||||
}
|
||||
}
|
||||
if tcp_stopped {
|
||||
// Also check status on the HTTP port
|
||||
|
||||
match self.check_status() {
|
||||
Err(PageserverHttpError::Transport(err)) if err.is_connect() => {
|
||||
println!("done!");
|
||||
return Ok(());
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("\nPageserver status check failed with error: {err}");
|
||||
return Ok(());
|
||||
}
|
||||
Ok(()) => {
|
||||
// keep waiting
|
||||
}
|
||||
}
|
||||
}
|
||||
if i % 10 == 0 {
|
||||
print!(".");
|
||||
io::stdout().flush().unwrap();
|
||||
|
||||
@@ -44,7 +44,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {

fn main() {
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=bindgen_deps.h");
println!("cargo:rerun-if-changed=pg_control_ffi.h");

// Finding the location of C headers for the Postgres server:
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/tmp_install`
@@ -88,9 +88,9 @@ fn main() {
// the resulting bindings.
let bindings = bindgen::Builder::default()
//
// All the needed PostgreSQL headers are included from 'bindgen_deps.h'
// All the needed PostgreSQL headers are included from 'pg_control_ffi.h'
//
.header("bindgen_deps.h")
.header("pg_control_ffi.h")
//
// Tell cargo to invalidate the built crate whenever any of the
// included header files changed.
@@ -5,7 +5,7 @@ use crate::pg_constants;
use once_cell::sync::OnceCell;
use regex::Regex;

#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
#[derive(Debug, Clone, thiserror::Error, PartialEq)]
pub enum FilePathError {
#[error("invalid relation fork name")]
InvalidForkName,
@@ -16,22 +16,22 @@ use crate::XLogRecord;
|
||||
use crate::XLOG_PAGE_MAGIC;
|
||||
|
||||
use crate::pg_constants::WAL_SEGMENT_SIZE;
|
||||
use crate::waldecoder::WalStreamDecoder;
|
||||
|
||||
use anyhow::{anyhow, bail, ensure};
|
||||
use byteorder::{ByteOrder, LittleEndian};
|
||||
use bytes::BytesMut;
|
||||
use bytes::{Buf, Bytes};
|
||||
|
||||
use crc32c::*;
|
||||
use log::*;
|
||||
|
||||
use std::fs::File;
|
||||
use std::cmp::max;
|
||||
use std::cmp::min;
|
||||
use std::fs::{self, File};
|
||||
use std::io::prelude::*;
|
||||
use std::io::ErrorKind;
|
||||
use std::io::SeekFrom;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
use utils::bin_ser::DeserializeError;
|
||||
use utils::bin_ser::SerializeError;
|
||||
|
||||
use utils::const_assert;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
pub const XLOG_FNAME_LEN: usize = 24;
|
||||
@@ -80,12 +80,12 @@ pub fn XLogSegNoOffsetToRecPtr(
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize) -> String {
|
||||
format!(
|
||||
return format!(
|
||||
"{:>08X}{:>08X}{:>08X}",
|
||||
tli,
|
||||
logSegNo / XLogSegmentsPerXLogId(wal_segsz_bytes),
|
||||
logSegNo % XLogSegmentsPerXLogId(wal_segsz_bytes)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
@@ -140,93 +140,338 @@ pub fn to_pg_timestamp(time: SystemTime) -> TimestampTz {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns (aligned) end_lsn of the last record in data_dir with WAL segments.
|
||||
// start_lsn must point to some previously known record boundary (beginning of
|
||||
// the next record). If no valid record after is found, start_lsn is returned
|
||||
// back.
|
||||
pub fn find_end_of_wal(
|
||||
/// Return offset of the last valid record in the segment segno, starting
|
||||
/// looking at start_offset. Returns start_offset if no records found.
|
||||
fn find_end_of_wal_segment(
|
||||
data_dir: &Path,
|
||||
segno: XLogSegNo,
|
||||
tli: TimeLineID,
|
||||
wal_seg_size: usize,
|
||||
start_lsn: Lsn, // start reading WAL at this point; must point at record start_lsn.
|
||||
) -> anyhow::Result<Lsn> {
|
||||
let mut result = start_lsn;
|
||||
let mut curr_lsn = start_lsn;
|
||||
start_offset: usize, // start reading at this point
|
||||
) -> anyhow::Result<u32> {
|
||||
// step back to the beginning of the page to read it in...
|
||||
let mut offs: usize = start_offset - start_offset % XLOG_BLCKSZ;
|
||||
let mut skipping_first_contrecord: bool = false;
|
||||
let mut contlen: usize = 0;
|
||||
let mut xl_crc: u32 = 0;
|
||||
let mut crc: u32 = 0;
|
||||
let mut rec_offs: usize = 0;
|
||||
let mut buf = [0u8; XLOG_BLCKSZ];
|
||||
let mut decoder = WalStreamDecoder::new(start_lsn);
|
||||
let file_name = XLogFileName(tli, segno, wal_seg_size);
|
||||
let mut last_valid_rec_pos: usize = start_offset; // assume at given start_offset begins new record
|
||||
let mut file = File::open(data_dir.join(file_name.clone() + ".partial"))?;
|
||||
file.seek(SeekFrom::Start(offs as u64))?;
|
||||
// xl_crc is the last field in XLogRecord, will not be read into rec_hdr
|
||||
const_assert!(XLOG_RECORD_CRC_OFFS + 4 == XLOG_SIZE_OF_XLOG_RECORD);
|
||||
let mut rec_hdr = [0u8; XLOG_RECORD_CRC_OFFS];
|
||||
|
||||
// loop over segments
|
||||
loop {
|
||||
let segno = curr_lsn.segment_number(wal_seg_size);
|
||||
let seg_file_name = XLogFileName(PG_TLI, segno, wal_seg_size);
|
||||
let seg_file_path = data_dir.join(seg_file_name);
|
||||
match open_wal_segment(&seg_file_path)? {
|
||||
None => {
|
||||
// no more segments
|
||||
info!(
|
||||
"find_end_of_wal reached end at {:?}, segment {:?} doesn't exist",
|
||||
result, seg_file_path
|
||||
trace!("find_end_of_wal_segment(data_dir={}, segno={}, tli={}, wal_seg_size={}, start_offset=0x{:x})", data_dir.display(), segno, tli, wal_seg_size, start_offset);
|
||||
while offs < wal_seg_size {
|
||||
// we are at the beginning of the page; read it in
|
||||
if offs % XLOG_BLCKSZ == 0 {
|
||||
trace!("offs=0x{:x}: new page", offs);
|
||||
let bytes_read = file.read(&mut buf)?;
|
||||
if bytes_read != buf.len() {
|
||||
bail!(
|
||||
"failed to read {} bytes from {} at {}",
|
||||
XLOG_BLCKSZ,
|
||||
file_name,
|
||||
offs
|
||||
);
|
||||
return Ok(result);
|
||||
}
|
||||
Some(mut segment) => {
|
||||
let seg_offs = curr_lsn.segment_offset(wal_seg_size);
|
||||
segment.seek(SeekFrom::Start(seg_offs as u64))?;
|
||||
// loop inside segment
|
||||
loop {
|
||||
let bytes_read = segment.read(&mut buf)?;
|
||||
if bytes_read == 0 {
|
||||
break; // EOF
|
||||
}
|
||||
curr_lsn += bytes_read as u64;
|
||||
decoder.feed_bytes(&buf[0..bytes_read]);
|
||||
|
||||
// advance result past all completely read records
|
||||
loop {
|
||||
match decoder.poll_decode() {
|
||||
Ok(Some(record)) => result = record.0,
|
||||
Err(e) => {
|
||||
info!(
|
||||
"find_end_of_wal reached end at {:?}, decode error: {:?}",
|
||||
result, e
|
||||
);
|
||||
return Ok(result);
|
||||
}
|
||||
Ok(None) => break, // need more data
|
||||
}
|
||||
let xlp_magic = LittleEndian::read_u16(&buf[0..2]);
|
||||
let xlp_info = LittleEndian::read_u16(&buf[2..4]);
|
||||
let xlp_rem_len = LittleEndian::read_u32(&buf[XLP_REM_LEN_OFFS..XLP_REM_LEN_OFFS + 4]);
|
||||
trace!(
|
||||
" xlp_magic=0x{:x}, xlp_info=0x{:x}, xlp_rem_len={}",
|
||||
xlp_magic,
|
||||
xlp_info,
|
||||
xlp_rem_len
|
||||
);
|
||||
// this is expected in current usage when valid WAL starts after page header
|
||||
if xlp_magic != XLOG_PAGE_MAGIC as u16 {
|
||||
trace!(
|
||||
" invalid WAL file {}.partial magic {} at {:?}",
|
||||
file_name,
|
||||
xlp_magic,
|
||||
Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
|
||||
);
|
||||
}
|
||||
if offs == 0 {
|
||||
offs += XLOG_SIZE_OF_XLOG_LONG_PHD;
|
||||
if (xlp_info & XLP_FIRST_IS_CONTRECORD) != 0 {
|
||||
trace!(" first record is contrecord");
|
||||
skipping_first_contrecord = true;
|
||||
contlen = xlp_rem_len as usize;
|
||||
if offs < start_offset {
|
||||
// Pre-condition failed: the beginning of the segment is unexpectedly corrupted.
|
||||
ensure!(start_offset - offs >= contlen,
|
||||
"start_offset is in the middle of the first record (which happens to be a contrecord), \
|
||||
expected to be on a record boundary. Is beginning of the segment corrupted?");
|
||||
contlen = 0;
|
||||
// keep skipping_first_contrecord to avoid counting the contrecord as valid, we did not check it.
|
||||
}
|
||||
} else {
|
||||
trace!(" first record is not contrecord");
|
||||
}
|
||||
} else {
|
||||
offs += XLOG_SIZE_OF_XLOG_SHORT_PHD;
|
||||
}
|
||||
// ... and step forward again if asked
|
||||
trace!(" skipped header to 0x{:x}", offs);
|
||||
offs = max(offs, start_offset);
|
||||
// beginning of the next record
|
||||
} else if contlen == 0 {
|
||||
let page_offs = offs % XLOG_BLCKSZ;
|
||||
let xl_tot_len = LittleEndian::read_u32(&buf[page_offs..page_offs + 4]) as usize;
|
||||
trace!("offs=0x{:x}: new record, xl_tot_len={}", offs, xl_tot_len);
|
||||
if xl_tot_len == 0 {
|
||||
info!(
|
||||
"find_end_of_wal_segment reached zeros at {:?}, last records ends at {:?}",
|
||||
Lsn(XLogSegNoOffsetToRecPtr(segno, offs as u32, wal_seg_size)),
|
||||
Lsn(XLogSegNoOffsetToRecPtr(
|
||||
segno,
|
||||
last_valid_rec_pos as u32,
|
||||
wal_seg_size
|
||||
))
|
||||
);
|
||||
break; // zeros, reached the end
|
||||
}
|
||||
if skipping_first_contrecord {
|
||||
skipping_first_contrecord = false;
|
||||
trace!(" first contrecord has been just completed");
|
||||
} else {
|
||||
trace!(
|
||||
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
|
||||
last_valid_rec_pos,
|
||||
offs
|
||||
);
|
||||
last_valid_rec_pos = offs;
|
||||
}
|
||||
offs += 4;
|
||||
rec_offs = 4;
|
||||
contlen = xl_tot_len - 4;
|
||||
trace!(
|
||||
" reading rec_hdr[0..4] <-- [0x{:x}; 0x{:x})",
|
||||
page_offs,
|
||||
page_offs + 4
|
||||
);
|
||||
rec_hdr[0..4].copy_from_slice(&buf[page_offs..page_offs + 4]);
|
||||
} else {
|
||||
// we're continuing a record, possibly from previous page.
|
||||
let page_offs = offs % XLOG_BLCKSZ;
|
||||
let pageleft = XLOG_BLCKSZ - page_offs;
|
||||
|
||||
// read the rest of the record, or as much as fits on this page.
|
||||
let n = min(contlen, pageleft);
|
||||
trace!(
|
||||
"offs=0x{:x}, record continuation, pageleft={}, contlen={}",
|
||||
offs,
|
||||
pageleft,
|
||||
contlen
|
||||
);
|
||||
// fill rec_hdr header up to (but not including) xl_crc field
|
||||
trace!(
|
||||
" rec_offs={}, XLOG_RECORD_CRC_OFFS={}, XLOG_SIZE_OF_XLOG_RECORD={}",
|
||||
rec_offs,
|
||||
XLOG_RECORD_CRC_OFFS,
|
||||
XLOG_SIZE_OF_XLOG_RECORD
|
||||
);
|
||||
if rec_offs < XLOG_RECORD_CRC_OFFS {
|
||||
let len = min(XLOG_RECORD_CRC_OFFS - rec_offs, n);
|
||||
trace!(
|
||||
" reading rec_hdr[{}..{}] <-- [0x{:x}; 0x{:x})",
|
||||
rec_offs,
|
||||
rec_offs + len,
|
||||
page_offs,
|
||||
page_offs + len
|
||||
);
|
||||
rec_hdr[rec_offs..rec_offs + len].copy_from_slice(&buf[page_offs..page_offs + len]);
|
||||
}
|
||||
if rec_offs <= XLOG_RECORD_CRC_OFFS && rec_offs + n >= XLOG_SIZE_OF_XLOG_RECORD {
|
||||
let crc_offs = page_offs - rec_offs + XLOG_RECORD_CRC_OFFS;
|
||||
// All records are aligned on 8-byte boundary, so their 8-byte frames
|
||||
// cannot be split between pages. As xl_crc is the last field,
|
||||
// its content is always on the same page.
|
||||
const_assert!(XLOG_RECORD_CRC_OFFS % 8 == 4);
|
||||
// We should always start reading aligned records even in incorrect WALs so if
|
||||
// the condition is false it is likely a bug. However, it is localized somewhere
|
||||
// in this function, hence we do not crash and just report failure instead.
|
||||
ensure!(crc_offs % 8 == 4, "Record is not aligned properly (bug?)");
|
||||
xl_crc = LittleEndian::read_u32(&buf[crc_offs..crc_offs + 4]);
|
||||
trace!(
|
||||
" reading xl_crc: [0x{:x}; 0x{:x}) = 0x{:x}",
|
||||
crc_offs,
|
||||
crc_offs + 4,
|
||||
xl_crc
|
||||
);
|
||||
crc = crc32c_append(0, &buf[crc_offs + 4..page_offs + n]);
|
||||
trace!(
|
||||
" initializing crc: [0x{:x}; 0x{:x}); crc = 0x{:x}",
|
||||
crc_offs + 4,
|
||||
page_offs + n,
|
||||
crc
|
||||
);
|
||||
} else if rec_offs > XLOG_RECORD_CRC_OFFS {
|
||||
// As all records are 8-byte aligned, the header is already fully read and `crc` is initialized in the branch above.
|
||||
ensure!(rec_offs >= XLOG_SIZE_OF_XLOG_RECORD);
|
||||
let old_crc = crc;
|
||||
crc = crc32c_append(crc, &buf[page_offs..page_offs + n]);
|
||||
trace!(
|
||||
" appending to crc: [0x{:x}; 0x{:x}); 0x{:x} --> 0x{:x}",
|
||||
page_offs,
|
||||
page_offs + n,
|
||||
old_crc,
|
||||
crc
|
||||
);
|
||||
} else {
|
||||
// Correct because of the way conditions are written above.
|
||||
assert!(rec_offs + n < XLOG_SIZE_OF_XLOG_RECORD);
|
||||
// If `skipping_first_contrecord == true`, we may be reading from a middle of a record
|
||||
// which started in the previous segment. Hence there is no point in validating the header.
|
||||
if !skipping_first_contrecord && rec_offs + n > XLOG_RECORD_CRC_OFFS {
|
||||
info!(
|
||||
"Curiously corrupted WAL: a record stops inside the header; \
|
||||
offs=0x{:x}, record continuation, pageleft={}, contlen={}",
|
||||
offs, pageleft, contlen
|
||||
);
|
||||
break;
|
||||
}
|
||||
// Do nothing: we are still reading the header. It's accounted in CRC in the end of the record.
|
||||
}
|
||||
rec_offs += n;
|
||||
offs += n;
|
||||
contlen -= n;
|
||||
|
||||
if contlen == 0 {
|
||||
trace!(" record completed at 0x{:x}", offs);
|
||||
crc = crc32c_append(crc, &rec_hdr);
|
||||
offs = (offs + 7) & !7; // pad on 8 bytes boundary */
|
||||
trace!(
|
||||
" padded offs to 0x{:x}, crc is {:x}, expected crc is {:x}",
|
||||
offs,
|
||||
crc,
|
||||
xl_crc
|
||||
);
|
||||
if skipping_first_contrecord {
|
||||
// do nothing, the flag will go down on next iteration when we're reading new record
|
||||
trace!(" first conrecord has been just completed");
|
||||
} else if crc == xl_crc {
|
||||
// record is valid, advance the result to its end (with
|
||||
// alignment to the next record taken into account)
|
||||
trace!(
|
||||
" updating last_valid_rec_pos: 0x{:x} --> 0x{:x}",
|
||||
last_valid_rec_pos,
|
||||
offs
|
||||
);
|
||||
last_valid_rec_pos = offs;
|
||||
} else {
|
||||
info!(
|
||||
"CRC mismatch {} vs {} at {}",
|
||||
crc, xl_crc, last_valid_rec_pos
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
trace!("last_valid_rec_pos=0x{:x}", last_valid_rec_pos);
|
||||
Ok(last_valid_rec_pos as u32)
|
||||
}
|
||||
|
||||
// Open .partial or full WAL segment file, if present.
|
||||
fn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {
|
||||
let mut partial_path = seg_file_path.to_owned();
|
||||
partial_path.set_extension("partial");
|
||||
match File::open(partial_path) {
|
||||
Ok(file) => Ok(Some(file)),
|
||||
Err(e) => match e.kind() {
|
||||
ErrorKind::NotFound => {
|
||||
// .partial not found, try full
|
||||
match File::open(seg_file_path) {
|
||||
Ok(file) => Ok(Some(file)),
|
||||
Err(e) => match e.kind() {
|
||||
ErrorKind::NotFound => Ok(None),
|
||||
_ => Err(e.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
_ => Err(e.into()),
|
||||
},
|
||||
///
|
||||
/// Scan a directory that contains PostgreSQL WAL files, for the end of WAL.
|
||||
/// If precise, returns end LSN (next insertion point, basically);
|
||||
/// otherwise, start of the last segment.
|
||||
/// Returns (0, 0) if there is no WAL.
|
||||
///
|
||||
pub fn find_end_of_wal(
|
||||
data_dir: &Path,
|
||||
wal_seg_size: usize,
|
||||
precise: bool,
|
||||
start_lsn: Lsn, // start reading WAL at this point or later
|
||||
) -> anyhow::Result<(XLogRecPtr, TimeLineID)> {
|
||||
let mut high_segno: XLogSegNo = 0;
|
||||
let mut high_tli: TimeLineID = 0;
|
||||
let mut high_ispartial = false;
|
||||
|
||||
for entry in fs::read_dir(data_dir)?.flatten() {
|
||||
let ispartial: bool;
|
||||
let entry_name = entry.file_name();
|
||||
let fname = entry_name
|
||||
.to_str()
|
||||
.ok_or_else(|| anyhow!("Invalid file name"))?;
|
||||
|
||||
/*
|
||||
* Check if the filename looks like an xlog file, or a .partial file.
|
||||
*/
|
||||
if IsXLogFileName(fname) {
|
||||
ispartial = false;
|
||||
} else if IsPartialXLogFileName(fname) {
|
||||
ispartial = true;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
let (segno, tli) = XLogFromFileName(fname, wal_seg_size);
|
||||
if !ispartial && entry.metadata()?.len() != wal_seg_size as u64 {
|
||||
continue;
|
||||
}
|
||||
if segno > high_segno
|
||||
|| (segno == high_segno && tli > high_tli)
|
||||
|| (segno == high_segno && tli == high_tli && high_ispartial && !ispartial)
|
||||
{
|
||||
high_segno = segno;
|
||||
high_tli = tli;
|
||||
high_ispartial = ispartial;
|
||||
}
|
||||
}
|
||||
if high_segno > 0 {
|
||||
let mut high_offs = 0;
|
||||
/*
|
||||
* Move the starting pointer to the start of the next segment, if the
|
||||
* highest one we saw was completed.
|
||||
*/
|
||||
if !high_ispartial {
|
||||
high_segno += 1;
|
||||
} else if precise {
|
||||
/* otherwise locate last record in last partial segment */
|
||||
if start_lsn.segment_number(wal_seg_size) > high_segno {
|
||||
bail!(
|
||||
"provided start_lsn {:?} is beyond highest segno {:?} available",
|
||||
start_lsn,
|
||||
high_segno,
|
||||
);
|
||||
}
|
||||
let start_offset = if start_lsn.segment_number(wal_seg_size) == high_segno {
|
||||
start_lsn.segment_offset(wal_seg_size)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
high_offs = find_end_of_wal_segment(
|
||||
data_dir,
|
||||
high_segno,
|
||||
high_tli,
|
||||
wal_seg_size,
|
||||
start_offset,
|
||||
)?;
|
||||
}
|
||||
let high_ptr = XLogSegNoOffsetToRecPtr(high_segno, high_offs, wal_seg_size);
|
||||
return Ok((high_ptr, high_tli));
|
||||
}
|
||||
Ok((0, 0))
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
let mut data_dir = PathBuf::new();
|
||||
data_dir.push(".");
|
||||
let wal_end = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, Lsn(0)).unwrap();
|
||||
println!("wal_end={:?}", wal_end);
|
||||
let (wal_end, tli) = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, true, Lsn(0)).unwrap();
|
||||
println!(
|
||||
"wal_end={:>08X}{:>08X}, tli={}",
|
||||
(wal_end >> 32) as u32,
|
||||
wal_end as u32,
|
||||
tli
|
||||
);
|
||||
}
|
||||
|
||||
impl XLogRecord {
|
||||
@@ -350,10 +595,7 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result<Bytes, Seriali
|
||||
mod tests {
|
||||
use super::*;
|
||||
use regex::Regex;
|
||||
use std::cmp::min;
|
||||
use std::fs;
|
||||
use std::{env, str::FromStr};
|
||||
use utils::const_assert;
|
||||
|
||||
fn init_logging() {
|
||||
let _ = env_logger::Builder::from_env(
|
||||
@@ -364,7 +606,10 @@ mod tests {
|
||||
.try_init();
|
||||
}
|
||||
|
||||
fn test_end_of_wal<C: wal_craft::Crafter>(test_name: &str) {
|
||||
fn test_end_of_wal<C: wal_craft::Crafter>(
|
||||
test_name: &str,
|
||||
expected_end_of_wal_non_partial: Lsn,
|
||||
) {
|
||||
use wal_craft::*;
|
||||
// Craft some WAL
|
||||
let top_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
@@ -385,7 +630,7 @@ mod tests {
|
||||
.iter()
|
||||
.map(|&lsn| u64::from(lsn).into())
|
||||
.collect();
|
||||
let expected_end_of_wal: Lsn = u64::from(expected_end_of_wal_partial).into();
|
||||
let expected_end_of_wal_partial: Lsn = u64::from(expected_end_of_wal_partial).into();
|
||||
srv.kill();
|
||||
|
||||
// Check find_end_of_wal on the initial WAL
|
||||
@@ -397,10 +642,10 @@ mod tests {
|
||||
.filter(|fname| IsXLogFileName(fname))
|
||||
.max()
|
||||
.unwrap();
|
||||
check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal);
|
||||
for start_lsn in intermediate_lsns
|
||||
.iter()
|
||||
.chain(std::iter::once(&expected_end_of_wal))
|
||||
check_pg_waldump_end_of_wal(&cfg, &last_segment, expected_end_of_wal_partial);
|
||||
for start_lsn in std::iter::once(Lsn(0))
|
||||
.chain(intermediate_lsns)
|
||||
.chain(std::iter::once(expected_end_of_wal_partial))
|
||||
{
|
||||
// Erase all WAL before `start_lsn` to ensure it's not used by `find_end_of_wal`.
|
||||
// We assume that `start_lsn` is non-decreasing.
|
||||
@@ -415,7 +660,7 @@ mod tests {
|
||||
}
|
||||
let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE);
|
||||
let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
|
||||
if seg_start_lsn > u64::from(*start_lsn) {
|
||||
if seg_start_lsn > u64::from(start_lsn) {
|
||||
continue;
|
||||
}
|
||||
let mut f = File::options().write(true).open(file.path()).unwrap();
|
||||
@@ -423,12 +668,18 @@ mod tests {
|
||||
f.write_all(
|
||||
&ZEROS[0..min(
|
||||
WAL_SEGMENT_SIZE,
|
||||
(u64::from(*start_lsn) - seg_start_lsn) as usize,
|
||||
(u64::from(start_lsn) - seg_start_lsn) as usize,
|
||||
)],
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
check_end_of_wal(&cfg, &last_segment, *start_lsn, expected_end_of_wal);
|
||||
check_end_of_wal(
|
||||
&cfg,
|
||||
&last_segment,
|
||||
start_lsn,
|
||||
expected_end_of_wal_non_partial,
|
||||
expected_end_of_wal_partial,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -465,15 +716,18 @@ mod tests {
|
||||
cfg: &wal_craft::Conf,
|
||||
last_segment: &str,
|
||||
start_lsn: Lsn,
|
||||
expected_end_of_wal: Lsn,
|
||||
expected_end_of_wal_non_partial: Lsn,
|
||||
expected_end_of_wal_partial: Lsn,
|
||||
) {
|
||||
// Check end_of_wal on non-partial WAL segment (we treat it as fully populated)
|
||||
// let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
|
||||
// info!(
|
||||
// "find_end_of_wal returned wal_end={} with non-partial WAL segment",
|
||||
// wal_end
|
||||
// );
|
||||
// assert_eq!(wal_end, expected_end_of_wal_non_partial);
|
||||
let (wal_end, tli) =
|
||||
find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, true, start_lsn).unwrap();
|
||||
let wal_end = Lsn(wal_end);
|
||||
info!(
|
||||
"find_end_of_wal returned (wal_end={}, tli={}) with non-partial WAL segment",
|
||||
wal_end, tli
|
||||
);
|
||||
assert_eq!(wal_end, expected_end_of_wal_non_partial);
|
||||
|
||||
// Rename file to partial to actually find last valid lsn, then rename it back.
|
||||
fs::rename(
|
||||
@@ -481,12 +735,14 @@ mod tests {
|
||||
cfg.wal_dir().join(format!("{}.partial", last_segment)),
|
||||
)
|
||||
.unwrap();
|
||||
let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
|
||||
let (wal_end, tli) =
|
||||
find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, true, start_lsn).unwrap();
|
||||
let wal_end = Lsn(wal_end);
|
||||
info!(
|
||||
"find_end_of_wal returned wal_end={} with partial WAL segment",
|
||||
wal_end
|
||||
"find_end_of_wal returned (wal_end={}, tli={}) with partial WAL segment",
|
||||
wal_end, tli
|
||||
);
|
||||
assert_eq!(wal_end, expected_end_of_wal);
|
||||
assert_eq!(wal_end, expected_end_of_wal_partial);
|
||||
fs::rename(
|
||||
cfg.wal_dir().join(format!("{}.partial", last_segment)),
|
||||
cfg.wal_dir().join(last_segment),
|
||||
@@ -499,7 +755,10 @@ mod tests {
|
||||
#[test]
|
||||
pub fn test_find_end_of_wal_simple() {
|
||||
init_logging();
|
||||
test_end_of_wal::<wal_craft::Simple>("test_find_end_of_wal_simple");
|
||||
test_end_of_wal::<wal_craft::Simple>(
|
||||
"test_find_end_of_wal_simple",
|
||||
"0/2000000".parse::<Lsn>().unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -507,14 +766,17 @@ mod tests {
|
||||
init_logging();
|
||||
test_end_of_wal::<wal_craft::WalRecordCrossingSegmentFollowedBySmallOne>(
|
||||
"test_find_end_of_wal_crossing_segment_followed_by_small_one",
|
||||
"0/3000000".parse::<Lsn>().unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "not yet fixed, needs correct parsing of pre-last segments"] // TODO
|
||||
pub fn test_find_end_of_wal_last_crossing_segment() {
|
||||
init_logging();
|
||||
test_end_of_wal::<wal_craft::LastWalRecordCrossingSegment>(
|
||||
"test_find_end_of_wal_last_crossing_segment",
|
||||
"0/3000000".parse::<Lsn>().unwrap(),
|
||||
);
|
||||
}


@@ -265,7 +265,7 @@ mod tests {
use serde::{Deserialize, Serialize};
use std::io::Cursor;

#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct ShortStruct {
a: u8,
b: u32,
@@ -286,7 +286,7 @@ mod tests {
const SHORT2_ENC_LE: &[u8] = &[8, 0, 0, 3, 7];
const SHORT2_ENC_LE_TRAILING: &[u8] = &[8, 0, 0, 3, 7, 0xff, 0xff, 0xff];

#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct LongMsg {
pub tag: u8,
pub blockpos: u32,

@@ -10,10 +10,12 @@ pub fn get_request_param<'a>(
) -> Result<&'a str, ApiError> {
match request.param(param_name) {
Some(arg) => Ok(arg),
None => Err(ApiError::BadRequest(format!(
"no {} specified in path param",
param_name
))),
None => {
return Err(ApiError::BadRequest(format!(
"no {} specified in path param",
param_name
)))
}
}
}


@@ -18,7 +18,7 @@ pub const XLOG_BLCKSZ: u32 = 8192;
pub struct Lsn(pub u64);

/// We tried to parse an LSN from a string, but failed
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[derive(Debug, PartialEq, thiserror::Error)]
#[error("LsnParseError")]
pub struct LsnParseError;


@@ -50,7 +50,7 @@ pub trait Handler {

/// PostgresBackend protocol state.
/// XXX: The order of the constructors matters.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
#[derive(Clone, Copy, PartialEq, PartialOrd)]
pub enum ProtoState {
Initialization,
Encrypted,

@@ -930,7 +930,7 @@ impl<'a> BeMessage<'a> {

// Neon extension of postgres replication protocol
// See NEON_STATUS_UPDATE_TAG_BYTE
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub struct ReplicationFeedback {
// Last known size of the timeline. Used to enforce timeline size limit.
pub current_timeline_size: u64,

@@ -9,7 +9,7 @@ use std::sync::Mutex;
use std::time::Duration;

/// An error happened while waiting for a number
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[derive(Debug, PartialEq, thiserror::Error)]
#[error("SeqWaitError")]
pub enum SeqWaitError {
/// The wait timeout was reached

@@ -4,7 +4,7 @@ use serde::Deserialize;
use std::io::Read;
use utils::bin_ser::LeSer;

#[derive(Debug, PartialEq, Eq, Deserialize)]
#[derive(Debug, PartialEq, Deserialize)]
pub struct HeaderData {
magic: u16,
info: u16,

@@ -30,9 +30,6 @@ static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
});

#[test]
// [false-positive](https://github.com/rust-lang/rust-clippy/issues/9274),
// we resize the vector so doing some modifications after all
#[allow(clippy::read_zero_byte_vec)]
fn ssl() {
let (mut client_sock, server_sock) = make_tcp_pair();


@@ -167,7 +167,7 @@ fn local_timeline_info_from_repo_timeline(
) -> anyhow::Result<LocalTimelineInfo> {
match repo_timeline {
RepositoryTimeline::Loaded(timeline) => local_timeline_info_from_loaded_timeline(
timeline,
&*timeline,
include_non_incremental_logical_size,
include_non_incremental_physical_size,
),

@@ -209,7 +209,7 @@ where
reader: R,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum VisitDirection {
Forwards,
Backwards,

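Several of the hunks above drop `Eq` from `#[derive(..)]` lists while keeping `PartialEq`; this side of the compare appears to undo an earlier lint-driven cleanup (likely clippy's `derive_partial_eq_without_eq`, though that motivation is an inference, not stated in the diff). A small self-contained example of what the extra `Eq` derive buys, purely as illustration:

```rust
use std::collections::HashMap;

// `Eq` adds no methods on top of `PartialEq`, but it asserts total equality,
// which is required e.g. for HashMap keys (together with Hash).
#[derive(Debug, PartialEq, Eq, Hash)]
struct WithEq(u32);

#[derive(Debug, PartialEq)]
struct WithoutEq(u32);

fn main() {
    let mut m = HashMap::new();
    m.insert(WithEq(1), "ok"); // compiles: WithEq is Eq + Hash
    // A HashMap keyed by WithoutEq would not compile, because Eq is missing.
    assert_eq!(WithoutEq(1), WithoutEq(1));
    println!("{:?}", m.get(&WithEq(1)));
}
```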
@@ -4,7 +4,6 @@ use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::Bytes;
use fail::fail_point;
use itertools::Itertools;
use metrics::core::{AtomicU64, GenericCounter};
use once_cell::sync::Lazy;
use tracing::*;

@@ -224,70 +223,6 @@ impl From<LayeredTimelineEntry> for RepositoryTimeline<LayeredTimeline> {
}
}

struct TimelineMetrics {
pub reconstruct_time_histo: Histogram,
pub materialized_page_cache_hit_counter: GenericCounter<AtomicU64>,
pub flush_time_histo: Histogram,
pub compact_time_histo: Histogram,
pub create_images_time_histo: Histogram,
pub init_logical_size_histo: Histogram,
pub load_layer_map_histo: Histogram,
pub last_record_gauge: IntGauge,
pub wait_lsn_time_histo: Histogram,
pub current_physical_size_gauge: UIntGauge,
}

impl TimelineMetrics {
fn new(tenant_id: &ZTenantId, timeline_id: &ZTimelineId) -> Self {
let tenant_id = tenant_id.to_string();
let timeline_id = timeline_id.to_string();

let reconstruct_time_histo = RECONSTRUCT_TIME
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let flush_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["layer flush", &tenant_id, &timeline_id])
.unwrap();
let compact_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["compact", &tenant_id, &timeline_id])
.unwrap();
let create_images_time_histo = STORAGE_TIME
.get_metric_with_label_values(&["create images", &tenant_id, &timeline_id])
.unwrap();
let init_logical_size_histo = STORAGE_TIME
.get_metric_with_label_values(&["init logical size", &tenant_id, &timeline_id])
.unwrap();
let load_layer_map_histo = STORAGE_TIME
.get_metric_with_label_values(&["load layer map", &tenant_id, &timeline_id])
.unwrap();
let last_record_gauge = LAST_RECORD_LSN
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let wait_lsn_time_histo = WAIT_LSN_TIME
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();
let current_physical_size_gauge = CURRENT_PHYSICAL_SIZE
.get_metric_with_label_values(&[&tenant_id, &timeline_id])
.unwrap();

TimelineMetrics {
reconstruct_time_histo,
materialized_page_cache_hit_counter,
flush_time_histo,
compact_time_histo,
create_images_time_histo,
init_logical_size_histo,
load_layer_map_histo,
last_record_gauge,
wait_lsn_time_histo,
current_physical_size_gauge,
}
}
}

pub struct LayeredTimeline {
conf: &'static PageServerConf,
tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -334,7 +269,14 @@ pub struct LayeredTimeline {
ancestor_lsn: Lsn,

// Metrics
metrics: TimelineMetrics,
reconstruct_time_histo: Histogram,
materialized_page_cache_hit_counter: IntCounter,
flush_time_histo: Histogram,
compact_time_histo: Histogram,
create_images_time_histo: Histogram,
last_record_gauge: IntGauge,
wait_lsn_time_histo: Histogram,
current_physical_size_gauge: UIntGauge,

/// If `true`, will backup its files that appear after each checkpointing to the remote storage.
upload_layers: AtomicBool,
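For context on the `TimelineMetrics` block above: the struct resolves label values once per timeline and caches the concrete metric handles, instead of the timeline carrying one field per metric. A condensed sketch of that pattern using the `prometheus` crate directly; the real code goes through the workspace `metrics` wrapper, and the metric names below are invented for the example:

```rust
use once_cell::sync::Lazy;
use prometheus::{
    register_histogram_vec, register_int_gauge_vec, Histogram, HistogramVec, IntGauge, IntGaugeVec,
};

// Global label-indexed metric families (names are placeholders).
static STORAGE_TIME: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "example_storage_time",
        "Time spent in storage operations",
        &["operation", "tenant_id", "timeline_id"]
    )
    .unwrap()
});

static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
        "example_last_record_lsn",
        "Last record LSN",
        &["tenant_id", "timeline_id"]
    )
    .unwrap()
});

// Per-timeline handles, resolved once from the label values.
struct TimelineMetrics {
    flush_time_histo: Histogram,
    last_record_gauge: IntGauge,
}

impl TimelineMetrics {
    fn new(tenant_id: &str, timeline_id: &str) -> Self {
        Self {
            flush_time_histo: STORAGE_TIME
                .get_metric_with_label_values(&["layer flush", tenant_id, timeline_id])
                .unwrap(),
            last_record_gauge: LAST_RECORD_LSN
                .get_metric_with_label_values(&[tenant_id, timeline_id])
                .unwrap(),
        }
    }
}
```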
@@ -484,7 +426,7 @@ impl Timeline for LayeredTimeline {
|
||||
"wait_lsn called by WAL receiver thread"
|
||||
);
|
||||
|
||||
self.metrics.wait_lsn_time_histo.observe_closure_duration(
|
||||
self.wait_lsn_time_histo.observe_closure_duration(
|
||||
|| self.last_record_lsn
|
||||
.wait_for_timeout(lsn, self.conf.wait_lsn_timeout)
|
||||
.with_context(|| {
|
||||
@@ -526,8 +468,7 @@ impl Timeline for LayeredTimeline {
|
||||
|
||||
self.get_reconstruct_data(key, lsn, &mut reconstruct_state)?;
|
||||
|
||||
self.metrics
|
||||
.reconstruct_time_histo
|
||||
self.reconstruct_time_histo
|
||||
.observe_closure_duration(|| self.reconstruct_value(key, lsn, reconstruct_state))
|
||||
}
|
||||
|
||||
@@ -589,7 +530,7 @@ impl Timeline for LayeredTimeline {
|
||||
}
|
||||
|
||||
fn get_physical_size(&self) -> u64 {
|
||||
self.metrics.current_physical_size_gauge.get()
|
||||
self.current_physical_size_gauge.get()
|
||||
}
|
||||
|
||||
fn get_physical_size_non_incremental(&self) -> anyhow::Result<u64> {
|
||||
@@ -663,6 +604,43 @@ impl LayeredTimeline {
|
||||
walredo_mgr: Arc<dyn WalRedoManager + Send + Sync>,
|
||||
upload_layers: bool,
|
||||
) -> LayeredTimeline {
|
||||
let reconstruct_time_histo = RECONSTRUCT_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
|
||||
.unwrap();
|
||||
let materialized_page_cache_hit_counter = MATERIALIZED_PAGE_CACHE_HIT
|
||||
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
|
||||
.unwrap();
|
||||
let flush_time_histo = STORAGE_TIME
|
||||
.get_metric_with_label_values(&[
|
||||
"layer flush",
|
||||
&tenant_id.to_string(),
|
||||
&timeline_id.to_string(),
|
||||
])
|
||||
.unwrap();
|
||||
let compact_time_histo = STORAGE_TIME
|
||||
.get_metric_with_label_values(&[
|
||||
"compact",
|
||||
&tenant_id.to_string(),
|
||||
&timeline_id.to_string(),
|
||||
])
|
||||
.unwrap();
|
||||
let create_images_time_histo = STORAGE_TIME
|
||||
.get_metric_with_label_values(&[
|
||||
"create images",
|
||||
&tenant_id.to_string(),
|
||||
&timeline_id.to_string(),
|
||||
])
|
||||
.unwrap();
|
||||
let last_record_gauge = LAST_RECORD_LSN
|
||||
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
|
||||
.unwrap();
|
||||
let wait_lsn_time_histo = WAIT_LSN_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
|
||||
.unwrap();
|
||||
let current_physical_size_gauge = CURRENT_PHYSICAL_SIZE
|
||||
.get_metric_with_label_values(&[&tenant_id.to_string(), &timeline_id.to_string()])
|
||||
.unwrap();
|
||||
|
||||
let mut result = LayeredTimeline {
|
||||
conf,
|
||||
tenant_conf,
|
||||
@@ -685,7 +663,14 @@ impl LayeredTimeline {
|
||||
ancestor_timeline: ancestor,
|
||||
ancestor_lsn: metadata.ancestor_lsn(),
|
||||
|
||||
metrics: TimelineMetrics::new(&tenant_id, &timeline_id),
|
||||
reconstruct_time_histo,
|
||||
materialized_page_cache_hit_counter,
|
||||
flush_time_histo,
|
||||
compact_time_histo,
|
||||
create_images_time_histo,
|
||||
last_record_gauge,
|
||||
wait_lsn_time_histo,
|
||||
current_physical_size_gauge,
|
||||
|
||||
upload_layers: AtomicBool::new(upload_layers),
|
||||
|
||||
@@ -721,8 +706,6 @@ impl LayeredTimeline {
|
||||
let mut layers = self.layers.write().unwrap();
|
||||
let mut num_layers = 0;
|
||||
|
||||
let timer = self.metrics.load_layer_map_histo.start_timer();
|
||||
|
||||
// Scan timeline directory and create ImageFileName and DeltaFilename
|
||||
// structs representing all files on disk
|
||||
let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
|
||||
@@ -794,11 +777,7 @@ impl LayeredTimeline {
|
||||
"loaded layer map with {} layers at {}, total physical size: {}",
|
||||
num_layers, disk_consistent_lsn, total_physical_size
|
||||
);
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
.set(total_physical_size);
|
||||
|
||||
timer.stop_and_record();
|
||||
self.current_physical_size_gauge.set(total_physical_size);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -829,16 +808,12 @@ impl LayeredTimeline {
|
||||
}
|
||||
}
|
||||
|
||||
let timer = self.metrics.init_logical_size_histo.start_timer();
|
||||
|
||||
// Have to calculate it the hard way
|
||||
let last_lsn = self.get_last_record_lsn();
|
||||
let logical_size = self.get_current_logical_size_non_incremental(last_lsn)?;
|
||||
self.current_logical_size
|
||||
.store(logical_size as isize, AtomicOrdering::SeqCst);
|
||||
debug!("calculated logical size the hard way: {}", logical_size);
|
||||
|
||||
timer.stop_and_record();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -903,7 +878,7 @@ impl LayeredTimeline {
|
||||
ValueReconstructResult::Continue => {
|
||||
// If we reached an earlier cached page image, we're done.
|
||||
if cont_lsn == cached_lsn + 1 {
|
||||
self.metrics.materialized_page_cache_hit_counter.inc_by(1);
|
||||
self.materialized_page_cache_hit_counter.inc_by(1);
|
||||
return Ok(());
|
||||
}
|
||||
if prev_lsn <= cont_lsn {
|
||||
@@ -1099,7 +1074,7 @@ impl LayeredTimeline {
|
||||
fn finish_write(&self, new_lsn: Lsn) {
|
||||
assert!(new_lsn.is_aligned());
|
||||
|
||||
self.metrics.last_record_gauge.set(new_lsn.0 as i64);
|
||||
self.last_record_gauge.set(new_lsn.0 as i64);
|
||||
self.last_record_lsn.advance(new_lsn);
|
||||
}
|
||||
|
||||
@@ -1203,7 +1178,7 @@ impl LayeredTimeline {
|
||||
}
|
||||
};
|
||||
|
||||
let timer = self.metrics.flush_time_histo.start_timer();
|
||||
let timer = self.flush_time_histo.start_timer();
|
||||
|
||||
loop {
|
||||
let layers = self.layers.read().unwrap();
|
||||
@@ -1374,7 +1349,7 @@ impl LayeredTimeline {
|
||||
|
||||
// update the timeline's physical size
|
||||
let sz = new_delta_path.metadata()?.len();
|
||||
self.metrics.current_physical_size_gauge.add(sz);
|
||||
self.current_physical_size_gauge.add(sz);
|
||||
// update metrics
|
||||
NUM_PERSISTENT_FILES_CREATED.inc_by(1);
|
||||
PERSISTENT_BYTES_WRITTEN.inc_by(sz);
|
||||
@@ -1443,7 +1418,7 @@ impl LayeredTimeline {
|
||||
}
|
||||
|
||||
// 3. Compact
|
||||
let timer = self.metrics.compact_time_histo.start_timer();
|
||||
let timer = self.compact_time_histo.start_timer();
|
||||
self.compact_level0(target_file_size)?;
|
||||
timer.stop_and_record();
|
||||
}
|
||||
@@ -1519,7 +1494,7 @@ impl LayeredTimeline {
|
||||
lsn: Lsn,
|
||||
force: bool,
|
||||
) -> Result<HashSet<PathBuf>> {
|
||||
let timer = self.metrics.create_images_time_histo.start_timer();
|
||||
let timer = self.create_images_time_histo.start_timer();
|
||||
let mut image_layers: Vec<ImageLayer> = Vec::new();
|
||||
let mut layer_paths_to_upload = HashSet::new();
|
||||
for partition in partitioning.parts.iter() {
|
||||
@@ -1563,8 +1538,7 @@ impl LayeredTimeline {
|
||||
|
||||
let mut layers = self.layers.write().unwrap();
|
||||
for l in image_layers {
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
self.current_physical_size_gauge
|
||||
.add(l.path().metadata()?.len());
|
||||
layers.insert_historic(Arc::new(l));
|
||||
}
|
||||
@@ -1814,8 +1788,7 @@ impl LayeredTimeline {
|
||||
let new_delta_path = l.path();
|
||||
|
||||
// update the timeline's physical size
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
self.current_physical_size_gauge
|
||||
.add(new_delta_path.metadata()?.len());
|
||||
|
||||
new_layer_paths.insert(new_delta_path);
|
||||
@@ -1828,9 +1801,7 @@ impl LayeredTimeline {
|
||||
drop(all_keys_iter);
|
||||
for l in deltas_to_compact {
|
||||
if let Some(path) = l.local_path() {
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
.sub(path.metadata()?.len());
|
||||
self.current_physical_size_gauge.sub(path.metadata()?.len());
|
||||
layer_paths_do_delete.insert(path);
|
||||
}
|
||||
l.delete()?;
|
||||
@@ -2087,9 +2058,7 @@ impl LayeredTimeline {
|
||||
let mut layer_paths_to_delete = HashSet::with_capacity(layers_to_remove.len());
|
||||
for doomed_layer in layers_to_remove {
|
||||
if let Some(path) = doomed_layer.local_path() {
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
.sub(path.metadata()?.len());
|
||||
self.current_physical_size_gauge.sub(path.metadata()?.len());
|
||||
layer_paths_to_delete.insert(path);
|
||||
}
|
||||
doomed_layer.delete()?;
|
||||
|
||||
@@ -979,7 +979,7 @@ enum DownloadStatus {
|
||||
#[derive(Debug)]
|
||||
enum UploadStatus {
|
||||
Uploaded,
|
||||
Failed(anyhow::Error),
|
||||
Failed,
|
||||
Nothing,
|
||||
}
|
||||
|
||||
@@ -1056,43 +1056,41 @@ where
|
||||
let (upload_status, download_status) = tokio::join!(
|
||||
async {
|
||||
if let Some(upload_data) = upload_data {
|
||||
let upload_retries = upload_data.retries;
|
||||
match validate_task_retries(upload_retries, max_sync_errors)
|
||||
match validate_task_retries(upload_data, max_sync_errors)
|
||||
.instrument(info_span!("retries_validation"))
|
||||
.await
|
||||
{
|
||||
ControlFlow::Continue(()) => {
|
||||
ControlFlow::Continue(new_upload_data) => {
|
||||
upload_timeline_data(
|
||||
conf,
|
||||
(storage.as_ref(), &index, sync_queue),
|
||||
current_remote_timeline.as_ref(),
|
||||
sync_id,
|
||||
upload_data,
|
||||
new_upload_data,
|
||||
sync_start,
|
||||
"upload",
|
||||
)
|
||||
.await
|
||||
.await;
|
||||
UploadStatus::Uploaded
|
||||
}
|
||||
ControlFlow::Break(()) => match update_remote_data(
|
||||
conf,
|
||||
storage.as_ref(),
|
||||
&index,
|
||||
sync_id,
|
||||
RemoteDataUpdate::Upload {
|
||||
uploaded_data: upload_data.data,
|
||||
upload_failed: true,
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => UploadStatus::Failed(anyhow::anyhow!(
|
||||
"Aborted after retries validation, current retries: {upload_retries}, max retries allowed: {max_sync_errors}"
|
||||
)),
|
||||
Err(e) => {
|
||||
ControlFlow::Break(failed_upload_data) => {
|
||||
if let Err(e) = update_remote_data(
|
||||
conf,
|
||||
storage.as_ref(),
|
||||
&index,
|
||||
sync_id,
|
||||
RemoteDataUpdate::Upload {
|
||||
uploaded_data: failed_upload_data.data,
|
||||
upload_failed: true,
|
||||
},
|
||||
)
|
||||
.await
|
||||
{
|
||||
error!("Failed to update remote timeline {sync_id}: {e:?}");
|
||||
UploadStatus::Failed(e)
|
||||
}
|
||||
},
|
||||
|
||||
UploadStatus::Failed
|
||||
}
|
||||
}
|
||||
} else {
|
||||
UploadStatus::Nothing
|
||||
@@ -1101,23 +1099,23 @@ where
|
||||
.instrument(info_span!("upload_timeline_data")),
|
||||
async {
|
||||
if let Some(download_data) = download_data {
|
||||
match validate_task_retries(download_data.retries, max_sync_errors)
|
||||
match validate_task_retries(download_data, max_sync_errors)
|
||||
.instrument(info_span!("retries_validation"))
|
||||
.await
|
||||
{
|
||||
ControlFlow::Continue(()) => {
|
||||
ControlFlow::Continue(new_download_data) => {
|
||||
return download_timeline_data(
|
||||
conf,
|
||||
(storage.as_ref(), &index, sync_queue),
|
||||
current_remote_timeline.as_ref(),
|
||||
sync_id,
|
||||
download_data,
|
||||
new_download_data,
|
||||
sync_start,
|
||||
"download",
|
||||
)
|
||||
.await;
|
||||
}
|
||||
ControlFlow::Break(()) => {
|
||||
ControlFlow::Break(_) => {
|
||||
index
|
||||
.write()
|
||||
.await
|
||||
@@ -1134,29 +1132,29 @@ where
|
||||
if let Some(delete_data) = batch.delete {
|
||||
match upload_status {
|
||||
UploadStatus::Uploaded | UploadStatus::Nothing => {
|
||||
match validate_task_retries(delete_data.retries, max_sync_errors)
|
||||
match validate_task_retries(delete_data, max_sync_errors)
|
||||
.instrument(info_span!("retries_validation"))
|
||||
.await
|
||||
{
|
||||
ControlFlow::Continue(()) => {
|
||||
ControlFlow::Continue(new_delete_data) => {
|
||||
delete_timeline_data(
|
||||
conf,
|
||||
(storage.as_ref(), &index, sync_queue),
|
||||
sync_id,
|
||||
delete_data,
|
||||
new_delete_data,
|
||||
sync_start,
|
||||
"delete",
|
||||
)
|
||||
.instrument(info_span!("delete_timeline_data"))
|
||||
.await;
|
||||
}
|
||||
ControlFlow::Break(()) => {
|
||||
ControlFlow::Break(failed_delete_data) => {
|
||||
if let Err(e) = update_remote_data(
|
||||
conf,
|
||||
storage.as_ref(),
|
||||
&index,
|
||||
sync_id,
|
||||
RemoteDataUpdate::Delete(&delete_data.data.deleted_layers),
|
||||
RemoteDataUpdate::Delete(&failed_delete_data.data.deleted_layers),
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -1165,8 +1163,8 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
UploadStatus::Failed(e) => {
|
||||
warn!("Skipping delete task due to failed upload tasks, reenqueuing. Upload data: {:?}, delete data: {delete_data:?}. Upload failure: {e:#}", batch.upload);
|
||||
UploadStatus::Failed => {
|
||||
warn!("Skipping delete task due to failed upload tasks, reenqueuing");
|
||||
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
|
||||
}
|
||||
}
|
||||
@@ -1351,8 +1349,7 @@ async fn upload_timeline_data<P, S>(
|
||||
new_upload_data: SyncData<LayersUpload>,
|
||||
sync_start: Instant,
|
||||
task_name: &str,
|
||||
) -> UploadStatus
|
||||
where
|
||||
) where
|
||||
P: Debug + Send + Sync + 'static,
|
||||
S: RemoteStorage<RemoteObjectId = P> + Send + Sync + 'static,
|
||||
{
|
||||
@@ -1365,9 +1362,9 @@ where
|
||||
)
|
||||
.await
|
||||
{
|
||||
UploadedTimeline::FailedAndRescheduled(e) => {
|
||||
UploadedTimeline::FailedAndRescheduled => {
|
||||
register_sync_status(sync_id, sync_start, task_name, Some(false));
|
||||
return UploadStatus::Failed(e);
|
||||
return;
|
||||
}
|
||||
UploadedTimeline::Successful(upload_data) => upload_data,
|
||||
};
|
||||
@@ -1386,14 +1383,12 @@ where
|
||||
{
|
||||
Ok(()) => {
|
||||
register_sync_status(sync_id, sync_start, task_name, Some(true));
|
||||
UploadStatus::Uploaded
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to update remote timeline {sync_id}: {e:?}");
|
||||
uploaded_data.retries += 1;
|
||||
sync_queue.push(sync_id, SyncTask::Upload(uploaded_data));
|
||||
register_sync_status(sync_id, sync_start, task_name, Some(false));
|
||||
UploadStatus::Failed(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1496,17 +1491,21 @@ where
.context("Failed to upload new index part")
}

async fn validate_task_retries(
current_attempt: u32,
async fn validate_task_retries<T>(
sync_data: SyncData<T>,
max_sync_errors: NonZeroU32,
) -> ControlFlow<(), ()> {
) -> ControlFlow<SyncData<T>, SyncData<T>> {
let current_attempt = sync_data.retries;
let max_sync_errors = max_sync_errors.get();
if current_attempt >= max_sync_errors {
return ControlFlow::Break(());
error!(
"Aborting task that failed {current_attempt} times, exceeding retries threshold of {max_sync_errors}",
);
return ControlFlow::Break(sync_data);
}

exponential_backoff(current_attempt, 1.0, 30.0).await;
ControlFlow::Continue(())
ControlFlow::Continue(sync_data)
}

fn schedule_first_sync_tasks(

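The `validate_task_retries` rewrite above threads the whole `SyncData<T>` payload through `ControlFlow`, so both the continue and the break path hand ownership back to the caller instead of making it re-borrow `upload_data` / `download_data` / `delete_data`. A hedged sketch of that shape with stand-in types; the `SyncData` fields and the backoff curve here are assumptions, not the real implementations:

```rust
use std::num::NonZeroU32;
use std::ops::ControlFlow;
use std::time::Duration;

// Stand-in for the real SyncData<T>; field names are assumptions.
struct SyncData<T> {
    retries: u32,
    data: T,
}

// Simplified stand-in for the real helper: sleep base * 2^attempt, capped.
async fn exponential_backoff(attempt: u32, base_secs: f64, cap_secs: f64) {
    let secs = (base_secs * 2f64.powi(attempt as i32)).min(cap_secs);
    tokio::time::sleep(Duration::from_secs_f64(secs)).await;
}

async fn validate_task_retries<T>(
    sync_data: SyncData<T>,
    max_sync_errors: NonZeroU32,
) -> ControlFlow<SyncData<T>, SyncData<T>> {
    if sync_data.retries >= max_sync_errors.get() {
        // Too many failures: give the payload back so the caller can mark
        // the corresponding remote entry as failed.
        return ControlFlow::Break(sync_data);
    }
    exponential_backoff(sync_data.retries, 1.0, 30.0).await;
    ControlFlow::Continue(sync_data)
}
```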
@@ -95,8 +95,6 @@ where
|
||||
debug!("Reenqueuing failed delete task for timeline {sync_id}");
|
||||
delete_data.retries += 1;
|
||||
sync_queue.push(sync_id, SyncTask::Delete(delete_data));
|
||||
} else {
|
||||
info!("Successfully deleted all layers");
|
||||
}
|
||||
errored
|
||||
}
|
||||
|
||||
@@ -75,7 +75,7 @@ where
#[derive(Debug)]
pub(super) enum UploadedTimeline {
/// Upload failed due to some error, the upload task is rescheduled for another retry.
FailedAndRescheduled(anyhow::Error),
FailedAndRescheduled,
/// No issues happened during the upload, all task files were put into the remote storage.
Successful(SyncData<LayersUpload>),
}
@@ -179,7 +179,7 @@ where
|
||||
})
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
|
||||
let mut errors = Vec::new();
|
||||
let mut errors_happened = false;
|
||||
while let Some(upload_result) = upload_tasks.next().await {
|
||||
match upload_result {
|
||||
Ok(uploaded_path) => {
|
||||
@@ -188,13 +188,13 @@ where
|
||||
}
|
||||
Err(e) => match e {
|
||||
UploadError::Other(e) => {
|
||||
errors_happened = true;
|
||||
error!("Failed to upload a layer for timeline {sync_id}: {e:?}");
|
||||
errors.push(format!("{e:#}"));
|
||||
}
|
||||
UploadError::MissingLocalFile(source_path, e) => {
|
||||
if source_path.exists() {
|
||||
errors_happened = true;
|
||||
error!("Failed to upload a layer for timeline {sync_id}: {e:?}");
|
||||
errors.push(format!("{e:#}"));
|
||||
} else {
|
||||
// We have run the upload sync task, but the file we wanted to upload is gone.
|
||||
// This is "fine" due the asynchronous nature of the sync loop: it only reacts to events and might need to
|
||||
@@ -217,17 +217,14 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
if errors.is_empty() {
|
||||
info!("Successfully uploaded all layers");
|
||||
UploadedTimeline::Successful(upload_data)
|
||||
} else {
|
||||
if errors_happened {
|
||||
debug!("Reenqueuing failed upload task for timeline {sync_id}");
|
||||
upload_data.retries += 1;
|
||||
sync_queue.push(sync_id, SyncTask::Upload(upload_data));
|
||||
UploadedTimeline::FailedAndRescheduled(anyhow::anyhow!(
|
||||
"Errors appeared during layer uploads: {:?}",
|
||||
errors
|
||||
))
|
||||
UploadedTimeline::FailedAndRescheduled
|
||||
} else {
|
||||
info!("Successfully uploaded all layers");
|
||||
UploadedTimeline::Successful(upload_data)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ pub mod defaults {
pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
}


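The walreceiver defaults above are human-readable duration strings (the lagging-WAL timeout moves from "3 seconds" to "10 seconds"). Purely as an illustration of how such strings map to `std::time::Duration`; the actual config parsing is not part of this diff, and using `humantime` here is an assumption:

```rust
use std::time::Duration;

// Parse a human-readable default such as "10 seconds" into a Duration.
fn parse_default(s: &str) -> Duration {
    humantime::parse_duration(s).expect("default durations are valid")
}

fn main() {
    let connect_timeout = parse_default("2 seconds");
    let lagging_wal_timeout = parse_default("10 seconds");
    assert_eq!(lagging_wal_timeout, Duration::from_secs(10));
    println!("{connect_timeout:?} / {lagging_wal_timeout:?}");
}
```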
@@ -17,7 +17,7 @@ use std::{
|
||||
};
|
||||
|
||||
use anyhow::Context;
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
use chrono::{DateTime, Local, NaiveDateTime, Utc};
|
||||
use etcd_broker::{
|
||||
subscription_key::SubscriptionKey, subscription_value::SkTimelineInfo, BrokerSubscription,
|
||||
BrokerUpdate, Client,
|
||||
@@ -33,10 +33,11 @@ use crate::{
|
||||
use crate::{RepositoryImpl, TimelineImpl};
|
||||
use utils::{
|
||||
lsn::Lsn,
|
||||
pq_proto::ReplicationFeedback,
|
||||
zid::{NodeId, ZTenantTimelineId},
|
||||
};
|
||||
|
||||
use super::{walreceiver_connection::WalConnectionStatus, TaskEvent, TaskHandle};
|
||||
use super::{TaskEvent, TaskHandle};
|
||||
|
||||
/// Spawns the loop to take care of the timeline's WAL streaming connection.
|
||||
pub(super) fn spawn_connection_manager_task(
|
||||
@@ -113,26 +114,21 @@ async fn connection_manager_loop_step(
|
||||
}
|
||||
} => {
|
||||
let wal_connection = walreceiver_state.wal_connection.as_mut().expect("Should have a connection, as checked by the corresponding select! guard");
|
||||
match wal_connection_update {
|
||||
match &wal_connection_update {
|
||||
TaskEvent::Started => {
|
||||
wal_connection.latest_connection_update = Utc::now().naive_utc();
|
||||
*walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0) += 1;
|
||||
},
|
||||
TaskEvent::NewEvent(status) => {
|
||||
if status.has_received_wal {
|
||||
// Reset connection attempts here only, we know that safekeeper is healthy
|
||||
// because it can send us a WAL update.
|
||||
walreceiver_state.wal_connection_attempts.remove(&wal_connection.sk_id);
|
||||
}
|
||||
wal_connection.status = status;
|
||||
TaskEvent::NewEvent(replication_feedback) => {
|
||||
wal_connection.latest_connection_update = DateTime::<Local>::from(replication_feedback.ps_replytime).naive_utc();
|
||||
// reset connection attempts here only, the only place where both nodes
|
||||
// explicitly confirmn with replication feedback that they are connected to each other
|
||||
walreceiver_state.wal_connection_attempts.remove(&wal_connection.sk_id);
|
||||
},
|
||||
TaskEvent::End(end_result) => {
|
||||
match end_result {
|
||||
Ok(()) => debug!("WAL receiving task finished"),
|
||||
Err(e) => {
|
||||
warn!("WAL receiving task failed: {e}");
|
||||
// If the task failed, set the connection attempts to at least 1, to try other safekeepers.
|
||||
let _ = *walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(1);
|
||||
}
|
||||
Err(e) => warn!("WAL receiving task failed: {e}"),
|
||||
};
|
||||
walreceiver_state.wal_connection = None;
|
||||
},
|
||||
@@ -261,21 +257,10 @@ struct WalreceiverState {
|
||||
struct WalConnection {
|
||||
/// Current safekeeper pageserver is connected to for WAL streaming.
|
||||
sk_id: NodeId,
|
||||
/// Status of the connection.
|
||||
status: WalConnectionStatus,
|
||||
/// Connection task start time or the timestamp of a latest connection message received.
|
||||
latest_connection_update: NaiveDateTime,
|
||||
/// WAL streaming task handle.
|
||||
connection_task: TaskHandle<WalConnectionStatus>,
|
||||
/// Have we discovered that other safekeeper has more recent WAL than we do?
|
||||
discovered_new_wal: Option<NewCommittedWAL>,
|
||||
}
|
||||
|
||||
/// Notion of a new committed WAL, which exists on other safekeeper.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct NewCommittedWAL {
|
||||
/// LSN of the new committed WAL.
|
||||
lsn: Lsn,
|
||||
/// When we discovered that the new committed WAL exists on other safekeeper.
|
||||
discovered_at: NaiveDateTime,
|
||||
connection_task: TaskHandle<ReplicationFeedback>,
|
||||
}
|
||||
|
||||
/// Data about the timeline to connect to, received from etcd.
|
||||
@@ -342,19 +327,10 @@ impl WalreceiverState {
|
||||
.instrument(info_span!("walreceiver_connection", id = %id))
|
||||
});
|
||||
|
||||
let now = Utc::now().naive_utc();
|
||||
self.wal_connection = Some(WalConnection {
|
||||
sk_id: new_sk_id,
|
||||
status: WalConnectionStatus {
|
||||
is_connected: false,
|
||||
has_received_wal: false,
|
||||
latest_connection_update: now,
|
||||
latest_wal_update: now,
|
||||
streaming_lsn: None,
|
||||
commit_lsn: None,
|
||||
},
|
||||
latest_connection_update: Utc::now().naive_utc(),
|
||||
connection_task: connection_handle,
|
||||
discovered_new_wal: None,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -385,16 +361,14 @@ impl WalreceiverState {
/// Cleans up stale etcd records and checks the rest for the new connection candidate.
/// Returns a new candidate, if the current state is absent or somewhat lagging, `None` otherwise.
/// The current rules for approving new candidates:
/// * pick a candidate different from the connected safekeeper with biggest `commit_lsn` and lowest failed connection attemps
/// * pick from the input data from etcd for currently connected safekeeper (if any)
/// * out of the rest input entries, pick one with biggest `commit_lsn` that's after than pageserver's latest Lsn for the timeline
/// * if there's no such entry, no new candidate found, abort
/// * otherwise check if the candidate is much better than the current one
///
/// To understand exact rules for determining if the candidate is better than the current one, refer to this function's implementation.
/// General rules are following:
/// * if connected safekeeper is not present, pick the candidate
/// * if we haven't received any updates for some time, pick the candidate
/// * if the candidate commit_lsn is much higher than the current one, pick the candidate
/// * if connected safekeeper stopped sending us new WAL which is available on other safekeeper, pick the candidate
/// * check the current connection time data for staleness, reconnect if stale
/// * otherwise, check if etcd updates contain currently connected safekeeper
/// * if not, that means no WAL updates happened after certain time (either none since the connection time or none since the last event after the connection)
/// Reconnect if the time exceeds the threshold.
/// * if there's one, compare its Lsn with the other candidate's, reconnect if candidate's over threshold
///
/// This way we ensure to keep up with the most up-to-date safekeeper and don't try to jump from one safekeeper to another too frequently.
/// Both thresholds are configured per tenant.
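The rules above gate switching on `max_lsn_wal_lag`: a candidate only wins on lag grounds when its `commit_lsn` is ahead of the current connection's by at least that many bytes. A minimal sketch of that single check with a stand-in `Lsn` type, illustration only:

```rust
use std::num::NonZeroU64;

// Stand-in for the project's Lsn newtype.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Lsn(u64);

fn candidate_is_much_better(
    current_commit_lsn: Lsn,
    candidate_commit_lsn: Lsn,
    max_lsn_wal_lag: NonZeroU64,
) -> bool {
    match candidate_commit_lsn.0.checked_sub(current_commit_lsn.0) {
        // Switch only when the candidate leads by at least the configured lag.
        Some(advantage) => advantage >= max_lsn_wal_lag.get(),
        // Candidate is behind the current connection: never switch for lag reasons.
        None => false,
    }
}
```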
@@ -410,128 +384,53 @@ impl WalreceiverState {
|
||||
|
||||
let now = Utc::now().naive_utc();
|
||||
if let Ok(latest_interaciton) =
|
||||
(now - existing_wal_connection.status.latest_connection_update).to_std()
|
||||
(now - existing_wal_connection.latest_connection_update).to_std()
|
||||
{
|
||||
// Drop connection if we haven't received keepalive message for a while.
|
||||
if latest_interaciton > self.wal_connect_timeout {
|
||||
if latest_interaciton > self.lagging_wal_timeout {
|
||||
return Some(NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_source_connstr: new_wal_source_connstr,
|
||||
reason: ReconnectReason::NoKeepAlives {
|
||||
last_keep_alive: Some(
|
||||
existing_wal_connection.status.latest_connection_update,
|
||||
reason: ReconnectReason::NoWalTimeout {
|
||||
last_wal_interaction: Some(
|
||||
existing_wal_connection.latest_connection_update,
|
||||
),
|
||||
check_time: now,
|
||||
threshold: self.wal_connect_timeout,
|
||||
threshold: self.lagging_wal_timeout,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if !existing_wal_connection.status.is_connected {
|
||||
// We haven't connected yet and we shouldn't switch until connection timeout (condition above).
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(current_commit_lsn) = existing_wal_connection.status.commit_lsn {
|
||||
let new_commit_lsn = new_safekeeper_etcd_data.commit_lsn.unwrap_or(Lsn(0));
|
||||
// Check if the new candidate has much more WAL than the current one.
|
||||
match new_commit_lsn.0.checked_sub(current_commit_lsn.0) {
|
||||
Some(new_sk_lsn_advantage) => {
|
||||
if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
|
||||
return Some(NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_source_connstr: new_wal_source_connstr,
|
||||
reason: ReconnectReason::LaggingWal {
|
||||
current_commit_lsn,
|
||||
new_commit_lsn,
|
||||
threshold: self.max_lsn_wal_lag,
|
||||
},
|
||||
});
|
||||
match self.wal_stream_candidates.get(&connected_sk_node) {
|
||||
Some(current_connection_etcd_data) => {
|
||||
let new_lsn = new_safekeeper_etcd_data.commit_lsn.unwrap_or(Lsn(0));
|
||||
let current_lsn = current_connection_etcd_data
|
||||
.timeline
|
||||
.commit_lsn
|
||||
.unwrap_or(Lsn(0));
|
||||
match new_lsn.0.checked_sub(current_lsn.0)
|
||||
{
|
||||
Some(new_sk_lsn_advantage) => {
|
||||
if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
|
||||
return Some(
|
||||
NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_source_connstr: new_wal_source_connstr,
|
||||
reason: ReconnectReason::LaggingWal { current_lsn, new_lsn, threshold: self.max_lsn_wal_lag },
|
||||
});
|
||||
}
|
||||
}
|
||||
None => debug!("Best SK candidate has its commit Lsn behind the current timeline's latest consistent Lsn"),
|
||||
}
|
||||
}
|
||||
None => debug!(
|
||||
"Best SK candidate has its commit_lsn behind connected SK's commit_lsn"
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
let current_lsn = match existing_wal_connection.status.streaming_lsn {
|
||||
Some(lsn) => lsn,
|
||||
None => self.local_timeline.get_last_record_lsn(),
|
||||
};
|
||||
let current_commit_lsn = existing_wal_connection
|
||||
.status
|
||||
.commit_lsn
|
||||
.unwrap_or(current_lsn);
|
||||
let candidate_commit_lsn = new_safekeeper_etcd_data.commit_lsn.unwrap_or(Lsn(0));
|
||||
|
||||
// Keep discovered_new_wal only if connected safekeeper has not caught up yet.
|
||||
let mut discovered_new_wal = existing_wal_connection
|
||||
.discovered_new_wal
|
||||
.filter(|new_wal| new_wal.lsn > current_commit_lsn);
|
||||
|
||||
if discovered_new_wal.is_none() {
|
||||
// Check if the new candidate has more WAL than the current one.
|
||||
// If the new candidate has more WAL than the current one, we consider switching to the new candidate.
|
||||
discovered_new_wal = if candidate_commit_lsn > current_commit_lsn {
|
||||
trace!(
|
||||
"New candidate has commit_lsn {}, higher than current_commit_lsn {}",
|
||||
candidate_commit_lsn,
|
||||
current_commit_lsn
|
||||
);
|
||||
Some(NewCommittedWAL {
|
||||
lsn: candidate_commit_lsn,
|
||||
discovered_at: Utc::now().naive_utc(),
|
||||
None => {
|
||||
return Some(NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_source_connstr: new_wal_source_connstr,
|
||||
reason: ReconnectReason::NoEtcdDataForExistingConnection,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
}
|
||||
|
||||
let waiting_for_new_lsn_since = if current_lsn < current_commit_lsn {
|
||||
// Connected safekeeper has more WAL, but we haven't received updates for some time.
|
||||
trace!(
|
||||
"Connected safekeeper has more WAL, but we haven't received updates for {:?}. current_lsn: {}, current_commit_lsn: {}",
|
||||
(now - existing_wal_connection.status.latest_wal_update).to_std(),
|
||||
current_lsn,
|
||||
current_commit_lsn
|
||||
);
|
||||
Some(existing_wal_connection.status.latest_wal_update)
|
||||
} else {
|
||||
discovered_new_wal.as_ref().map(|new_wal| {
|
||||
// We know that new WAL is available on other safekeeper, but connected safekeeper don't have it.
|
||||
new_wal
|
||||
.discovered_at
|
||||
.max(existing_wal_connection.status.latest_wal_update)
|
||||
})
|
||||
};
|
||||
|
||||
// If we haven't received any WAL updates for a while and candidate has more WAL, switch to it.
|
||||
if let Some(waiting_for_new_lsn_since) = waiting_for_new_lsn_since {
|
||||
if let Ok(waiting_for_new_wal) = (now - waiting_for_new_lsn_since).to_std() {
|
||||
if candidate_commit_lsn > current_commit_lsn
|
||||
&& waiting_for_new_wal > self.lagging_wal_timeout
|
||||
{
|
||||
return Some(NewWalConnectionCandidate {
|
||||
safekeeper_id: new_sk_id,
|
||||
wal_source_connstr: new_wal_source_connstr,
|
||||
reason: ReconnectReason::NoWalTimeout {
|
||||
current_lsn,
|
||||
current_commit_lsn,
|
||||
candidate_commit_lsn,
|
||||
last_wal_interaction: Some(
|
||||
existing_wal_connection.status.latest_wal_update,
|
||||
),
|
||||
check_time: now,
|
||||
threshold: self.lagging_wal_timeout,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.wal_connection.as_mut().unwrap().discovered_new_wal = discovered_new_wal;
|
||||
}
|
||||
None => {
|
||||
let (new_sk_id, _, new_wal_source_connstr) =
|
||||
@@ -551,7 +450,7 @@ impl WalreceiverState {
|
||||
/// Optionally, omits the given node, to support gracefully switching from a healthy safekeeper to another.
|
||||
///
|
||||
/// The candidate that is chosen:
|
||||
/// * has fewest connection attempts from pageserver to safekeeper node (reset every time we receive a WAL message from the node)
|
||||
/// * has fewest connection attempts from pageserver to safekeeper node (reset every time the WAL replication feedback is sent)
|
||||
/// * has greatest data Lsn among the ones that are left
|
||||
///
|
||||
/// NOTE:
|
||||
@@ -590,13 +489,14 @@ impl WalreceiverState {
|
||||
.max_by_key(|(_, info, _)| info.commit_lsn)
|
||||
}
|
||||
|
||||
/// Returns a list of safekeepers that have valid info and ready for connection.
|
||||
fn applicable_connection_candidates(
|
||||
&self,
|
||||
) -> impl Iterator<Item = (NodeId, &SkTimelineInfo, String)> {
|
||||
self.wal_stream_candidates
|
||||
.iter()
|
||||
.filter(|(_, info)| info.timeline.commit_lsn.is_some())
|
||||
.filter(|(_, etcd_info)| {
|
||||
etcd_info.timeline.commit_lsn > Some(self.local_timeline.get_last_record_lsn())
|
||||
})
|
||||
.filter_map(|(sk_id, etcd_info)| {
|
||||
let info = &etcd_info.timeline;
|
||||
match wal_stream_connection_string(
|
||||
@@ -612,7 +512,6 @@ impl WalreceiverState {
|
||||
})
|
||||
}
|
||||
|
||||
/// Remove candidates which haven't sent etcd updates for a while.
|
||||
fn cleanup_old_candidates(&mut self) {
|
||||
let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len());
|
||||
|
||||
@@ -647,24 +546,17 @@ struct NewWalConnectionCandidate {
#[derive(Debug, PartialEq, Eq)]
enum ReconnectReason {
NoExistingConnection,
NoEtcdDataForExistingConnection,
LaggingWal {
current_commit_lsn: Lsn,
new_commit_lsn: Lsn,
current_lsn: Lsn,
new_lsn: Lsn,
threshold: NonZeroU64,
},
NoWalTimeout {
current_lsn: Lsn,
current_commit_lsn: Lsn,
candidate_commit_lsn: Lsn,
last_wal_interaction: Option<NaiveDateTime>,
check_time: NaiveDateTime,
threshold: Duration,
},
NoKeepAlives {
last_keep_alive: Option<NaiveDateTime>,
check_time: NaiveDateTime,
threshold: Duration,
},
}

fn wal_stream_connection_string(
@@ -688,6 +580,7 @@ fn wal_stream_connection_string(
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::time::SystemTime;
|
||||
|
||||
use crate::repository::{
|
||||
repo_harness::{RepoHarness, TIMELINE_ID},
|
||||
@@ -765,7 +658,7 @@ mod tests {
|
||||
backup_lsn: None,
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: None,
|
||||
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
|
||||
},
|
||||
etcd_version: 0,
|
||||
latest_update: delay_over_threshold,
|
||||
@@ -791,26 +684,22 @@ mod tests {
|
||||
let connected_sk_id = NodeId(0);
|
||||
let current_lsn = 100_000;
|
||||
|
||||
let connection_status = WalConnectionStatus {
|
||||
is_connected: true,
|
||||
has_received_wal: true,
|
||||
latest_connection_update: now,
|
||||
latest_wal_update: now,
|
||||
commit_lsn: Some(Lsn(current_lsn)),
|
||||
streaming_lsn: Some(Lsn(current_lsn)),
|
||||
};
|
||||
|
||||
state.max_lsn_wal_lag = NonZeroU64::new(100).unwrap();
|
||||
state.wal_connection = Some(WalConnection {
|
||||
sk_id: connected_sk_id,
|
||||
status: connection_status.clone(),
|
||||
latest_connection_update: now,
|
||||
connection_task: TaskHandle::spawn(move |sender, _| async move {
|
||||
sender
|
||||
.send(TaskEvent::NewEvent(connection_status.clone()))
|
||||
.send(TaskEvent::NewEvent(ReplicationFeedback {
|
||||
current_timeline_size: 1,
|
||||
ps_writelsn: 1,
|
||||
ps_applylsn: current_lsn,
|
||||
ps_flushlsn: 1,
|
||||
ps_replytime: SystemTime::now(),
|
||||
}))
|
||||
.ok();
|
||||
Ok(())
|
||||
}),
|
||||
discovered_new_wal: None,
|
||||
});
|
||||
state.wal_stream_candidates = HashMap::from([
|
||||
(
|
||||
@@ -1035,6 +924,65 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn connection_no_etcd_data_candidate() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("connection_no_etcd_data_candidate")?;
|
||||
let mut state = dummy_state(&harness);
|
||||
|
||||
let now = Utc::now().naive_utc();
|
||||
let current_lsn = Lsn(100_000).align();
|
||||
let connected_sk_id = NodeId(0);
|
||||
let other_sk_id = NodeId(connected_sk_id.0 + 1);
|
||||
|
||||
state.wal_connection = Some(WalConnection {
|
||||
sk_id: connected_sk_id,
|
||||
latest_connection_update: now,
|
||||
connection_task: TaskHandle::spawn(move |sender, _| async move {
|
||||
sender
|
||||
.send(TaskEvent::NewEvent(ReplicationFeedback {
|
||||
current_timeline_size: 1,
|
||||
ps_writelsn: current_lsn.0,
|
||||
ps_applylsn: 1,
|
||||
ps_flushlsn: 1,
|
||||
ps_replytime: SystemTime::now(),
|
||||
}))
|
||||
.ok();
|
||||
Ok(())
|
||||
}),
|
||||
});
|
||||
state.wal_stream_candidates = HashMap::from([(
|
||||
other_sk_id,
|
||||
EtcdSkTimeline {
|
||||
timeline: SkTimelineInfo {
|
||||
last_log_term: None,
|
||||
flush_lsn: None,
|
||||
commit_lsn: Some(Lsn(1 + state.max_lsn_wal_lag.get())),
|
||||
backup_lsn: None,
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
|
||||
},
|
||||
etcd_version: 0,
|
||||
latest_update: now,
|
||||
},
|
||||
)]);
|
||||
|
||||
let only_candidate = state
|
||||
.next_connection_candidate()
|
||||
.expect("Expected one candidate selected out of the only data option, but got none");
|
||||
assert_eq!(only_candidate.safekeeper_id, other_sk_id);
|
||||
assert_eq!(
|
||||
only_candidate.reason,
|
||||
ReconnectReason::NoEtcdDataForExistingConnection,
|
||||
"Should select new safekeeper due to missing etcd data, even if there's an existing connection with this safekeeper"
|
||||
);
|
||||
assert!(only_candidate
|
||||
.wal_source_connstr
|
||||
.contains(DUMMY_SAFEKEEPER_CONNSTR));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn lsn_wal_over_threshhold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("lsn_wal_over_threshcurrent_candidate")?;
|
||||
@@ -1045,25 +993,21 @@ mod tests {
|
||||
let connected_sk_id = NodeId(0);
|
||||
let new_lsn = Lsn(current_lsn.0 + state.max_lsn_wal_lag.get() + 1);
|
||||
|
||||
let connection_status = WalConnectionStatus {
|
||||
is_connected: true,
|
||||
has_received_wal: true,
|
||||
latest_connection_update: now,
|
||||
latest_wal_update: now,
|
||||
commit_lsn: Some(current_lsn),
|
||||
streaming_lsn: Some(current_lsn),
|
||||
};
|
||||
|
||||
state.wal_connection = Some(WalConnection {
|
||||
sk_id: connected_sk_id,
|
||||
status: connection_status.clone(),
|
||||
latest_connection_update: now,
|
||||
connection_task: TaskHandle::spawn(move |sender, _| async move {
|
||||
sender
|
||||
.send(TaskEvent::NewEvent(connection_status.clone()))
|
||||
.send(TaskEvent::NewEvent(ReplicationFeedback {
|
||||
current_timeline_size: 1,
|
||||
ps_writelsn: current_lsn.0,
|
||||
ps_applylsn: 1,
|
||||
ps_flushlsn: 1,
|
||||
ps_replytime: SystemTime::now(),
|
||||
}))
|
||||
.ok();
|
||||
Ok(())
|
||||
}),
|
||||
discovered_new_wal: None,
|
||||
});
|
||||
state.wal_stream_candidates = HashMap::from([
|
||||
(
|
||||
@@ -1108,8 +1052,8 @@ mod tests {
|
||||
assert_eq!(
|
||||
over_threshcurrent_candidate.reason,
|
||||
ReconnectReason::LaggingWal {
|
||||
current_commit_lsn: current_lsn,
|
||||
new_commit_lsn: new_lsn,
|
||||
current_lsn,
|
||||
new_lsn,
|
||||
threshold: state.max_lsn_wal_lag
|
||||
},
|
||||
"Should select bigger WAL safekeeper if it starts to lag enough"
|
||||
@@ -1122,35 +1066,31 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn timeout_connection_threshhold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("timeout_connection_threshhold_current_candidate")?;
|
||||
async fn timeout_wal_over_threshhold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("timeout_wal_over_threshhold_current_candidate")?;
|
||||
let mut state = dummy_state(&harness);
|
||||
let current_lsn = Lsn(100_000).align();
|
||||
let now = Utc::now().naive_utc();
|
||||
|
||||
let wal_connect_timeout = chrono::Duration::from_std(state.wal_connect_timeout)?;
|
||||
let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?;
|
||||
let time_over_threshold =
|
||||
Utc::now().naive_utc() - wal_connect_timeout - wal_connect_timeout;
|
||||
|
||||
let connection_status = WalConnectionStatus {
|
||||
is_connected: true,
|
||||
has_received_wal: true,
|
||||
latest_connection_update: time_over_threshold,
|
||||
latest_wal_update: time_over_threshold,
|
||||
commit_lsn: Some(current_lsn),
|
||||
streaming_lsn: Some(current_lsn),
|
||||
};
|
||||
Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout;
|
||||
|
||||
state.wal_connection = Some(WalConnection {
|
||||
sk_id: NodeId(1),
|
||||
status: connection_status.clone(),
|
||||
latest_connection_update: time_over_threshold,
|
||||
connection_task: TaskHandle::spawn(move |sender, _| async move {
|
||||
sender
|
||||
.send(TaskEvent::NewEvent(connection_status.clone()))
|
||||
.send(TaskEvent::NewEvent(ReplicationFeedback {
|
||||
current_timeline_size: 1,
|
||||
ps_writelsn: current_lsn.0,
|
||||
ps_applylsn: 1,
|
||||
ps_flushlsn: 1,
|
||||
ps_replytime: SystemTime::now(),
|
||||
}))
|
||||
.ok();
|
||||
Ok(())
|
||||
}),
|
||||
discovered_new_wal: None,
|
||||
});
|
||||
state.wal_stream_candidates = HashMap::from([(
|
||||
NodeId(0),
|
||||
@@ -1175,12 +1115,12 @@ mod tests {
|
||||
|
||||
assert_eq!(over_threshcurrent_candidate.safekeeper_id, NodeId(0));
|
||||
match over_threshcurrent_candidate.reason {
|
||||
ReconnectReason::NoKeepAlives {
|
||||
last_keep_alive,
|
||||
ReconnectReason::NoWalTimeout {
|
||||
last_wal_interaction,
|
||||
threshold,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(last_keep_alive, Some(time_over_threshold));
|
||||
assert_eq!(last_wal_interaction, Some(time_over_threshold));
|
||||
assert_eq!(threshold, state.lagging_wal_timeout);
|
||||
}
|
||||
unexpected => panic!("Unexpected reason: {unexpected:?}"),
|
||||
@@ -1193,34 +1133,20 @@ mod tests {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn timeout_wal_over_threshhold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("timeout_wal_over_threshhold_current_candidate")?;
|
||||
async fn timeout_connection_over_threshhold_current_candidate() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("timeout_connection_over_threshhold_current_candidate")?;
|
||||
let mut state = dummy_state(&harness);
|
||||
let current_lsn = Lsn(100_000).align();
|
||||
let new_lsn = Lsn(100_100).align();
|
||||
let now = Utc::now().naive_utc();
|
||||
|
||||
let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?;
|
||||
let time_over_threshold =
|
||||
Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout;
|
||||
|
||||
let connection_status = WalConnectionStatus {
|
||||
is_connected: true,
|
||||
has_received_wal: true,
|
||||
latest_connection_update: now,
|
||||
latest_wal_update: time_over_threshold,
|
||||
commit_lsn: Some(current_lsn),
|
||||
streaming_lsn: Some(current_lsn),
|
||||
};
|
||||
|
||||
state.wal_connection = Some(WalConnection {
|
||||
sk_id: NodeId(1),
|
||||
status: connection_status,
|
||||
latest_connection_update: time_over_threshold,
|
||||
connection_task: TaskHandle::spawn(move |_, _| async move { Ok(()) }),
|
||||
discovered_new_wal: Some(NewCommittedWAL {
|
||||
discovered_at: time_over_threshold,
|
||||
lsn: new_lsn,
|
||||
}),
|
||||
});
|
||||
state.wal_stream_candidates = HashMap::from([(
|
||||
NodeId(0),
|
||||
@@ -1228,7 +1154,7 @@ mod tests {
|
||||
timeline: SkTimelineInfo {
|
||||
last_log_term: None,
|
||||
flush_lsn: None,
|
||||
commit_lsn: Some(new_lsn),
|
||||
commit_lsn: Some(current_lsn),
|
||||
backup_lsn: None,
|
||||
remote_consistent_lsn: None,
|
||||
peer_horizon_lsn: None,
|
||||
@@ -1246,16 +1172,10 @@ mod tests {
|
||||
assert_eq!(over_threshcurrent_candidate.safekeeper_id, NodeId(0));
|
||||
match over_threshcurrent_candidate.reason {
|
||||
ReconnectReason::NoWalTimeout {
|
||||
current_lsn,
|
||||
current_commit_lsn,
|
||||
candidate_commit_lsn,
|
||||
last_wal_interaction,
|
||||
threshold,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(current_lsn, current_lsn);
|
||||
assert_eq!(current_commit_lsn, current_lsn);
|
||||
assert_eq!(candidate_commit_lsn, new_lsn);
|
||||
assert_eq!(last_wal_interaction, Some(time_over_threshold));
|
||||
assert_eq!(threshold, state.lagging_wal_timeout);
|
||||
}
|
||||
@@ -1282,7 +1202,7 @@ mod tests {
|
||||
.expect("Failed to create an empty timeline for dummy wal connection manager"),
|
||||
wal_connect_timeout: Duration::from_secs(1),
|
||||
lagging_wal_timeout: Duration::from_secs(1),
|
||||
max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(),
|
||||
max_lsn_wal_lag: NonZeroU64::new(1).unwrap(),
|
||||
wal_connection: None,
|
||||
wal_stream_candidates: HashMap::new(),
|
||||
wal_connection_attempts: HashMap::new(),
|
||||
|
||||
@@ -8,7 +8,6 @@ use std::{
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use bytes::BytesMut;
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
use fail::fail_point;
|
||||
use futures::StreamExt;
|
||||
use postgres::{SimpleQueryMessage, SimpleQueryRow};
|
||||
@@ -30,29 +29,12 @@ use crate::{
use postgres_ffi::waldecoder::WalStreamDecoder;
use utils::{lsn::Lsn, pq_proto::ReplicationFeedback, zid::ZTenantTimelineId};

/// Status of the connection.
#[derive(Debug, Clone)]
pub struct WalConnectionStatus {
/// If we were able to initiate a postgres connection, this means that safekeeper process is at least running.
pub is_connected: bool,
/// Defines a healthy connection as one on which we have received at least some WAL bytes.
pub has_received_wal: bool,
/// Connection establishment time or the timestamp of a latest connection message received.
pub latest_connection_update: NaiveDateTime,
/// Time of the latest WAL message received.
pub latest_wal_update: NaiveDateTime,
/// Latest WAL update contained WAL up to this LSN. Next WAL message with start from that LSN.
pub streaming_lsn: Option<Lsn>,
/// Latest commit_lsn received from the safekeeper. Can be zero if no message has been received yet.
pub commit_lsn: Option<Lsn>,
}

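The removed `WalConnectionStatus` struct above was published to the connection manager over a watch channel (tokio's, by the look of the `watch::Sender` / `watch::Receiver` types in the surrounding hunks) as `TaskEvent::NewEvent(..)`; this side of the compare appears to go back to sending `ReplicationFeedback` instead. A simplified sketch of that publish step with stand-in types; the real `TaskEvent` definition and status fields differ:

```rust
use tokio::sync::watch;

// Trimmed-down stand-ins for the project's event and status types.
#[derive(Debug, Clone)]
enum TaskEvent<E> {
    Started,
    NewEvent(E),
    End(Result<(), String>),
}

#[derive(Debug, Clone)]
struct WalConnectionStatus {
    is_connected: bool,
    has_received_wal: bool,
}

fn publish_status(
    events_sender: &watch::Sender<TaskEvent<WalConnectionStatus>>,
    status: WalConnectionStatus,
) -> bool {
    // send() fails only when every receiver is gone; the real code treats
    // that as "listener dropped, abort the connection".
    events_sender.send(TaskEvent::NewEvent(status)).is_ok()
}

fn example() {
    let (tx, _rx) = watch::channel(TaskEvent::Started);
    let delivered = publish_status(
        &tx,
        WalConnectionStatus {
            is_connected: true,
            has_received_wal: false,
        },
    );
    assert!(delivered);
}
```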
/// Open a connection to the given safekeeper and receive WAL, sending back progress
|
||||
/// messages as we go.
|
||||
pub async fn handle_walreceiver_connection(
|
||||
id: ZTenantTimelineId,
|
||||
wal_source_connstr: &str,
|
||||
events_sender: &watch::Sender<TaskEvent<WalConnectionStatus>>,
|
||||
events_sender: &watch::Sender<TaskEvent<ReplicationFeedback>>,
|
||||
mut cancellation: watch::Receiver<()>,
|
||||
connect_timeout: Duration,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -67,26 +49,12 @@ pub async fn handle_walreceiver_connection(
|
||||
.await
|
||||
.context("Timed out while waiting for walreceiver connection to open")?
|
||||
.context("Failed to open walreceiver conection")?;
|
||||
|
||||
info!("connected!");
|
||||
let mut connection_status = WalConnectionStatus {
|
||||
is_connected: true,
|
||||
has_received_wal: false,
|
||||
latest_connection_update: Utc::now().naive_utc(),
|
||||
latest_wal_update: Utc::now().naive_utc(),
|
||||
streaming_lsn: None,
|
||||
commit_lsn: None,
|
||||
};
|
||||
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
|
||||
warn!("Wal connection event listener dropped right after connection init, aborting the connection: {e}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// The connection object performs the actual communication with the database,
|
||||
// so spawn it off to run on its own.
|
||||
let mut connection_cancellation = cancellation.clone();
|
||||
tokio::spawn(
|
||||
async move {
|
||||
info!("connected!");
|
||||
select! {
|
||||
connection_result = connection => match connection_result{
|
||||
Ok(()) => info!("Walreceiver db connection closed"),
|
||||
@@ -116,14 +84,6 @@ pub async fn handle_walreceiver_connection(
|
||||
|
||||
let identify = identify_system(&mut replication_client).await?;
|
||||
info!("{identify:?}");
|
||||
|
||||
connection_status.latest_connection_update = Utc::now().naive_utc();
|
||||
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
|
||||
warn!("Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// NB: this is a flush_lsn, not a commit_lsn.
|
||||
let end_of_wal = Lsn::from(u64::from(identify.xlogpos));
|
||||
let mut caught_up = false;
|
||||
let ZTenantTimelineId {
|
||||
@@ -158,7 +118,7 @@ pub async fn handle_walreceiver_connection(
|
||||
// There might be some padding after the last full record, skip it.
|
||||
startpoint += startpoint.calc_padding(8u32);
|
||||
|
||||
info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}...");
|
||||
info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, server is at {end_of_wal}...");
|
||||
|
||||
let query = format!("START_REPLICATION PHYSICAL {startpoint}");
|
||||
|
||||
@@ -180,33 +140,6 @@ pub async fn handle_walreceiver_connection(
|
||||
}
|
||||
} {
|
||||
let replication_message = replication_message?;
|
||||
let now = Utc::now().naive_utc();
|
||||
|
||||
// Update the connection status before processing the message. If the message processing
|
||||
// fails (e.g. in walingest), we still want to know latests LSNs from the safekeeper.
|
||||
match &replication_message {
|
||||
ReplicationMessage::XLogData(xlog_data) => {
|
||||
connection_status.latest_connection_update = now;
|
||||
connection_status.commit_lsn = Some(Lsn::from(xlog_data.wal_end()));
|
||||
connection_status.streaming_lsn = Some(Lsn::from(
|
||||
xlog_data.wal_start() + xlog_data.data().len() as u64,
|
||||
));
|
||||
if !xlog_data.data().is_empty() {
|
||||
connection_status.latest_wal_update = now;
|
||||
connection_status.has_received_wal = true;
|
||||
}
|
||||
}
|
||||
ReplicationMessage::PrimaryKeepAlive(keepalive) => {
|
||||
connection_status.latest_connection_update = now;
|
||||
connection_status.commit_lsn = Some(Lsn::from(keepalive.wal_end()));
|
||||
}
|
||||
&_ => {}
|
||||
};
|
||||
if let Err(e) = events_sender.send(TaskEvent::NewEvent(connection_status.clone())) {
|
||||
warn!("Wal connection event listener dropped, aborting the connection: {e}");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let status_update = match replication_message {
|
||||
ReplicationMessage::XLogData(xlog_data) => {
|
||||
// Pass the WAL data to the decoder, and see if we can decode
|
||||
@@ -324,6 +257,10 @@ pub async fn handle_walreceiver_connection(
|
||||
.as_mut()
|
||||
.zenith_status_update(data.len() as u64, &data)
|
||||
.await?;
|
||||
if let Err(e) = events_sender.send(TaskEvent::NewEvent(zenith_status_update)) {
|
||||
warn!("Wal connection event listener dropped, aborting the connection: {e}");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
poetry.lock (generated, 49 changed lines)
File diff suppressed because one or more lines are too long
@@ -7,7 +7,6 @@ edition = "2021"
anyhow = "1.0"
async-trait = "0.1"
base64 = "0.13.0"
bstr = "0.2.17"
bytes = { version = "1.0.1", features = ['serde'] }
clap = "3.0"
futures = "0.3.13"

@@ -12,7 +12,7 @@ use password_hack::PasswordHackPayload;
|
||||
mod flow;
|
||||
pub use flow::*;
|
||||
|
||||
use crate::error::UserFacingError;
|
||||
use crate::{error::UserFacingError, waiters};
|
||||
use std::io;
|
||||
use thiserror::Error;
|
||||
|
||||
@@ -22,54 +22,51 @@ pub type Result<T> = std::result::Result<T, AuthError>;
|
||||
/// Common authentication error.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum AuthErrorImpl {
|
||||
// This will be dropped in the future.
|
||||
/// Authentication error reported by the console.
|
||||
#[error(transparent)]
|
||||
Legacy(#[from] backend::LegacyAuthError),
|
||||
Console(#[from] backend::AuthError),
|
||||
|
||||
#[error(transparent)]
|
||||
Link(#[from] backend::LinkAuthError),
|
||||
GetAuthInfo(#[from] backend::console::ConsoleAuthError),
|
||||
|
||||
#[error(transparent)]
|
||||
GetAuthInfo(#[from] backend::GetAuthInfoError),
|
||||
|
||||
#[error(transparent)]
|
||||
WakeCompute(#[from] backend::WakeComputeError),
|
||||
|
||||
/// SASL protocol errors (includes [SCRAM](crate::scram)).
|
||||
#[error(transparent)]
|
||||
Sasl(#[from] crate::sasl::Error),
|
||||
|
||||
#[error("Unsupported authentication method: {0}")]
|
||||
BadAuthMethod(Box<str>),
|
||||
|
||||
#[error("Malformed password message: {0}")]
|
||||
MalformedPassword(&'static str),
|
||||
|
||||
#[error(
|
||||
"Project name is not specified. \
|
||||
Either please upgrade the postgres client library (libpq) for SNI support \
|
||||
or pass the project name as a parameter: '&options=project%3D<project-name>'. \
|
||||
See more at https://neon.tech/sni"
|
||||
)]
|
||||
MissingProjectName,
|
||||
|
||||
/// Errors produced by e.g. [`crate::stream::PqStream`].
|
||||
/// Errors produced by [`crate::stream::PqStream`].
|
||||
#[error(transparent)]
|
||||
Io(#[from] io::Error),
|
||||
}
|
||||
|
||||
impl AuthErrorImpl {
|
||||
pub fn auth_failed(msg: impl Into<String>) -> Self {
|
||||
Self::Console(backend::AuthError::auth_failed(msg))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<waiters::RegisterError> for AuthErrorImpl {
|
||||
fn from(e: waiters::RegisterError) -> Self {
|
||||
Self::Console(backend::AuthError::from(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<waiters::WaitError> for AuthErrorImpl {
|
||||
fn from(e: waiters::WaitError) -> Self {
|
||||
Self::Console(backend::AuthError::from(e))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[error(transparent)]
|
||||
pub struct AuthError(Box<AuthErrorImpl>);
|
||||
|
||||
impl AuthError {
|
||||
pub fn bad_auth_method(name: impl Into<Box<str>>) -> Self {
|
||||
AuthErrorImpl::BadAuthMethod(name.into()).into()
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Into<AuthErrorImpl>> From<E> for AuthError {
|
||||
fn from(e: E) -> Self {
|
||||
impl<T> From<T> for AuthError
|
||||
where
|
||||
AuthErrorImpl: From<T>,
|
||||
{
|
||||
fn from(e: T) -> Self {
|
||||
Self(Box::new(e.into()))
|
||||
}
|
||||
}
|
||||
@@ -78,14 +75,10 @@ impl UserFacingError for AuthError {
|
||||
fn to_string_client(&self) -> String {
|
||||
use AuthErrorImpl::*;
|
||||
match self.0.as_ref() {
|
||||
Legacy(e) => e.to_string_client(),
|
||||
Link(e) => e.to_string_client(),
|
||||
Console(e) => e.to_string_client(),
|
||||
GetAuthInfo(e) => e.to_string_client(),
|
||||
WakeCompute(e) => e.to_string_client(),
|
||||
Sasl(e) => e.to_string_client(),
|
||||
BadAuthMethod(_) => self.to_string(),
|
||||
MalformedPassword(_) => self.to_string(),
|
||||
MissingProjectName => self.to_string(),
|
||||
_ => "Internal error".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
mod link;
|
||||
mod postgres;
|
||||
|
||||
mod link;
|
||||
pub use link::LinkAuthError;
|
||||
|
||||
mod console;
|
||||
pub use console::{GetAuthInfoError, WakeComputeError};
|
||||
pub mod console;
|
||||
|
||||
mod legacy_console;
|
||||
pub use legacy_console::LegacyAuthError;
|
||||
pub use legacy_console::{AuthError, AuthErrorImpl};
|
||||
|
||||
use crate::{
|
||||
auth::{self, AuthFlow, ClientCredentials},
|
||||
@@ -86,7 +83,7 @@ impl From<DatabaseInfo> for tokio_postgres::Config {
|
||||
/// * However, when we substitute `T` with [`ClientCredentials`],
|
||||
/// this helps us provide the credentials only to those auth
|
||||
/// backends which require them for the authentication process.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub enum BackendType<T> {
|
||||
/// Legacy Cloud API (V1) + link auth.
|
||||
LegacyConsole(T),
|
||||
|
||||
@@ -13,11 +13,21 @@ use std::future::Future;
|
||||
use thiserror::Error;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
|
||||
const REQUEST_FAILED: &str = "Console request failed";
|
||||
pub type Result<T> = std::result::Result<T, ConsoleAuthError>;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum TransportError {
|
||||
#[error("Console responded with a malformed JSON: {0}")]
|
||||
pub enum ConsoleAuthError {
|
||||
#[error(transparent)]
|
||||
BadProjectName(#[from] auth::credentials::ClientCredsParseError),
|
||||
|
||||
// We shouldn't include the actual secret here.
|
||||
#[error("Bad authentication secret")]
|
||||
BadSecret,
|
||||
|
||||
#[error("Console responded with a malformed compute address: '{0}'")]
|
||||
BadComputeAddress(String),
|
||||
|
||||
#[error("Console responded with a malformed JSON: '{0}'")]
|
||||
BadResponse(#[from] serde_json::Error),
|
||||
|
||||
/// HTTP status (other than 200) returned by the console.
|
||||
@@ -28,72 +38,19 @@ pub enum TransportError {
|
||||
Io(#[from] std::io::Error),
|
||||
}
|
||||
|
||||
impl UserFacingError for TransportError {
|
||||
impl UserFacingError for ConsoleAuthError {
|
||||
fn to_string_client(&self) -> String {
|
||||
use TransportError::*;
|
||||
use ConsoleAuthError::*;
|
||||
match self {
|
||||
HttpStatus(_) => self.to_string(),
|
||||
_ => REQUEST_FAILED.to_owned(),
|
||||
BadProjectName(e) => e.to_string_client(),
|
||||
_ => "Internal error".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helps eliminate graceless `.map_err` calls without introducing another ctor.
|
||||
impl From<reqwest::Error> for TransportError {
|
||||
fn from(e: reqwest::Error) -> Self {
|
||||
io_error(e).into()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum GetAuthInfoError {
|
||||
// We shouldn't include the actual secret here.
|
||||
#[error("Console responded with a malformed auth secret")]
|
||||
BadSecret,
|
||||
|
||||
#[error(transparent)]
|
||||
Transport(TransportError),
|
||||
}
|
||||
|
||||
impl UserFacingError for GetAuthInfoError {
|
||||
fn to_string_client(&self) -> String {
|
||||
use GetAuthInfoError::*;
|
||||
match self {
|
||||
BadSecret => REQUEST_FAILED.to_owned(),
|
||||
Transport(e) => e.to_string_client(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Into<TransportError>> From<E> for GetAuthInfoError {
|
||||
fn from(e: E) -> Self {
|
||||
Self::Transport(e.into())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum WakeComputeError {
|
||||
// We shouldn't show users the address even if it's broken.
|
||||
#[error("Console responded with a malformed compute address: {0}")]
|
||||
BadComputeAddress(String),
|
||||
|
||||
#[error(transparent)]
|
||||
Transport(TransportError),
|
||||
}
|
||||
|
||||
impl UserFacingError for WakeComputeError {
|
||||
fn to_string_client(&self) -> String {
|
||||
use WakeComputeError::*;
|
||||
match self {
|
||||
BadComputeAddress(_) => REQUEST_FAILED.to_owned(),
|
||||
Transport(e) => e.to_string_client(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E: Into<TransportError>> From<E> for WakeComputeError {
|
||||
fn from(e: E) -> Self {
|
||||
Self::Transport(e.into())
|
||||
impl From<&auth::credentials::ClientCredsParseError> for ConsoleAuthError {
|
||||
fn from(e: &auth::credentials::ClientCredsParseError) -> Self {
|
||||
ConsoleAuthError::BadProjectName(e.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,7 +95,7 @@ impl<'a> Api<'a> {
|
||||
handle_user(client, &self, Self::get_auth_info, Self::wake_compute).await
|
||||
}
|
||||
|
||||
async fn get_auth_info(&self) -> Result<AuthInfo, GetAuthInfoError> {
|
||||
async fn get_auth_info(&self) -> Result<AuthInfo> {
|
||||
let mut url = self.endpoint.clone();
|
||||
url.path_segments_mut().push("proxy_get_role_secret");
|
||||
url.query_pairs_mut()
|
||||
@@ -148,20 +105,21 @@ impl<'a> Api<'a> {
|
||||
// TODO: use a proper logger
|
||||
println!("cplane request: {url}");
|
||||
|
||||
let resp = reqwest::get(url.into_inner()).await?;
|
||||
let resp = reqwest::get(url.into_inner()).await.map_err(io_error)?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(TransportError::HttpStatus(resp.status()).into());
|
||||
return Err(ConsoleAuthError::HttpStatus(resp.status()));
|
||||
}
|
||||
|
||||
let response: GetRoleSecretResponse = serde_json::from_str(&resp.text().await?)?;
|
||||
let response: GetRoleSecretResponse =
|
||||
serde_json::from_str(&resp.text().await.map_err(io_error)?)?;
|
||||
|
||||
scram::ServerSecret::parse(&response.role_secret)
|
||||
scram::ServerSecret::parse(response.role_secret.as_str())
|
||||
.map(AuthInfo::Scram)
|
||||
.ok_or(GetAuthInfoError::BadSecret)
|
||||
.ok_or(ConsoleAuthError::BadSecret)
|
||||
}
|
||||
|
||||
/// Wake up the compute node and return the corresponding connection info.
|
||||
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
|
||||
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg> {
|
||||
let mut url = self.endpoint.clone();
|
||||
url.path_segments_mut().push("proxy_wake_compute");
|
||||
url.query_pairs_mut()
|
||||
@@ -170,16 +128,17 @@ impl<'a> Api<'a> {
|
||||
// TODO: use a proper logger
|
||||
println!("cplane request: {url}");
|
||||
|
||||
let resp = reqwest::get(url.into_inner()).await?;
|
||||
let resp = reqwest::get(url.into_inner()).await.map_err(io_error)?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(TransportError::HttpStatus(resp.status()).into());
|
||||
return Err(ConsoleAuthError::HttpStatus(resp.status()));
|
||||
}
|
||||
|
||||
let response: GetWakeComputeResponse = serde_json::from_str(&resp.text().await?)?;
|
||||
let response: GetWakeComputeResponse =
|
||||
serde_json::from_str(&resp.text().await.map_err(io_error)?)?;
|
||||
|
||||
// Unfortunately, ownership won't let us use `Option::ok_or` here.
|
||||
let (host, port) = match parse_host_port(&response.address) {
|
||||
None => return Err(WakeComputeError::BadComputeAddress(response.address)),
|
||||
None => return Err(ConsoleAuthError::BadComputeAddress(response.address)),
|
||||
Some(x) => x,
|
||||
};
|
||||
|
||||
@@ -203,8 +162,8 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
|
||||
wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
|
||||
) -> auth::Result<compute::NodeInfo>
|
||||
where
|
||||
GetAuthInfo: Future<Output = Result<AuthInfo, GetAuthInfoError>>,
|
||||
WakeCompute: Future<Output = Result<ComputeConnCfg, WakeComputeError>>,
|
||||
GetAuthInfo: Future<Output = Result<AuthInfo>>,
|
||||
WakeCompute: Future<Output = Result<ComputeConnCfg>>,
|
||||
{
|
||||
let auth_info = get_auth_info(endpoint).await?;
|
||||
|
||||
@@ -212,7 +171,7 @@ where
|
||||
let scram_keys = match auth_info {
|
||||
AuthInfo::Md5(_) => {
|
||||
// TODO: decide if we should support MD5 in api v2
|
||||
return Err(auth::AuthError::bad_auth_method("MD5"));
|
||||
return Err(auth::AuthErrorImpl::auth_failed("MD5 is not supported").into());
|
||||
}
|
||||
AuthInfo::Scram(secret) => {
|
||||
let scram = auth::Scram(&secret);
|
||||
|
||||
@@ -14,7 +14,7 @@ use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use utils::pq_proto::BeMessage as Be;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum LegacyAuthError {
|
||||
pub enum AuthErrorImpl {
|
||||
/// Authentication error reported by the console.
|
||||
#[error("Authentication failed: {0}")]
|
||||
AuthFailed(String),
|
||||
@@ -24,7 +24,7 @@ pub enum LegacyAuthError {
|
||||
HttpStatus(reqwest::StatusCode),
|
||||
|
||||
#[error("Console responded with a malformed JSON: {0}")]
|
||||
BadResponse(#[from] serde_json::Error),
|
||||
MalformedResponse(#[from] serde_json::Error),
|
||||
|
||||
#[error(transparent)]
|
||||
Transport(#[from] reqwest::Error),
|
||||
@@ -36,10 +36,30 @@ pub enum LegacyAuthError {
|
||||
WaiterWait(#[from] waiters::WaitError),
|
||||
}
|
||||
|
||||
impl UserFacingError for LegacyAuthError {
|
||||
#[derive(Debug, Error)]
|
||||
#[error(transparent)]
|
||||
pub struct AuthError(Box<AuthErrorImpl>);
|
||||
|
||||
impl AuthError {
|
||||
/// Smart constructor for authentication error reported by `mgmt`.
|
||||
pub fn auth_failed(msg: impl Into<String>) -> Self {
|
||||
Self(Box::new(AuthErrorImpl::AuthFailed(msg.into())))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<T> for AuthError
|
||||
where
|
||||
AuthErrorImpl: From<T>,
|
||||
{
|
||||
fn from(e: T) -> Self {
|
||||
Self(Box::new(e.into()))
|
||||
}
|
||||
}
|
||||
|
||||
impl UserFacingError for AuthError {
|
||||
fn to_string_client(&self) -> String {
|
||||
use LegacyAuthError::*;
|
||||
match self {
|
||||
use AuthErrorImpl::*;
|
||||
match self.0.as_ref() {
|
||||
AuthFailed(_) | HttpStatus(_) => self.to_string(),
|
||||
_ => "Internal error".to_string(),
|
||||
}
|
||||
@@ -68,7 +88,7 @@ async fn authenticate_proxy_client(
|
||||
md5_response: &str,
|
||||
salt: &[u8; 4],
|
||||
psql_session_id: &str,
|
||||
) -> Result<DatabaseInfo, LegacyAuthError> {
|
||||
) -> Result<DatabaseInfo, AuthError> {
|
||||
let mut url = auth_endpoint.clone();
|
||||
url.query_pairs_mut()
|
||||
.append_pair("login", &creds.user)
|
||||
@@ -82,17 +102,17 @@ async fn authenticate_proxy_client(
|
||||
// TODO: leverage `reqwest::Client` to reuse connections
|
||||
let resp = reqwest::get(url).await?;
|
||||
if !resp.status().is_success() {
|
||||
return Err(LegacyAuthError::HttpStatus(resp.status()));
|
||||
return Err(AuthErrorImpl::HttpStatus(resp.status()).into());
|
||||
}
|
||||
|
||||
let auth_info = serde_json::from_str(resp.text().await?.as_str())?;
|
||||
let auth_info: ProxyAuthResponse = serde_json::from_str(resp.text().await?.as_str())?;
|
||||
println!("got auth info: {:?}", auth_info);
|
||||
|
||||
use ProxyAuthResponse::*;
|
||||
let db_info = match auth_info {
|
||||
Ready { conn_info } => conn_info,
|
||||
Error { error } => return Err(LegacyAuthError::AuthFailed(error)),
|
||||
NotReady { .. } => waiter.await?.map_err(LegacyAuthError::AuthFailed)?,
|
||||
Error { error } => return Err(AuthErrorImpl::AuthFailed(error).into()),
|
||||
NotReady { .. } => waiter.await?.map_err(AuthErrorImpl::AuthFailed)?,
|
||||
};
|
||||
|
||||
Ok(db_info)
|
||||
@@ -104,7 +124,7 @@ async fn handle_existing_user(
|
||||
auth_endpoint: &reqwest::Url,
|
||||
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
|
||||
creds: &ClientCredentials,
|
||||
) -> auth::Result<compute::NodeInfo> {
|
||||
) -> Result<compute::NodeInfo, auth::AuthError> {
|
||||
let psql_session_id = super::link::new_psql_session_id();
|
||||
let md5_salt = rand::random();
|
||||
|
||||
|
||||
@@ -1,34 +1,7 @@
|
||||
use crate::{auth, compute, error::UserFacingError, stream::PqStream, waiters};
|
||||
use thiserror::Error;
|
||||
use crate::{auth, compute, stream::PqStream};
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use utils::pq_proto::{BeMessage as Be, BeParameterStatusMessage};
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum LinkAuthError {
|
||||
/// Authentication error reported by the console.
|
||||
#[error("Authentication failed: {0}")]
|
||||
AuthFailed(String),
|
||||
|
||||
#[error(transparent)]
|
||||
WaiterRegister(#[from] waiters::RegisterError),
|
||||
|
||||
#[error(transparent)]
|
||||
WaiterWait(#[from] waiters::WaitError),
|
||||
|
||||
#[error(transparent)]
|
||||
Io(#[from] std::io::Error),
|
||||
}
|
||||
|
||||
impl UserFacingError for LinkAuthError {
|
||||
fn to_string_client(&self) -> String {
|
||||
use LinkAuthError::*;
|
||||
match self {
|
||||
AuthFailed(_) => self.to_string(),
|
||||
_ => "Internal error".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn hello_message(redirect_uri: &str, session_id: &str) -> String {
|
||||
format!(
|
||||
concat![
|
||||
@@ -61,7 +34,7 @@ pub async fn handle_user(
|
||||
.await?;
|
||||
|
||||
// Wait for web console response (see `mgmt`)
|
||||
waiter.await?.map_err(LinkAuthError::AuthFailed)
|
||||
waiter.await?.map_err(auth::AuthErrorImpl::auth_failed)
|
||||
})
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use crate::{
|
||||
auth::{
|
||||
self,
|
||||
backend::console::{self, AuthInfo, GetAuthInfoError, TransportError, WakeComputeError},
|
||||
backend::console::{self, AuthInfo, Result},
|
||||
ClientCredentials,
|
||||
},
|
||||
compute::{self, ComputeConnCfg},
|
||||
@@ -20,13 +20,6 @@ pub(super) struct Api<'a> {
|
||||
creds: &'a ClientCredentials,
|
||||
}
|
||||
|
||||
// Helps eliminate graceless `.map_err` calls without introducing another ctor.
|
||||
impl From<tokio_postgres::Error> for TransportError {
|
||||
fn from(e: tokio_postgres::Error) -> Self {
|
||||
io_error(e).into()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Api<'a> {
|
||||
/// Construct an API object containing the auth parameters.
|
||||
pub(super) fn new(endpoint: &'a ApiUrl, creds: &'a ClientCredentials) -> Self {
|
||||
@@ -43,16 +36,21 @@ impl<'a> Api<'a> {
|
||||
}
|
||||
|
||||
/// This implementation fetches the auth info from a local postgres instance.
|
||||
async fn get_auth_info(&self) -> Result<AuthInfo, GetAuthInfoError> {
|
||||
async fn get_auth_info(&self) -> Result<AuthInfo> {
|
||||
// Perhaps we could persist this connection, but then we'd have to
|
||||
// write more code for reopening it if it got closed, which doesn't
|
||||
// seem worth it.
|
||||
let (client, connection) =
|
||||
tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;
|
||||
tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls)
|
||||
.await
|
||||
.map_err(io_error)?;
|
||||
|
||||
tokio::spawn(connection);
|
||||
let query = "select rolpassword from pg_catalog.pg_authid where rolname = $1";
|
||||
let rows = client.query(query, &[&self.creds.user]).await?;
|
||||
let rows = client
|
||||
.query(query, &[&self.creds.user])
|
||||
.await
|
||||
.map_err(io_error)?;
|
||||
|
||||
match &rows[..] {
|
||||
// We can't get a secret if there's no such user.
|
||||
@@ -76,13 +74,13 @@ impl<'a> Api<'a> {
|
||||
}))
|
||||
})
|
||||
// Putting the secret into this message is a security hazard!
|
||||
.ok_or(GetAuthInfoError::BadSecret)
|
||||
.ok_or(console::ConsoleAuthError::BadSecret)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We don't need to wake anything locally, so we just return the connection info.
|
||||
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg, WakeComputeError> {
|
||||
pub(super) async fn wake_compute(&self) -> Result<ComputeConnCfg> {
|
||||
let mut config = ComputeConnCfg::new();
|
||||
config
|
||||
.host(self.endpoint.host_str().unwrap_or("localhost"))
|
||||
|
||||
@@ -75,12 +75,13 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
|
||||
.strip_suffix(&[0])
|
||||
.ok_or(AuthErrorImpl::MalformedPassword("missing terminator"))?;
|
||||
|
||||
let payload = PasswordHackPayload::parse(password)
|
||||
// If we ended up here and the payload is malformed, it means that
|
||||
// the user neither enabled SNI nor resorted to any other method
|
||||
// for passing the project name we rely on. We should show them
|
||||
// the most helpful error message and point to the documentation.
|
||||
.ok_or(AuthErrorImpl::MissingProjectName)?;
|
||||
// The so-called "password" should contain a base64-encoded json.
|
||||
// We will use it later to route the client to their project.
|
||||
let bytes = base64::decode(password)
|
||||
.map_err(|_| AuthErrorImpl::MalformedPassword("bad encoding"))?;
|
||||
|
||||
let payload = serde_json::from_slice(&bytes)
|
||||
.map_err(|_| AuthErrorImpl::MalformedPassword("invalid payload"))?;
|
||||
|
||||
Ok(payload)
|
||||
}
|
||||
@@ -97,7 +98,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
|
||||
|
||||
// Currently, the only supported SASL method is SCRAM.
|
||||
if !scram::METHODS.contains(&sasl.method) {
|
||||
return Err(super::AuthError::bad_auth_method(sasl.method));
|
||||
return Err(AuthErrorImpl::auth_failed("method not supported").into());
|
||||
}
|
||||
|
||||
let secret = self.state.0;
|
||||
|
||||
@@ -1,46 +1,102 @@
|
||||
//! Payload for ad hoc authentication method for clients that don't support SNI.
|
||||
//! See the `impl` for [`super::backend::BackendType<ClientCredentials>`].
|
||||
//! Read more: <https://github.com/neondatabase/cloud/issues/1620#issuecomment-1165332290>.
|
||||
//! UPDATE (Mon Aug 8 13:20:34 UTC 2022): the payload format has been simplified.
|
||||
|
||||
use bstr::ByteSlice;
|
||||
use serde::{de, Deserialize, Deserializer};
|
||||
use std::fmt;
|
||||
|
||||
pub struct PasswordHackPayload {
|
||||
pub project: String,
|
||||
pub password: Vec<u8>,
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum Password {
|
||||
/// A regular string for utf-8 encoded passwords.
|
||||
Simple { password: String },
|
||||
|
||||
/// Password is base64-encoded because it may contain arbitrary byte sequences.
|
||||
Encoded {
|
||||
#[serde(rename = "password_", deserialize_with = "deserialize_base64")]
|
||||
password: Vec<u8>,
|
||||
},
|
||||
}
|
||||
|
||||
impl PasswordHackPayload {
|
||||
pub fn parse(bytes: &[u8]) -> Option<Self> {
|
||||
// The format is `project=<utf-8>;<password-bytes>`.
|
||||
let mut iter = bytes.strip_prefix(b"project=")?.splitn_str(2, ";");
|
||||
let project = iter.next()?.to_str().ok()?.to_owned();
|
||||
let password = iter.next()?.to_owned();
|
||||
|
||||
Some(Self { project, password })
|
||||
impl AsRef<[u8]> for Password {
|
||||
fn as_ref(&self) -> &[u8] {
|
||||
match self {
|
||||
Password::Simple { password } => password.as_ref(),
|
||||
Password::Encoded { password } => password.as_ref(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct PasswordHackPayload {
|
||||
pub project: String,
|
||||
|
||||
#[serde(flatten)]
|
||||
pub password: Password,
|
||||
}
|
||||
|
||||
fn deserialize_base64<'a, D: Deserializer<'a>>(des: D) -> Result<Vec<u8>, D::Error> {
|
||||
// It's very tempting to replace this with
|
||||
//
|
||||
// ```
|
||||
// let base64: &str = Deserialize::deserialize(des)?;
|
||||
// base64::decode(base64).map_err(serde::de::Error::custom)
|
||||
// ```
|
||||
//
|
||||
// Unfortunately, we can't always deserialize into `&str`, so we'd
|
||||
// have to use an allocating `String` instead. Thus, visitor is better.
|
||||
struct Visitor;
|
||||
|
||||
impl<'de> de::Visitor<'de> for Visitor {
|
||||
type Value = Vec<u8>;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("a string")
|
||||
}
|
||||
|
||||
fn visit_str<E: de::Error>(self, v: &str) -> Result<Self::Value, E> {
|
||||
base64::decode(v).map_err(de::Error::custom)
|
||||
}
|
||||
}
|
||||
|
||||
des.deserialize_str(Visitor)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use rstest::rstest;
|
||||
use serde_json::json;
|
||||
|
||||
#[test]
|
||||
fn parse_password_hack_payload() {
|
||||
let bytes = b"";
|
||||
assert!(PasswordHackPayload::parse(bytes).is_none());
|
||||
fn parse_password() -> anyhow::Result<()> {
|
||||
let password: Password = serde_json::from_value(json!({
|
||||
"password": "foo",
|
||||
}))?;
|
||||
assert_eq!(password.as_ref(), "foo".as_bytes());
|
||||
|
||||
let bytes = b"project=";
|
||||
assert!(PasswordHackPayload::parse(bytes).is_none());
|
||||
let password: Password = serde_json::from_value(json!({
|
||||
"password_": base64::encode("foo"),
|
||||
}))?;
|
||||
assert_eq!(password.as_ref(), "foo".as_bytes());
|
||||
|
||||
let bytes = b"project=;";
|
||||
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
|
||||
assert_eq!(payload.project, "");
|
||||
assert_eq!(payload.password, b"");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
let bytes = b"project=foobar;pass;word";
|
||||
let payload = PasswordHackPayload::parse(bytes).expect("parsing failed");
|
||||
assert_eq!(payload.project, "foobar");
|
||||
assert_eq!(payload.password, b"pass;word");
|
||||
#[rstest]
|
||||
#[case("password", str::to_owned)]
|
||||
#[case("password_", base64::encode)]
|
||||
fn parse(#[case] key: &str, #[case] encode: fn(&'static str) -> String) -> anyhow::Result<()> {
|
||||
let (password, project) = ("password", "pie-in-the-sky");
|
||||
let payload = json!({
|
||||
"project": project,
|
||||
key: encode(password),
|
||||
});
|
||||
|
||||
let payload: PasswordHackPayload = serde_json::from_value(payload)?;
|
||||
assert_eq!(payload.password.as_ref(), password.as_bytes());
|
||||
assert_eq!(payload.project, project);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,17 +65,8 @@ impl NodeInfo {
|
||||
// require for our business.
|
||||
let mut connection_error = None;
|
||||
let ports = self.config.get_ports();
|
||||
let hosts = self.config.get_hosts();
|
||||
// the ports array is supposed to have 0 entries, 1 entry, or as many entries as in the hosts array
|
||||
if ports.len() > 1 && ports.len() != hosts.len() {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("couldn't connect: bad compute config, ports and hosts entries' count does not match: {:?}", self.config),
|
||||
));
|
||||
}
|
||||
|
||||
for (i, host) in hosts.iter().enumerate() {
|
||||
let port = ports.get(i).or_else(|| ports.first()).unwrap_or(&5432);
|
||||
for (i, host) in self.config.get_hosts().iter().enumerate() {
|
||||
let port = ports.get(i).or_else(|| ports.get(0)).unwrap_or(&5432);
|
||||
let host = match host {
|
||||
Host::Tcp(host) => host.as_str(),
|
||||
Host::Unix(_) => continue, // unix sockets are not welcome here
|
||||
|
||||
@@ -14,7 +14,7 @@ pub const SCRAM_RAW_NONCE_LEN: usize = 18;
|
||||
fn validate_sasl_extensions<'a>(parts: impl Iterator<Item = &'a str>) -> Option<()> {
|
||||
for mut chars in parts.map(|s| s.chars()) {
|
||||
let attr = chars.next()?;
|
||||
if !('a'..='z').contains(&attr) && !('A'..='Z').contains(&attr) {
|
||||
if !('a'..'z').contains(&attr) && !('A'..'Z').contains(&attr) {
|
||||
return None;
|
||||
}
|
||||
let eq = chars.next()?;
|
||||
|
||||
@@ -26,7 +26,6 @@ pytest-lazy-fixture = "^0.6.3"
|
||||
prometheus-client = "^0.14.1"
|
||||
pytest-timeout = "^2.1.0"
|
||||
Werkzeug = "2.1.2"
|
||||
pytest-order = "^1.0.1"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
yapf = "==0.31.0"
|
||||
|
||||
@@ -40,7 +40,7 @@ struct SafeKeeperStateV1 {
|
||||
wal_start_lsn: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ServerInfoV2 {
|
||||
/// Postgres server version
|
||||
pub pg_version: u32,
|
||||
@@ -70,7 +70,7 @@ pub struct SafeKeeperStateV2 {
|
||||
pub wal_start_lsn: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ServerInfoV3 {
|
||||
/// Postgres server version
|
||||
pub pg_version: u32,
|
||||
|
||||
@@ -127,7 +127,7 @@ impl AcceptorState {
|
||||
|
||||
/// Information about Postgres. Safekeeper gets it once and then verifies
|
||||
/// all further connections from computes match.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ServerInfo {
|
||||
/// Postgres server version
|
||||
pub pg_version: u32,
|
||||
|
||||
@@ -36,7 +36,7 @@ const NEON_STATUS_UPDATE_TAG_BYTE: u8 = b'z';
|
||||
type FullTransactionId = u64;
|
||||
|
||||
/// Hot standby feedback received from replica
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
|
||||
pub struct HotStandbyFeedback {
|
||||
pub ts: TimestampTz,
|
||||
pub xmin: FullTransactionId,
|
||||
|
||||
@@ -332,7 +332,7 @@ impl Storage for PhysicalStorage {
|
||||
self.write_lsn = if state.commit_lsn == Lsn(0) {
|
||||
Lsn(0)
|
||||
} else {
|
||||
find_end_of_wal(&self.timeline_dir, wal_seg_size, state.commit_lsn)?
|
||||
Lsn(find_end_of_wal(&self.timeline_dir, wal_seg_size, true, state.commit_lsn)?.0)
|
||||
};
|
||||
|
||||
self.write_record_lsn = self.write_lsn;
|
||||
|
||||
@@ -18,10 +18,6 exclude = ^vendor/
# some tests don't typecheck when this flag is set
check_untyped_defs = false

# Help mypy find imports when running against list of individual files.
# Without this line it would behave differently when executed on the entire project.
mypy_path = $MYPY_CONFIG_FILE_DIR:$MYPY_CONFIG_FILE_DIR/test_runner

disallow_incomplete_defs = false
disallow_untyped_calls = false
disallow_untyped_decorators = false

@@ -120,7 +120,10 @@ def test_import_from_pageserver_small(pg_bin: PgBin, neon_env_builder: NeonEnvBu

@pytest.mark.timeout(1800)
@pytest.mark.skipif(os.environ.get('BUILD_TYPE') == "debug", reason="only run with release build")
# TODO: temporarily disable `test_import_from_pageserver_multisegment` test, enable
# the test back after finding the failure cause.
# @pytest.mark.skipif(os.environ.get('BUILD_TYPE') == "debug", reason="only run with release build")
@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/2255")
def test_import_from_pageserver_multisegment(pg_bin: PgBin, neon_env_builder: NeonEnvBuilder):
neon_env_builder.num_safekeepers = 1
neon_env_builder.enable_local_fs_remote_storage()

@@ -2,6 +2,16 @@ from fixtures.neon_fixtures import NeonEnvBuilder
from fixtures.log_helper import log

# Test that the pageserver fixture is implemented correctly, allowing quick restarts.
# This is a regression test, see https://github.com/neondatabase/neon/issues/2247
def test_fixture_restart(neon_env_builder: NeonEnvBuilder):
env = neon_env_builder.init_start()

for i in range(3):
env.pageserver.stop()
env.pageserver.start()

# Test restarting page server, while safekeeper and compute node keep
# running.
def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):

@@ -1,5 +1,6 @@
|
||||
import pytest
|
||||
import psycopg2
|
||||
import json
|
||||
import base64
|
||||
|
||||
|
||||
def test_proxy_select_1(static_proxy):
|
||||
@@ -12,14 +13,22 @@ def test_password_hack(static_proxy):
|
||||
static_proxy.safe_psql(f"create role {user} with login password '{password}'",
|
||||
options='project=irrelevant')
|
||||
|
||||
# Note the format of `magic`!
|
||||
magic = f"project=irrelevant;{password}"
|
||||
def encode(s: str) -> str:
|
||||
return base64.b64encode(s.encode('utf-8')).decode('utf-8')
|
||||
|
||||
magic = encode(json.dumps({
|
||||
'project': 'irrelevant',
|
||||
'password': password,
|
||||
}))
|
||||
|
||||
static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic)
|
||||
|
||||
# Must also check that invalid magic won't be accepted.
|
||||
with pytest.raises(psycopg2.errors.OperationalError):
|
||||
magic = "broken"
|
||||
static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic)
|
||||
magic = encode(json.dumps({
|
||||
'project': 'irrelevant',
|
||||
'password_': encode(password),
|
||||
}))
|
||||
|
||||
static_proxy.safe_psql('select 1', sslsni=0, user=user, password=magic)
|
||||
|
||||
|
||||
# Pass extra options to the server.
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
"""Tests for the code in test fixtures"""
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
# Test that pageserver and safekeeper can restart quickly.
|
||||
# This is a regression test, see https://github.com/neondatabase/neon/issues/2247
|
||||
def test_fixture_restart(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
for i in range(3):
|
||||
env.pageserver.stop()
|
||||
env.pageserver.start()
|
||||
|
||||
for i in range(3):
|
||||
env.safekeepers[0].stop()
|
||||
env.safekeepers[0].start()
|
||||
@@ -1090,9 +1090,11 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
|
||||
|
||||
# Remove initial tenant fully (two branches are active)
|
||||
response = sk_http.tenant_delete_force(tenant_id)
|
||||
assert response[timeline_id_3] == {
|
||||
"dir_existed": True,
|
||||
"was_active": True,
|
||||
assert response == {
|
||||
timeline_id_3: {
|
||||
"dir_existed": True,
|
||||
"was_active": True,
|
||||
}
|
||||
}
|
||||
assert not (sk_data_dir / tenant_id).exists()
|
||||
assert (sk_data_dir / tenant_id_other / timeline_id_other).is_dir()
|
||||
|
||||
@@ -520,68 +520,3 @@ def test_race_conditions(neon_env_builder: NeonEnvBuilder):
|
||||
pg = env.postgres.create_start('test_safekeepers_race_conditions')
|
||||
|
||||
asyncio.run(run_race_conditions(env, pg))
|
||||
|
||||
|
||||
# Check that pageserver can select safekeeper with largest commit_lsn
|
||||
# and switch if LSN is not updated for some time (NoWalTimeout).
|
||||
async def run_wal_lagging(env: NeonEnv, pg: Postgres):
|
||||
def safekeepers_guc(env: NeonEnv, active_sk: List[bool]) -> str:
|
||||
# use ports 10, 11 and 12 to simulate unavailable safekeepers
|
||||
return ','.join([
|
||||
f'localhost:{sk.port.pg if active else 10 + i}'
|
||||
for i, (sk, active) in enumerate(zip(env.safekeepers, active_sk))
|
||||
])
|
||||
|
||||
conn = await pg.connect_async()
|
||||
await conn.execute('CREATE TABLE t(key int primary key, value text)')
|
||||
await conn.close()
|
||||
pg.stop()
|
||||
|
||||
n_iterations = 20
|
||||
n_txes = 10000
|
||||
expected_sum = 0
|
||||
i = 1
|
||||
quorum = len(env.safekeepers) // 2 + 1
|
||||
|
||||
for it in range(n_iterations):
|
||||
active_sk = list(map(lambda _: random.random() >= 0.5, env.safekeepers))
|
||||
active_count = sum(active_sk)
|
||||
|
||||
if active_count < quorum:
|
||||
it -= 1
|
||||
continue
|
||||
|
||||
pg.adjust_for_safekeepers(safekeepers_guc(env, active_sk))
|
||||
log.info(f'Iteration {it}: {active_sk}')
|
||||
|
||||
pg.start()
|
||||
conn = await pg.connect_async()
|
||||
|
||||
for _ in range(n_txes):
|
||||
await conn.execute(f"INSERT INTO t values ({i}, 'payload')")
|
||||
expected_sum += i
|
||||
i += 1
|
||||
|
||||
await conn.close()
|
||||
pg.stop()
|
||||
|
||||
pg.adjust_for_safekeepers(safekeepers_guc(env, [True] * len(env.safekeepers)))
|
||||
pg.start()
|
||||
conn = await pg.connect_async()
|
||||
|
||||
log.info(f'Executed {i-1} queries')
|
||||
|
||||
res = await conn.fetchval('SELECT sum(key) FROM t')
|
||||
assert res == expected_sum
|
||||
|
||||
|
||||
# do inserts while restarting postgres and messing with safekeeper addresses
|
||||
def test_wal_lagging(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
neon_env_builder.num_safekeepers = 3
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
env.neon_cli.create_branch('test_wal_lagging')
|
||||
pg = env.postgres.create_start('test_wal_lagging')
|
||||
|
||||
asyncio.run(run_wal_lagging(env, pg))
|
||||
|
||||
@@ -1,21 +1,23 @@
|
||||
import calendar
|
||||
import dataclasses
|
||||
import enum
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import timeit
|
||||
import uuid
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
# Type-related stuff
|
||||
from typing import Iterator, Optional
|
||||
|
||||
import re
|
||||
import subprocess
|
||||
import timeit
|
||||
import calendar
|
||||
import enum
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
import pytest
|
||||
from _pytest.config import Config
|
||||
from _pytest.terminal import TerminalReporter
|
||||
import warnings
|
||||
|
||||
from contextlib import contextmanager
|
||||
|
||||
# Type-related stuff
|
||||
from typing import Iterator, Optional
|
||||
"""
|
||||
This file contains fixtures for micro-benchmarks.
|
||||
|
||||
@@ -75,7 +77,7 @@ class PgBenchRunResult:
|
||||
|
||||
# we know significant parts of these values from test input
|
||||
# but to be precise take them from output
|
||||
for line in stdout_lines:
|
||||
for line in stdout.splitlines():
|
||||
# scaling factor: 5
|
||||
if line.startswith("scaling factor:"):
|
||||
scale = int(line.split()[-1])
|
||||
@@ -129,58 +131,6 @@ class PgBenchRunResult:
|
||||
)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class PgBenchInitResult:
|
||||
total: float
|
||||
drop_tables: Optional[float]
|
||||
create_tables: Optional[float]
|
||||
client_side_generate: Optional[float]
|
||||
vacuum: Optional[float]
|
||||
primary_keys: Optional[float]
|
||||
duration: float
|
||||
start_timestamp: int
|
||||
end_timestamp: int
|
||||
|
||||
@classmethod
|
||||
def parse_from_stderr(
|
||||
cls,
|
||||
stderr: str,
|
||||
duration: float,
|
||||
start_timestamp: int,
|
||||
end_timestamp: int,
|
||||
):
|
||||
# Parses pgbench initialize output for default initialization steps (dtgvp)
|
||||
# Example: done in 5.66 s (drop tables 0.05 s, create tables 0.31 s, client-side generate 2.01 s, vacuum 0.53 s, primary keys 0.38 s).
|
||||
|
||||
last_line = stderr.splitlines()[-1]
|
||||
|
||||
regex = re.compile(r"done in (\d+\.\d+) s "
|
||||
r"\("
|
||||
r"(?:drop tables (\d+\.\d+) s)?(?:, )?"
|
||||
r"(?:create tables (\d+\.\d+) s)?(?:, )?"
|
||||
r"(?:client-side generate (\d+\.\d+) s)?(?:, )?"
|
||||
r"(?:vacuum (\d+\.\d+) s)?(?:, )?"
|
||||
r"(?:primary keys (\d+\.\d+) s)?(?:, )?"
|
||||
r"\)\.")
|
||||
|
||||
if (m := regex.match(last_line)) is not None:
|
||||
total, drop_tables, create_tables, client_side_generate, vacuum, primary_keys = [float(v) for v in m.groups() if v is not None]
|
||||
else:
|
||||
raise RuntimeError(f"can't parse pgbench initialize results from `{last_line}`")
|
||||
|
||||
return cls(
|
||||
total=total,
|
||||
drop_tables=drop_tables,
|
||||
create_tables=create_tables,
|
||||
client_side_generate=client_side_generate,
|
||||
vacuum=vacuum,
|
||||
primary_keys=primary_keys,
|
||||
duration=duration,
|
||||
start_timestamp=start_timestamp,
|
||||
end_timestamp=end_timestamp,
|
||||
)
|
||||
|
||||
|
||||
@enum.unique
|
||||
class MetricReport(str, enum.Enum): # str is a hack to make it json serializable
|
||||
# this means that this is a constant test parameter
|
||||
@@ -282,32 +232,6 @@ class NeonBenchmarker:
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
|
||||
def record_pg_bench_init_result(self, prefix: str, result: PgBenchInitResult):
|
||||
test_params = [
|
||||
"start_timestamp",
|
||||
"end_timestamp",
|
||||
]
|
||||
for test_param in test_params:
|
||||
self.record(f"{prefix}.{test_param}",
|
||||
getattr(result, test_param),
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
|
||||
metrics = [
|
||||
"duration",
|
||||
"drop_tables",
|
||||
"create_tables",
|
||||
"client_side_generate",
|
||||
"vacuum",
|
||||
"primary_keys",
|
||||
]
|
||||
for metric in metrics:
|
||||
if (value := getattr(result, metric)) is not None:
|
||||
self.record(f"{prefix}.{metric}",
|
||||
value,
|
||||
unit="s",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
|
||||
def get_io_writes(self, pageserver) -> int:
|
||||
"""
|
||||
Fetch the "cumulative # of bytes written" metric from the pageserver
|
||||
|
||||
@@ -1488,6 +1488,17 @@ class NeonPageserver(PgProtocol):
|
||||
self.running = True
|
||||
return self
|
||||
|
||||
def _wait_for_death(self):
|
||||
"""Wait for pageserver to die. Assumes kill signal is sent."""
|
||||
pid_path = pathlib.Path(self.env.repo_dir) / "pageserver.pid"
|
||||
pid = read_pid(pid_path)
|
||||
retries_left = 20
|
||||
while check_pid(pid):
|
||||
time.sleep(0.2)
|
||||
retries_left -= 1
|
||||
if retries_left == 0:
|
||||
raise AssertionError("Pageserver failed to die")
|
||||
|
||||
def stop(self, immediate=False) -> 'NeonPageserver':
|
||||
"""
|
||||
Stop the page server.
|
||||
@@ -1495,6 +1506,7 @@ class NeonPageserver(PgProtocol):
|
||||
"""
|
||||
if self.running:
|
||||
self.env.neon_cli.pageserver_stop(immediate)
|
||||
self._wait_for_death()
|
||||
self.running = False
|
||||
return self
|
||||
|
||||
@@ -2004,6 +2016,17 @@ def read_pid(path: Path) -> int:
|
||||
return int(path.read_text())
|
||||
|
||||
|
||||
def check_pid(pid):
|
||||
"""Check whether pid is running."""
|
||||
try:
|
||||
# If sig is 0, then no signal is sent, but error checking is still performed.
|
||||
os.kill(pid, 0)
|
||||
except OSError:
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
@dataclass
|
||||
class SafekeeperPort:
|
||||
pg: int
|
||||
@@ -2440,7 +2463,7 @@ def wait_for_upload(pageserver_http_client: NeonPageserverHttpClient,
|
||||
timeline: uuid.UUID,
|
||||
lsn: int):
|
||||
"""waits for local timeline upload up to specified lsn"""
|
||||
for i in range(20):
|
||||
for i in range(10):
|
||||
current_lsn = remote_consistent_lsn(pageserver_http_client, tenant, timeline)
|
||||
if current_lsn >= lsn:
|
||||
return
|
||||
|
||||
@@ -32,16 +32,10 @@ def subprocess_capture(capture_dir: str, cmd: List[str], **kwargs: Any) -> str:
|
||||
stdout_filename = basepath + '.stdout'
|
||||
stderr_filename = basepath + '.stderr'
|
||||
|
||||
try:
|
||||
with open(stdout_filename, 'w') as stdout_f:
|
||||
with open(stderr_filename, 'w') as stderr_f:
|
||||
log.info(f'Capturing stdout to "{base}.stdout" and stderr to "{base}.stderr"')
|
||||
subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
|
||||
finally:
|
||||
# Remove empty files if there is no output
|
||||
for filename in (stdout_filename, stderr_filename):
|
||||
if os.stat(filename).st_size == 0:
|
||||
os.remove(filename)
|
||||
with open(stdout_filename, 'w') as stdout_f:
|
||||
with open(stderr_filename, 'w') as stderr_f:
|
||||
log.info('(capturing output to "{}.stdout")'.format(base))
|
||||
subprocess.run(cmd, **kwargs, stdout=stdout_f, stderr=stderr_f)
|
||||
|
||||
return basepath
|
||||
|
||||
@@ -146,12 +140,3 @@ def parse_delta_layer(f_name: str) -> Tuple[int, int, int, int]:
|
||||
key_parts = parts[0].split("-")
|
||||
lsn_parts = parts[1].split("-")
|
||||
return int(key_parts[0], 16), int(key_parts[1], 16), int(lsn_parts[0], 16), int(lsn_parts[1], 16)
|
||||
|
||||
|
||||
def get_scale_for_db(size_mb: int) -> int:
|
||||
"""Returns pgbench scale factor for given target db size in MB.
|
||||
|
||||
Ref https://www.cybertec-postgresql.com/en/a-formula-to-calculate-pgbench-scaling-factor-for-target-db-size/
|
||||
"""
|
||||
|
||||
return round(0.06689 * size_mb - 0.5)
|
||||
|
||||
@@ -10,7 +10,7 @@ In the CI, the performance tests are run in the same environment as the other in

## Remote tests

There are a few tests that marked with `pytest.mark.remote_cluster`. These tests do not set up a local environment, and instead require a libpq connection string to connect to. So they can be run on any Postgres compatible database. Currently, the CI runs these tests on our staging and captest environments daily. Those are not an isolated environments, so there can be noise in the results due to activity of other clusters.
There are a few tests that marked with `pytest.mark.remote_cluster`. These tests do not set up a local environment, and instead require a libpq connection string to connect to. So they can be run on any Postgres compatible database. Currently, the CI runs these tests our staging environment daily. Staging is not an isolated environment, so there can be noise in the results due to activity of other clusters.

## Noise

@@ -1,23 +1,17 @@
|
||||
import calendar
|
||||
import enum
|
||||
import os
|
||||
import timeit
|
||||
from datetime import datetime
|
||||
from contextlib import closing
|
||||
from fixtures.neon_fixtures import PgBin, VanillaPostgres, NeonEnv, profiling_supported
|
||||
from fixtures.compare_fixtures import PgCompare, VanillaCompare, NeonCompare
|
||||
|
||||
from fixtures.benchmark_fixture import PgBenchRunResult, MetricReport, NeonBenchmarker
|
||||
from fixtures.log_helper import log
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
from fixtures.benchmark_fixture import MetricReport, PgBenchInitResult, PgBenchRunResult
|
||||
from fixtures.compare_fixtures import NeonCompare, PgCompare
|
||||
from fixtures.neon_fixtures import profiling_supported
|
||||
from fixtures.utils import get_scale_for_db
|
||||
|
||||
|
||||
@enum.unique
|
||||
class PgBenchLoadType(enum.Enum):
|
||||
INIT = "init"
|
||||
SIMPLE_UPDATE = "simple_update"
|
||||
SELECT_ONLY = "select-only"
|
||||
from datetime import datetime
|
||||
import calendar
|
||||
import os
|
||||
import timeit
|
||||
|
||||
|
||||
def utc_now_timestamp() -> int:
|
||||
@@ -28,24 +22,23 @@ def init_pgbench(env: PgCompare, cmdline):
|
||||
# calculate timestamps and durations separately
|
||||
# timestamp is intended to be used for linking to grafana and logs
|
||||
# duration is actually a metric and uses float instead of int for timestamp
|
||||
start_timestamp = utc_now_timestamp()
|
||||
init_start_timestamp = utc_now_timestamp()
|
||||
t0 = timeit.default_timer()
|
||||
with env.record_pageserver_writes('init.pageserver_writes'):
|
||||
out = env.pg_bin.run_capture(cmdline)
|
||||
env.pg_bin.run_capture(cmdline)
|
||||
env.flush()
|
||||
init_duration = timeit.default_timer() - t0
|
||||
init_end_timestamp = utc_now_timestamp()
|
||||
|
||||
duration = timeit.default_timer() - t0
|
||||
end_timestamp = utc_now_timestamp()
|
||||
|
||||
stderr = Path(f"{out}.stderr").read_text()
|
||||
|
||||
res = PgBenchInitResult.parse_from_stderr(
|
||||
stderr=stderr,
|
||||
duration=duration,
|
||||
start_timestamp=start_timestamp,
|
||||
end_timestamp=end_timestamp,
|
||||
)
|
||||
env.zenbenchmark.record_pg_bench_init_result("init", res)
|
||||
env.zenbenchmark.record("init.duration",
|
||||
init_duration,
|
||||
unit="s",
|
||||
report=MetricReport.LOWER_IS_BETTER)
|
||||
env.zenbenchmark.record("init.start_timestamp",
|
||||
init_start_timestamp,
|
||||
'',
|
||||
MetricReport.TEST_PARAM)
|
||||
env.zenbenchmark.record("init.end_timestamp", init_end_timestamp, '', MetricReport.TEST_PARAM)
|
||||
|
||||
|
||||
def run_pgbench(env: PgCompare, prefix: str, cmdline):
|
||||
@@ -77,84 +70,38 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline):
|
||||
# the test database.
|
||||
#
|
||||
# Currently, the # of connections is hardcoded at 4
|
||||
def run_test_pgbench(env: PgCompare, scale: int, duration: int, workload_type: PgBenchLoadType):
|
||||
def run_test_pgbench(env: PgCompare, scale: int, duration: int):
|
||||
|
||||
# Record the scale and initialize
|
||||
env.zenbenchmark.record("scale", scale, '', MetricReport.TEST_PARAM)
|
||||
init_pgbench(env, ['pgbench', f'-s{scale}', '-i', env.pg.connstr()])
|
||||
|
||||
if workload_type == PgBenchLoadType.INIT:
|
||||
# Run initialize
|
||||
init_pgbench(
|
||||
env, ['pgbench', f'-s{scale}', '-i', env.pg.connstr(options='-cstatement_timeout=1h')])
|
||||
# Run simple-update workload
|
||||
run_pgbench(env,
|
||||
"simple-update", ['pgbench', '-N', '-c4', f'-T{duration}', '-P2', env.pg.connstr()])
|
||||
|
||||
if workload_type == PgBenchLoadType.SIMPLE_UPDATE:
|
||||
# Run simple-update workload
|
||||
run_pgbench(env,
|
||||
"simple-update",
|
||||
[
|
||||
'pgbench',
|
||||
'-N',
|
||||
'-c4',
|
||||
f'-T{duration}',
|
||||
'-P2',
|
||||
'--progress-timestamp',
|
||||
env.pg.connstr(),
|
||||
])
|
||||
|
||||
if workload_type == PgBenchLoadType.SELECT_ONLY:
|
||||
# Run SELECT workload
|
||||
run_pgbench(env,
|
||||
"select-only",
|
||||
[
|
||||
'pgbench',
|
||||
'-S',
|
||||
'-c4',
|
||||
f'-T{duration}',
|
||||
'-P2',
|
||||
'--progress-timestamp',
|
||||
env.pg.connstr(),
|
||||
])
|
||||
# Run SELECT workload
|
||||
run_pgbench(env,
|
||||
"select-only", ['pgbench', '-S', '-c4', f'-T{duration}', '-P2', env.pg.connstr()])
|
||||
|
||||
env.report_size()
|
||||
|
||||
|
||||
def get_durations_matrix(default: int = 45) -> List[int]:
|
||||
def get_durations_matrix(default: int = 45):
|
||||
durations = os.getenv("TEST_PG_BENCH_DURATIONS_MATRIX", default=str(default))
|
||||
rv = []
|
||||
for d in durations.split(","):
|
||||
d = d.strip().lower()
|
||||
if d.endswith('h'):
|
||||
duration = int(d.removesuffix('h')) * 60 * 60
|
||||
elif d.endswith('m'):
|
||||
duration = int(d.removesuffix('m')) * 60
|
||||
else:
|
||||
duration = int(d.removesuffix('s'))
|
||||
rv.append(duration)
|
||||
|
||||
return rv
|
||||
return list(map(int, durations.split(",")))
|
||||
|
||||
|
||||
def get_scales_matrix(default: int = 10) -> List[int]:
|
||||
def get_scales_matrix(default: int = 10):
|
||||
scales = os.getenv("TEST_PG_BENCH_SCALES_MATRIX", default=str(default))
|
||||
rv = []
|
||||
for s in scales.split(","):
|
||||
s = s.strip().lower()
|
||||
if s.endswith('mb'):
|
||||
scale = get_scale_for_db(int(s.removesuffix('mb')))
|
||||
elif s.endswith('gb'):
|
||||
scale = get_scale_for_db(int(s.removesuffix('gb')) * 1024)
|
||||
else:
|
||||
scale = int(s)
|
||||
rv.append(scale)
|
||||
|
||||
return rv
|
||||
return list(map(int, scales.split(",")))
|
||||
|
||||
|
||||
# Run the pgbench tests against vanilla Postgres and neon
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
def test_pgbench(neon_with_baseline: PgCompare, scale: int, duration: int):
|
||||
run_test_pgbench(neon_with_baseline, scale, duration, PgBenchLoadType.INIT)
|
||||
run_test_pgbench(neon_with_baseline, scale, duration, PgBenchLoadType.SIMPLE_UPDATE)
|
||||
run_test_pgbench(neon_with_baseline, scale, duration, PgBenchLoadType.SELECT_ONLY)
|
||||
run_test_pgbench(neon_with_baseline, scale, duration)
|
||||
|
||||
|
||||
# Run the pgbench tests, and generate a flamegraph from it
|
||||
@@ -176,34 +123,12 @@ profiling="page_requests"
|
||||
env = neon_env_builder.init_start()
|
||||
env.neon_cli.create_branch("empty", "main")
|
||||
|
||||
neon_compare = NeonCompare(zenbenchmark, env, pg_bin, "pgbench")
|
||||
run_test_pgbench(neon_compare, scale, duration, PgBenchLoadType.INIT)
|
||||
run_test_pgbench(neon_compare, scale, duration, PgBenchLoadType.SIMPLE_UPDATE)
|
||||
run_test_pgbench(neon_compare, scale, duration, PgBenchLoadType.SELECT_ONLY)
|
||||
run_test_pgbench(NeonCompare(zenbenchmark, env, pg_bin, "pgbench"), scale, duration)
|
||||
|
||||
|
||||
# The following 3 tests run on an existing database as it was set up by previous tests,
|
||||
# and leaves the database in a state that would be used in the next tests.
|
||||
# Modifying the definition order of these functions or adding other remote tests in between will alter results.
|
||||
# See usage of --sparse-ordering flag in the pytest invocation in the CI workflow
|
||||
#
|
||||
# Run the pgbench tests against an existing Postgres cluster
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
@pytest.mark.remote_cluster
|
||||
def test_pgbench_remote_init(remote_compare: PgCompare, scale: int, duration: int):
|
||||
run_test_pgbench(remote_compare, scale, duration, PgBenchLoadType.INIT)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
@pytest.mark.remote_cluster
|
||||
def test_pgbench_remote_simple_update(remote_compare: PgCompare, scale: int, duration: int):
|
||||
run_test_pgbench(remote_compare, scale, duration, PgBenchLoadType.SIMPLE_UPDATE)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("scale", get_scales_matrix())
|
||||
@pytest.mark.parametrize("duration", get_durations_matrix())
|
||||
@pytest.mark.remote_cluster
|
||||
def test_pgbench_remote_select_only(remote_compare: PgCompare, scale: int, duration: int):
|
||||
run_test_pgbench(remote_compare, scale, duration, PgBenchLoadType.SELECT_ONLY)
|
||||
def test_pgbench_remote(remote_compare: PgCompare, scale: int, duration: int):
|
||||
run_test_pgbench(remote_compare, scale, duration)
|
||||
|
||||
@@ -3,10 +3,10 @@ import shutil
import subprocess
from pathlib import Path
from tempfile import NamedTemporaryFile
from urllib.parse import urlparse

import pytest
from fixtures.neon_fixtures import RemotePostgres
from fixtures.utils import subprocess_capture

@pytest.mark.remote_cluster
@@ -25,7 +25,7 @@ from fixtures.utils import subprocess_capture
"typescript/postgresql-client",
],
)
def test_pg_clients(test_output_dir: Path, remote_pg: RemotePostgres, client: str):
def test_pg_clients(remote_pg: RemotePostgres, client: str):
conn_options = remote_pg.conn_options()

env_file = None
@@ -43,10 +43,12 @@ def test_pg_clients(test_output_dir: Path, remote_pg: RemotePostgres, client: st
if docker_bin is None:
raise RuntimeError("docker is required for running this test")

build_cmd = [docker_bin, "build", "--tag", image_tag, f"{Path(__file__).parent / client}"]
subprocess_capture(str(test_output_dir), build_cmd, check=True)

build_cmd = [
docker_bin, "build", "--quiet", "--tag", image_tag, f"{Path(__file__).parent / client}"
]
run_cmd = [docker_bin, "run", "--rm", "--env-file", env_file, image_tag]
basepath = subprocess_capture(str(test_output_dir), run_cmd, check=True)

assert Path(f"{basepath}.stdout").read_text().strip() == "1"
subprocess.run(build_cmd, check=True)
result = subprocess.run(run_cmd, check=True, capture_output=True, text=True)

assert result.stdout.strip() == "1"

vendor/postgres (vendored submodule, 2 changed lines)
Submodule vendor/postgres updated: 49015ce98f...0a9045c9ff