mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-26 14:50:36 +00:00
Compare commits
1 Commits
proxy-skip
...
problame/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b144f22be4 |
@@ -8,7 +8,6 @@
|
||||
!scripts/combine_control_files.py
|
||||
!scripts/ninstall.sh
|
||||
!vm-cgconfig.conf
|
||||
!docker-compose/run-tests.sh
|
||||
|
||||
# Directories
|
||||
!.cargo/
|
||||
@@ -21,7 +20,7 @@
|
||||
!patches/
|
||||
!pgxn/
|
||||
!proxy/
|
||||
!storage_scrubber/
|
||||
!s3_scrubber/
|
||||
!safekeeper/
|
||||
!storage_broker/
|
||||
!storage_controller/
|
||||
|
||||
14
.github/workflows/actionlint.yml
vendored
14
.github/workflows/actionlint.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
|
||||
actionlint:
|
||||
needs: [ check-permissions ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: reviewdog/action-actionlint@v1
|
||||
@@ -36,15 +36,3 @@ jobs:
|
||||
fail_on_error: true
|
||||
filter_mode: nofilter
|
||||
level: error
|
||||
- run: |
|
||||
PAT='^\s*runs-on:.*-latest'
|
||||
if grep -ERq $PAT .github/workflows
|
||||
then
|
||||
grep -ERl $PAT .github/workflows |\
|
||||
while read -r f
|
||||
do
|
||||
l=$(grep -nE $PAT .github/workflows/release.yml | awk -F: '{print $1}' | head -1)
|
||||
echo "::error file=$f,line=$l::Please, do not use ubuntu-latest images to run on, use LTS instead."
|
||||
done
|
||||
exit 1
|
||||
fi
|
||||
|
||||
59
.github/workflows/approved-for-ci-run.yml
vendored
59
.github/workflows/approved-for-ci-run.yml
vendored
@@ -44,7 +44,7 @@ jobs:
|
||||
contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) &&
|
||||
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
|
||||
@@ -60,7 +60,7 @@ jobs:
|
||||
github.event.action == 'labeled' &&
|
||||
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
|
||||
@@ -69,41 +69,15 @@ jobs:
|
||||
with:
|
||||
ref: main
|
||||
token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
- name: Look for existing PR
|
||||
id: get-pr
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
|
||||
echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT}
|
||||
|
||||
- name: Get changed labels
|
||||
id: get-labels
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED != ''
|
||||
env:
|
||||
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
LABELS_TO_REMOVE=$(comm -23 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) | sort) \
|
||||
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
|
||||
( grep -v run-e2e-tests-in-draft || true ) | paste -sd , -)
|
||||
LABELS_TO_ADD=$(comm -13 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) |sort) \
|
||||
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
|
||||
paste -sd , -)
|
||||
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
|
||||
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
|
||||
|
||||
- run: gh pr checkout "${PR_NUMBER}"
|
||||
|
||||
- run: git checkout -b "${BRANCH}"
|
||||
|
||||
- run: git push --force origin "${BRANCH}"
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED == ''
|
||||
|
||||
- name: Create a Pull Request for CI run (if required)
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED == ''
|
||||
env:
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
run: |
|
||||
cat << EOF > body.md
|
||||
@@ -114,33 +88,16 @@ jobs:
|
||||
Feel free to review/comment/discuss the original PR #${PR_NUMBER}.
|
||||
EOF
|
||||
|
||||
LABELS=$( (gh pr --repo "${GITHUB_REPOSITORY}" view ${PR_NUMBER} --json labels --jq '.labels.[].name'; echo run-e2e-tests-in-draft )| \
|
||||
grep -E '^run' | paste -sd , -)
|
||||
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
|
||||
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
|
||||
if [ -z "${ALREADY_CREATED}" ]; then
|
||||
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
|
||||
--body-file "body.md" \
|
||||
--head "${BRANCH}" \
|
||||
--base "main" \
|
||||
--label ${LABELS} \
|
||||
--label "run-e2e-tests-in-draft" \
|
||||
--draft
|
||||
- name: Modify the existing pull request (if required)
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED != ''
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
LABELS_TO_ADD: ${{ steps.get-labels.outputs.LABELS_TO_ADD }}
|
||||
LABELS_TO_REMOVE: ${{ steps.get-labels.outputs.LABELS_TO_REMOVE }}
|
||||
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
|
||||
run: |
|
||||
ADD_CMD=
|
||||
REMOVE_CMD=
|
||||
[ -z "${LABELS_TO_ADD}" ] || ADD_CMD="--add-label ${LABELS_TO_ADD}"
|
||||
[ -z "${LABELS_TO_REMOVE}" ] || REMOVE_CMD="--remove-label ${LABELS_TO_REMOVE}"
|
||||
if [ -n "${ADD_CMD}" ] || [ -n "${REMOVE_CMD}" ]; then
|
||||
gh pr --repo "${GITHUB_REPOSITORY}" edit ${ALREADY_CREATED} ${ADD_CMD} ${REMOVE_CMD}
|
||||
fi
|
||||
|
||||
- run: git push --force origin "${BRANCH}"
|
||||
if: steps.get-pr.outputs.ALREADY_CREATED != ''
|
||||
|
||||
cleanup:
|
||||
# Close PRs and delete branchs if the original PR is closed.
|
||||
|
||||
@@ -152,7 +109,7 @@ jobs:
|
||||
github.event.action == 'closed' &&
|
||||
github.event.pull_request.head.repo.full_name != github.repository
|
||||
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch
|
||||
|
||||
104
.github/workflows/benchmarking.yml
vendored
104
.github/workflows/benchmarking.yml
vendored
@@ -38,11 +38,6 @@ on:
|
||||
description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
|
||||
required: false
|
||||
default: false
|
||||
run_only_pgvector_tests:
|
||||
type: boolean
|
||||
description: 'Run pgvector tests but no other tests. If not set, all tests including pgvector tests will be run'
|
||||
required: false
|
||||
default: false
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -55,7 +50,6 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
bench:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
|
||||
@@ -99,7 +93,7 @@ jobs:
|
||||
# Set --sparse-ordering option of pytest-order plugin
|
||||
# to ensure tests are running in order of appears in the file.
|
||||
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
|
||||
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py --ignore test_runner/performance/test_perf_pgvector_queries.py
|
||||
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -126,7 +120,6 @@ jobs:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
generate-matrices:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
|
||||
#
|
||||
# Available platforms:
|
||||
@@ -137,7 +130,7 @@ jobs:
|
||||
# - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
|
||||
env:
|
||||
RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
|
||||
olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
|
||||
@@ -204,7 +197,6 @@ jobs:
|
||||
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
|
||||
|
||||
pgbench-compare:
|
||||
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
|
||||
needs: [ generate-matrices ]
|
||||
|
||||
strategy:
|
||||
@@ -351,92 +343,6 @@ jobs:
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
pgbench-pgvector:
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "1"
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
|
||||
PLATFORM: "neon-captest-pgvector"
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Add Postgres binaries to PATH
|
||||
run: |
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
|
||||
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
QUERIES=("SELECT version()")
|
||||
QUERIES+=("SHOW neon.tenant_id")
|
||||
QUERIES+=("SHOW neon.timeline_id")
|
||||
|
||||
for q in "${QUERIES[@]}"; do
|
||||
psql ${CONNSTR} -c "${q}"
|
||||
done
|
||||
|
||||
- name: Benchmark pgvector hnsw indexing
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_perf_olap.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
|
||||
- name: Benchmark pgvector queries
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_perf_pgvector_queries.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: "Periodic perf testing neon-captest-pgvector: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
|
||||
clickbench-compare:
|
||||
# ClichBench DB for rds-aurora and rds-Postgres deployed to the same clusters
|
||||
# we use for performance testing in pgbench-compare.
|
||||
@@ -445,7 +351,7 @@ jobs:
|
||||
#
|
||||
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
|
||||
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [ generate-matrices, pgbench-compare ]
|
||||
|
||||
strategy:
|
||||
@@ -549,7 +455,7 @@ jobs:
|
||||
# We might change it after https://github.com/neondatabase/neon/issues/2900.
|
||||
#
|
||||
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [ generate-matrices, clickbench-compare ]
|
||||
|
||||
strategy:
|
||||
@@ -651,7 +557,7 @@ jobs:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
user-examples-compare:
|
||||
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [ generate-matrices, tpch-compare ]
|
||||
|
||||
strategy:
|
||||
|
||||
@@ -55,7 +55,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
|
||||
# The default value is ~/.docker
|
||||
@@ -88,7 +88,7 @@ jobs:
|
||||
|
||||
merge-images:
|
||||
needs: [ build-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
env:
|
||||
IMAGE_TAG: ${{ inputs.image-tag }}
|
||||
|
||||
109
.github/workflows/build_and_test.yml
vendored
109
.github/workflows/build_and_test.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
cancel-previous-e2e-tests:
|
||||
needs: [ check-permissions ]
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Cancel previous e2e-tests runs for this PR
|
||||
@@ -337,8 +337,34 @@ jobs:
|
||||
run: |
|
||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
|
||||
|
||||
# Do install *before* running rust tests because they might recompile the
|
||||
# binaries with different features/flags.
|
||||
- name: Run rust tests
|
||||
env:
|
||||
NEXTEST_RETRIES: 3
|
||||
run: |
|
||||
#nextest does not yet support running doctests
|
||||
cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
|
||||
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
|
||||
done
|
||||
|
||||
# Run separate tests for real S3
|
||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
||||
|
||||
# Run separate tests for real Azure Blob Storage
|
||||
# XXX: replace region with `eu-central-1`-like region
|
||||
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
|
||||
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
|
||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||
|
||||
- name: Install rust binaries
|
||||
run: |
|
||||
# Install target binaries
|
||||
@@ -379,32 +405,6 @@ jobs:
|
||||
done
|
||||
fi
|
||||
|
||||
- name: Run rust tests
|
||||
env:
|
||||
NEXTEST_RETRIES: 3
|
||||
run: |
|
||||
#nextest does not yet support running doctests
|
||||
cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
|
||||
|
||||
for io_engine in std-fs tokio-epoll-uring ; do
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
|
||||
done
|
||||
|
||||
# Run separate tests for real S3
|
||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
||||
|
||||
# Run separate tests for real Azure Blob Storage
|
||||
# XXX: replace region with `eu-central-1`-like region
|
||||
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
|
||||
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
|
||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||
|
||||
- name: Install postgres binaries
|
||||
run: cp -a pg_install /tmp/neon/pg_install
|
||||
|
||||
@@ -432,8 +432,9 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build_type: [ debug, release ]
|
||||
pg_version: [ v14, v15, v16 ]
|
||||
build_type: [ debug ]
|
||||
pg_version: [ v15 ]
|
||||
pageserver_compaction_algorithm_kind: [ "legacy", "tiered" ]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
@@ -461,6 +462,9 @@ jobs:
|
||||
PAGESERVER_GET_VECTORED_IMPL: vectored
|
||||
PAGESERVER_GET_IMPL: vectored
|
||||
PAGESERVER_VALIDATE_VEC_GET: true
|
||||
PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM: 'kind="${{ matrix.pageserver_compaction_algorithm_kind }}"'
|
||||
# catch the tests that override `tenant_config` as a whole without specifying the compaction algorithm `kind`
|
||||
NEON_PAGESERVER_PANIC_ON_UNSPECIFIED_COMPACTION_ALGORITHM: true
|
||||
|
||||
# Temporary disable this step until we figure out why it's so flaky
|
||||
# Ref https://github.com/neondatabase/neon/issues/4540
|
||||
@@ -549,7 +553,7 @@ jobs:
|
||||
report-benchmarks-failures:
|
||||
needs: [ benchmarks, create-test-report ]
|
||||
if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure'
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: slackapi/slack-github-action@v1
|
||||
@@ -774,7 +778,7 @@ jobs:
|
||||
|
||||
neon-image:
|
||||
needs: [ neon-image-arch, tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
@@ -859,26 +863,6 @@ jobs:
|
||||
tags: |
|
||||
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
|
||||
|
||||
- name: Build neon extensions test image
|
||||
if: matrix.version == 'v16'
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
build-args: |
|
||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||
PG_VERSION=${{ matrix.version }}
|
||||
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
|
||||
provenance: false
|
||||
push: true
|
||||
pull: true
|
||||
file: Dockerfile.compute-node
|
||||
target: neon-pg-ext-test
|
||||
cache-from: type=registry,ref=neondatabase/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }}
|
||||
cache-to: type=registry,ref=neondatabase/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }},mode=max
|
||||
tags: |
|
||||
neondatabase/neon-test-extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
|
||||
|
||||
- name: Build compute-tools image
|
||||
# compute-tools are Postgres independent, so build it only once
|
||||
if: matrix.version == 'v16'
|
||||
@@ -904,7 +888,7 @@ jobs:
|
||||
|
||||
compute-node-image:
|
||||
needs: [ compute-node-image-arch, tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -922,13 +906,6 @@ jobs:
|
||||
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-x64 \
|
||||
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64
|
||||
|
||||
- name: Create multi-arch neon-test-extensions image
|
||||
if: matrix.version == 'v16'
|
||||
run: |
|
||||
docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-x64 \
|
||||
neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64
|
||||
|
||||
- name: Create multi-arch compute-tools image
|
||||
if: matrix.version == 'v16'
|
||||
run: |
|
||||
@@ -965,7 +942,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -1047,7 +1024,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Verify docker-compose example and test extensions
|
||||
- name: Verify docker-compose example
|
||||
timeout-minutes: 20
|
||||
run: env TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh
|
||||
|
||||
@@ -1059,7 +1036,7 @@ jobs:
|
||||
|
||||
promote-images:
|
||||
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
env:
|
||||
VERSIONS: v14 v15 v16
|
||||
@@ -1101,12 +1078,10 @@ jobs:
|
||||
$repo/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
|
||||
done
|
||||
done
|
||||
docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
|
||||
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
|
||||
|
||||
trigger-custom-extensions-build-and-wait:
|
||||
needs: [ check-permissions, tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Set PR's status to pending and request a remote CI test
|
||||
run: |
|
||||
|
||||
@@ -19,7 +19,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
check-image:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
|
||||
found: ${{ steps.check-image.outputs.found }}
|
||||
|
||||
2
.github/workflows/check-permissions.yml
vendored
2
.github/workflows/check-permissions.yml
vendored
@@ -16,7 +16,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
check-permissions:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Disallow CI runs on PRs from forks
|
||||
if: |
|
||||
|
||||
@@ -9,7 +9,7 @@ on:
|
||||
|
||||
jobs:
|
||||
cleanup:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Cleanup
|
||||
run: |
|
||||
|
||||
2
.github/workflows/pg_clients.yml
vendored
2
.github/workflows/pg_clients.yml
vendored
@@ -20,7 +20,7 @@ concurrency:
|
||||
jobs:
|
||||
test-postgres-client-libs:
|
||||
# TODO: switch to gen2 runner, requires docker
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [ ubuntu-latest ]
|
||||
|
||||
env:
|
||||
DEFAULT_PG_VERSION: 14
|
||||
|
||||
2
.github/workflows/pin-build-tools-image.yml
vendored
2
.github/workflows/pin-build-tools-image.yml
vendored
@@ -26,7 +26,7 @@ permissions: {}
|
||||
|
||||
jobs:
|
||||
tag-image:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
env:
|
||||
FROM_TAG: ${{ inputs.from-tag }}
|
||||
|
||||
2
.github/workflows/release-notify.yml
vendored
2
.github/workflows/release-notify.yml
vendored
@@ -19,7 +19,7 @@ on:
|
||||
|
||||
jobs:
|
||||
notify:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [ ubuntu-latest ]
|
||||
|
||||
steps:
|
||||
- uses: neondatabase/dev-actions/release-pr-notify@main
|
||||
|
||||
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@@ -26,7 +26,7 @@ defaults:
|
||||
jobs:
|
||||
create-storage-release-branch:
|
||||
if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: write # for `git push`
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
|
||||
create-proxy-release-branch:
|
||||
if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
permissions:
|
||||
contents: write # for `git push`
|
||||
|
||||
6
.github/workflows/trigger-e2e-tests.yml
vendored
6
.github/workflows/trigger-e2e-tests.yml
vendored
@@ -19,7 +19,7 @@ env:
|
||||
jobs:
|
||||
cancel-previous-e2e-tests:
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Cancel previous e2e-tests runs for this PR
|
||||
@@ -31,7 +31,7 @@ jobs:
|
||||
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
|
||||
|
||||
tag:
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: [ ubuntu-latest ]
|
||||
outputs:
|
||||
build-tag: ${{ steps.build-tag.outputs.tag }}
|
||||
|
||||
@@ -62,7 +62,7 @@ jobs:
|
||||
|
||||
trigger-e2e-tests:
|
||||
needs: [ tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
TAG: ${{ needs.tag.outputs.build-tag }}
|
||||
steps:
|
||||
|
||||
265
Cargo.lock
generated
265
Cargo.lock
generated
@@ -776,6 +776,7 @@ dependencies = [
|
||||
"pin-project",
|
||||
"serde",
|
||||
"time",
|
||||
"tz-rs",
|
||||
"url",
|
||||
"uuid",
|
||||
]
|
||||
@@ -1290,6 +1291,12 @@ dependencies = [
|
||||
"tiny-keccak",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const_fn"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fbdcdcb6d86f71c5e97409ad45898af11cbc995b4ee8112d59095a28d376c935"
|
||||
|
||||
[[package]]
|
||||
name = "const_format"
|
||||
version = "0.2.30"
|
||||
@@ -1969,6 +1976,21 @@ version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
|
||||
dependencies = [
|
||||
"foreign-types-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types-shared"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.1.0"
|
||||
@@ -2598,6 +2620,19 @@ dependencies = [
|
||||
"tokio-io-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-tls"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"hyper 0.14.26",
|
||||
"native-tls",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-util"
|
||||
version = "0.1.3"
|
||||
@@ -2915,12 +2950,6 @@ version = "0.4.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0b5399f6804fbab912acbd8878ed3532d506b7c951b8f9f164ef90fef39e3f4"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.10"
|
||||
@@ -3139,6 +3168,24 @@ version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
|
||||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"log",
|
||||
"openssl",
|
||||
"openssl-probe",
|
||||
"openssl-sys",
|
||||
"schannel",
|
||||
"security-framework",
|
||||
"security-framework-sys",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.25.1"
|
||||
@@ -3309,6 +3356,15 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_threads"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "oauth2"
|
||||
version = "4.4.2"
|
||||
@@ -3358,12 +3414,50 @@ version = "11.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
version = "0.10.60"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"cfg-if",
|
||||
"foreign-types",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"openssl-macros",
|
||||
"openssl-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-macros"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-probe"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys"
|
||||
version = "0.9.96"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opentelemetry"
|
||||
version = "0.20.0"
|
||||
@@ -3570,7 +3664,6 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"svg_fmt",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
@@ -4002,7 +4095,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres"
|
||||
version = "0.19.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=skip-auth-1rtt#42784ef44fe62b6edca9813ca47bfc1c52c60a73"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4012,10 +4105,21 @@ dependencies = [
|
||||
"tokio-postgres",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres-native-tls"
|
||||
version = "0.5.0"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"native-tls",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tokio-postgres",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres-protocol"
|
||||
version = "0.6.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=skip-auth-1rtt#42784ef44fe62b6edca9813ca47bfc1c52c60a73"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"base64 0.20.0",
|
||||
"byteorder",
|
||||
@@ -4034,7 +4138,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "postgres-types"
|
||||
version = "0.2.4"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=skip-auth-1rtt#42784ef44fe62b6edca9813ca47bfc1c52c60a73"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fallible-iterator",
|
||||
@@ -4120,7 +4224,6 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"itertools",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol",
|
||||
"rand 0.8.5",
|
||||
@@ -4310,7 +4413,6 @@ dependencies = [
|
||||
"http 1.1.0",
|
||||
"http-body-util",
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"hyper 0.14.26",
|
||||
"hyper 1.2.0",
|
||||
"hyper-util",
|
||||
@@ -4321,6 +4423,7 @@ dependencies = [
|
||||
"md5",
|
||||
"measured",
|
||||
"metrics",
|
||||
"native-tls",
|
||||
"once_cell",
|
||||
"opentelemetry",
|
||||
"parking_lot 0.12.1",
|
||||
@@ -4328,6 +4431,7 @@ dependencies = [
|
||||
"parquet_derive",
|
||||
"pbkdf2",
|
||||
"pin-project-lite",
|
||||
"postgres-native-tls",
|
||||
"postgres-protocol",
|
||||
"postgres_backend",
|
||||
"pq_proto",
|
||||
@@ -4346,7 +4450,6 @@ dependencies = [
|
||||
"rstest",
|
||||
"rustc-hash",
|
||||
"rustls 0.22.4",
|
||||
"rustls-native-certs 0.7.0",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
@@ -4376,6 +4479,7 @@ dependencies = [
|
||||
"utils",
|
||||
"uuid",
|
||||
"walkdir",
|
||||
"webpki-roots 0.25.2",
|
||||
"workspace_hack",
|
||||
"x509-parser",
|
||||
]
|
||||
@@ -4682,21 +4786,20 @@ dependencies = [
|
||||
"http 0.2.9",
|
||||
"http-body 0.4.5",
|
||||
"hyper 0.14.26",
|
||||
"hyper-rustls 0.24.0",
|
||||
"hyper-tls",
|
||||
"ipnet",
|
||||
"js-sys",
|
||||
"log",
|
||||
"mime",
|
||||
"native-tls",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustls 0.21.11",
|
||||
"rustls-pemfile 1.0.2",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"tokio",
|
||||
"tokio-rustls 0.24.0",
|
||||
"tokio-native-tls",
|
||||
"tokio-util",
|
||||
"tower-service",
|
||||
"url",
|
||||
@@ -4704,7 +4807,6 @@ dependencies = [
|
||||
"wasm-bindgen-futures",
|
||||
"wasm-streams 0.3.0",
|
||||
"web-sys",
|
||||
"webpki-roots 0.25.2",
|
||||
"winreg 0.50.0",
|
||||
]
|
||||
|
||||
@@ -5109,6 +5211,51 @@ version = "1.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
|
||||
|
||||
[[package]]
|
||||
name = "s3_scrubber"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
"aws-config",
|
||||
"aws-sdk-s3",
|
||||
"aws-smithy-async",
|
||||
"bincode",
|
||||
"bytes",
|
||||
"camino",
|
||||
"chrono",
|
||||
"clap",
|
||||
"crc32c",
|
||||
"either",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"hex",
|
||||
"histogram",
|
||||
"itertools",
|
||||
"native-tls",
|
||||
"pageserver",
|
||||
"pageserver_api",
|
||||
"postgres-native-tls",
|
||||
"postgres_ffi",
|
||||
"rand 0.8.5",
|
||||
"remote_storage",
|
||||
"reqwest 0.12.4",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-rustls 0.25.0",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-appender",
|
||||
"tracing-subscriber",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "safekeeper"
|
||||
version = "0.1.0"
|
||||
@@ -5765,54 +5912,6 @@ dependencies = [
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "storage_scrubber"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
"aws-config",
|
||||
"aws-sdk-s3",
|
||||
"aws-smithy-async",
|
||||
"bincode",
|
||||
"bytes",
|
||||
"camino",
|
||||
"chrono",
|
||||
"clap",
|
||||
"crc32c",
|
||||
"either",
|
||||
"futures",
|
||||
"futures-util",
|
||||
"hex",
|
||||
"histogram",
|
||||
"humantime",
|
||||
"itertools",
|
||||
"once_cell",
|
||||
"pageserver",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"rand 0.8.5",
|
||||
"remote_storage",
|
||||
"reqwest 0.12.4",
|
||||
"rustls 0.22.4",
|
||||
"rustls-native-certs 0.7.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-postgres-rustls",
|
||||
"tokio-rustls 0.25.0",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-appender",
|
||||
"tracing-subscriber",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "storcon_cli"
|
||||
version = "0.1.0"
|
||||
@@ -5820,8 +5919,6 @@ dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"comfy-table",
|
||||
"futures",
|
||||
"humantime",
|
||||
"hyper 0.14.26",
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
@@ -6092,6 +6189,8 @@ checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"js-sys",
|
||||
"libc",
|
||||
"num_threads",
|
||||
"serde",
|
||||
"time-core",
|
||||
"time-macros",
|
||||
@@ -6167,7 +6266,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "tokio-epoll-uring"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#08ccfa94ff5507727bf4d8d006666b5b192e04c6"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#342ddd197a060a8354e8f11f4d12994419fff939"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"nix 0.26.4",
|
||||
@@ -6201,10 +6300,20 @@ dependencies = [
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-native-tls"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
|
||||
dependencies = [
|
||||
"native-tls",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.7"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=skip-auth-1rtt#42784ef44fe62b6edca9813ca47bfc1c52c60a73"
|
||||
source = "git+https://github.com/neondatabase/rust-postgres.git?branch=neon#20031d7a9ee1addeae6e0968e3899ae6bf01cee2"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"byteorder",
|
||||
@@ -6607,6 +6716,15 @@ version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
|
||||
|
||||
[[package]]
|
||||
name = "tz-rs"
|
||||
version = "0.6.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33851b15c848fad2cf4b105c6bb66eb9512b6f6c44a4b13f57c53c73c707e2b4"
|
||||
dependencies = [
|
||||
"const_fn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "uname"
|
||||
version = "0.1.1"
|
||||
@@ -6679,12 +6797,11 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "uring-common"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#08ccfa94ff5507727bf4d8d006666b5b192e04c6"
|
||||
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#342ddd197a060a8354e8f11f4d12994419fff939"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"io-uring",
|
||||
"libc",
|
||||
"linux-raw-sys 0.6.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7512,9 +7629,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zeroize"
|
||||
version = "1.7.0"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d"
|
||||
checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9"
|
||||
dependencies = [
|
||||
"zeroize_derive",
|
||||
]
|
||||
|
||||
33
Cargo.toml
33
Cargo.toml
@@ -13,7 +13,7 @@ members = [
|
||||
"safekeeper",
|
||||
"storage_broker",
|
||||
"storage_controller",
|
||||
"storage_scrubber",
|
||||
"s3_scrubber",
|
||||
"workspace_hack",
|
||||
"trace",
|
||||
"libs/compute_api",
|
||||
@@ -46,10 +46,10 @@ anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
arc-swap = "1.6"
|
||||
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
|
||||
atomic-take = "1.1.0"
|
||||
azure_core = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] }
|
||||
azure_identity = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_storage = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_storage_blobs = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_core = "0.19"
|
||||
azure_identity = "0.19"
|
||||
azure_storage = "0.19"
|
||||
azure_storage_blobs = "0.19"
|
||||
flate2 = "1.0.26"
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
@@ -114,13 +114,14 @@ md5 = "0.7.0"
|
||||
measured = { version = "0.0.21", features=["lasso"] }
|
||||
measured-process = { version = "0.0.21" }
|
||||
memoffset = "0.8"
|
||||
native-tls = "0.2"
|
||||
nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
|
||||
notify = "6.0.0"
|
||||
num_cpus = "1.15"
|
||||
num-traits = "0.2.15"
|
||||
once_cell = "1.13"
|
||||
opentelemetry = "0.20.0"
|
||||
opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions = "0.12.0"
|
||||
parking_lot = "0.12"
|
||||
parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
|
||||
@@ -128,7 +129,7 @@ parquet_derive = "51.0.0"
|
||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||
pin-project-lite = "0.2"
|
||||
procfs = "0.14"
|
||||
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
|
||||
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
|
||||
prost = "0.11"
|
||||
rand = "0.8"
|
||||
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
|
||||
@@ -184,13 +185,13 @@ tower-service = "0.3.2"
|
||||
tracing = "0.1"
|
||||
tracing-error = "0.2.0"
|
||||
tracing-opentelemetry = "0.21.0"
|
||||
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json", "ansi"] }
|
||||
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json", "ansi"] }
|
||||
twox-hash = { version = "1.6.3", default-features = false }
|
||||
url = "2.2"
|
||||
urlencoding = "2.1"
|
||||
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
|
||||
walkdir = "2.3.2"
|
||||
rustls-native-certs = "0.7"
|
||||
webpki-roots = "0.25"
|
||||
x509-parser = "0.15"
|
||||
|
||||
## TODO replace this with tracing
|
||||
@@ -198,10 +199,11 @@ env_logger = "0.10"
|
||||
log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
## Other git libraries
|
||||
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
|
||||
@@ -239,8 +241,9 @@ tonic-build = "0.9"
|
||||
|
||||
[patch.crates-io]
|
||||
|
||||
# Needed to get `tokio-postgres-rustls` to depend on our fork.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="skip-auth-1rtt" }
|
||||
# This is only needed for proxy's tests.
|
||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
# bug fixes for UUID
|
||||
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
|
||||
|
||||
@@ -141,7 +141,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.79.0
|
||||
ENV RUSTC_VERSION=1.78.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
|
||||
|
||||
@@ -89,7 +89,7 @@ RUN apt update && \
|
||||
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
|
||||
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
|
||||
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
|
||||
mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
|
||||
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make clean && cp -R /sfcgal/* /
|
||||
@@ -98,7 +98,7 @@ ENV PATH "/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
|
||||
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
|
||||
mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
|
||||
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
./autogen.sh && \
|
||||
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
|
||||
@@ -124,7 +124,7 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postg
|
||||
|
||||
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
|
||||
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
|
||||
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
|
||||
mkdir build && cd build && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release .. && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -149,7 +149,7 @@ RUN apt update && \
|
||||
|
||||
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
|
||||
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
|
||||
mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
|
||||
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
|
||||
# generate and copy upgrade scripts
|
||||
mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
|
||||
cp upgrade/* /usr/local/pgsql/share/extension/ && \
|
||||
@@ -194,7 +194,7 @@ RUN case "$(uname -m)" in \
|
||||
|
||||
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
|
||||
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
|
||||
mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
|
||||
mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
|
||||
mkdir build && cd build && \
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -204,7 +204,7 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
|
||||
|
||||
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
|
||||
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
|
||||
mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
|
||||
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
@@ -222,7 +222,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
|
||||
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
|
||||
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
|
||||
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
# unit extension's "create extension" script relies on absolute install path to fill some reference tables.
|
||||
@@ -243,12 +243,12 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY patches/pgvector.patch /pgvector.patch
|
||||
|
||||
# By default, pgvector Makefile uses `-march=native`. We don't want that,
|
||||
# By default, pgvector Makefile uses `-march=native`. We don't want that,
|
||||
# because we build the images on different machines than where we run them.
|
||||
# Pass OPTFLAGS="" to remove it.
|
||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
|
||||
echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
|
||||
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.0.tar.gz -O pgvector.tar.gz && \
|
||||
echo "1b5503a35c265408b6eb282621c5e1e75f7801afc04eecb950796cfee2e3d1d8 pgvector.tar.gz" | sha256sum --check && \
|
||||
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /pgvector.patch && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
@@ -266,7 +266,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
|
||||
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
|
||||
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
|
||||
mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
|
||||
|
||||
@@ -281,7 +281,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
|
||||
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
|
||||
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
|
||||
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
|
||||
@@ -297,7 +297,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
|
||||
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
|
||||
@@ -313,7 +313,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
|
||||
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
|
||||
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
|
||||
mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
|
||||
@@ -329,7 +329,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
|
||||
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
|
||||
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
|
||||
@@ -345,7 +345,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
|
||||
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
|
||||
mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
|
||||
mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
|
||||
@@ -361,7 +361,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
|
||||
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
|
||||
mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
|
||||
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
|
||||
@@ -377,7 +377,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
|
||||
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
|
||||
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
|
||||
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
|
||||
@@ -393,7 +393,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
|
||||
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
|
||||
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
|
||||
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
|
||||
@@ -424,7 +424,7 @@ RUN case "${PG_VERSION}" in \
|
||||
apt-get install -y cmake && \
|
||||
wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
|
||||
echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
|
||||
mkdir timescaledb-src && cd timescaledb-src && tar xzf ../timescaledb.tar.gz --strip-components=1 -C . && \
|
||||
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
|
||||
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
|
||||
cd build && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -462,7 +462,7 @@ RUN case "${PG_VERSION}" in \
|
||||
esac && \
|
||||
wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
|
||||
echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
|
||||
@@ -481,7 +481,7 @@ RUN apt-get update && \
|
||||
apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
|
||||
wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
|
||||
echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
|
||||
mkdir kq_imcx-src && cd kq_imcx-src && tar xzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
|
||||
mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
mkdir build && cd build && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release .. && \
|
||||
@@ -505,7 +505,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
|
||||
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
|
||||
@@ -531,7 +531,7 @@ RUN apt-get update && \
|
||||
ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
|
||||
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
|
||||
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
|
||||
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
|
||||
mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
|
||||
cmake \
|
||||
-D RDK_BUILD_CAIRO_SUPPORT=OFF \
|
||||
-D RDK_BUILD_INCHI_SUPPORT=ON \
|
||||
@@ -571,7 +571,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
|
||||
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
|
||||
@@ -588,7 +588,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
|
||||
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
|
||||
@@ -605,7 +605,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
|
||||
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_semver-src && cd pg_semver-src && tar xvzf ../pg_semver.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control
|
||||
@@ -631,7 +631,7 @@ RUN case "${PG_VERSION}" in \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \
|
||||
echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_embedding-src && cd pg_embedding-src && tar xzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install
|
||||
|
||||
@@ -647,7 +647,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
|
||||
@@ -696,7 +696,7 @@ ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.2.0.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
echo "9118fc508a6e231e7a39acaa6f066fcd79af17a5db757b47d2eefbe14f7794f0 pg_jsonschema.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
|
||||
@@ -713,7 +713,7 @@ ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.4.0.tar.gz -O pg_graphql.tar.gz && \
|
||||
echo "bd8dc7230282b3efa9ae5baf053a54151ed0e66881c7c53750e2d0c765776edc pg_graphql.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
# it's needed to enable extension because it uses untrusted C language
|
||||
@@ -733,7 +733,7 @@ ARG PG_VERSION
|
||||
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
|
||||
RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
|
||||
|
||||
@@ -749,7 +749,7 @@ ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -O pgx_ulid.tar.gz && \
|
||||
echo "ee5db82945d2d9f2d15597a80cf32de9dca67b897f605beb830561705f12683c pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
echo "******************* Apply a patch for Postgres 16 support; delete in the next release ******************" && \
|
||||
wget https://github.com/pksunkara/pgx_ulid/commit/f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
|
||||
patch -p1 < f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
|
||||
@@ -771,7 +771,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
|
||||
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
|
||||
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
|
||||
mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install
|
||||
|
||||
@@ -787,7 +787,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
|
||||
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_ivm-src && cd pg_ivm-src && tar xvzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control
|
||||
@@ -804,7 +804,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
|
||||
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_partman-src && cd pg_partman-src && tar xvzf ../pg_partman.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control
|
||||
@@ -928,69 +928,6 @@ RUN rm -r /usr/local/pgsql/include
|
||||
# if they were to be used by other libraries.
|
||||
RUN rm /usr/local/pgsql/lib/lib*.a
|
||||
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer neon-pg-ext-test
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM neon-pg-ext-build AS neon-pg-ext-test
|
||||
ARG PG_VERSION
|
||||
RUN mkdir /ext-src
|
||||
|
||||
#COPY --from=postgis-build /postgis.tar.gz /ext-src/
|
||||
#COPY --from=postgis-build /sfcgal/* /usr
|
||||
COPY --from=plv8-build /plv8.tar.gz /ext-src/
|
||||
COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/
|
||||
COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/
|
||||
COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/
|
||||
COPY --from=vector-pg-build /pgvector.patch /ext-src/
|
||||
COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
|
||||
#COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src
|
||||
#COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src
|
||||
#COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
|
||||
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
|
||||
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
|
||||
#COPY --from=rum-pg-build /rum.tar.gz /ext-src
|
||||
#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
|
||||
COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
|
||||
COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
|
||||
COPY --from=hll-pg-build /hll.tar.gz /ext-src
|
||||
COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
|
||||
#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
|
||||
COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
|
||||
COPY patches/pg_hintplan.patch /ext-src
|
||||
#COPY --from=kq-imcx-pg-build /kq_imcx.tar.gz /ext-src
|
||||
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
|
||||
COPY patches/pg_cron.patch /ext-src
|
||||
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
|
||||
COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
|
||||
COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
|
||||
COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
|
||||
COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
|
||||
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
|
||||
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
|
||||
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
|
||||
COPY patches/pg_anon.patch /ext-src
|
||||
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
|
||||
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
|
||||
RUN cd /ext-src/ && for f in *.tar.gz; \
|
||||
do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
|
||||
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|
||||
|| exit 1; rm -f $f; done
|
||||
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
|
||||
# cmake is required for the h3 test
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
RUN patch -p1 < /ext-src/pg_hintplan.patch
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN patch -p1 </ext-src/pg_anon.patch
|
||||
RUN patch -p1 </ext-src/pg_cron.patch
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
ENV PGUSER=cloud_admin
|
||||
ENV PGDATABASE=postgres
|
||||
#########################################################################################
|
||||
#
|
||||
# Final layer
|
||||
|
||||
2
Makefile
2
Makefile
@@ -124,8 +124,6 @@ postgres-%: postgres-configure-% \
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
|
||||
+@echo "Compiling amcheck $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
|
||||
+@echo "Compiling test_decoding $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
|
||||
|
||||
.PHONY: postgres-clean-%
|
||||
postgres-clean-%:
|
||||
|
||||
@@ -735,7 +735,7 @@ fn cli() -> clap::Command {
|
||||
Arg::new("filecache-connstr")
|
||||
.long("filecache-connstr")
|
||||
.default_value(
|
||||
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
|
||||
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable",
|
||||
)
|
||||
.value_name("FILECACHE_CONNSTR"),
|
||||
)
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
ALTER ROLE neon_superuser BYPASSRLS;
|
||||
@@ -1,18 +0,0 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name text;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
|
||||
END LOOP;
|
||||
|
||||
FOR role_name IN SELECT rolname FROM pg_roles
|
||||
WHERE
|
||||
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
|
||||
END LOOP;
|
||||
END $$;
|
||||
@@ -1,6 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
|
||||
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -1 +0,0 @@
|
||||
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
|
||||
@@ -1,4 +0,0 @@
|
||||
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
|
||||
-- interacted with by neon_superuser without permission issues.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
|
||||
@@ -1,4 +0,0 @@
|
||||
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
|
||||
-- interacted with by neon_superuser without permission issues.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
|
||||
@@ -1,3 +0,0 @@
|
||||
-- SKIP: Moved inline to the handle_grants() functions.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
|
||||
@@ -1,3 +0,0 @@
|
||||
-- SKIP: Moved inline to the handle_grants() functions.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
|
||||
@@ -1,13 +0,0 @@
|
||||
-- SKIP: The original goal of this migration was to prevent creating
|
||||
-- subscriptions, but this migration was insufficient.
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name TEXT;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
|
||||
END LOOP;
|
||||
END $$;
|
||||
@@ -774,21 +774,44 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
|
||||
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
|
||||
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
// Add new migrations in numerical order.
|
||||
let migrations = [
|
||||
include_str!("./migrations/0000-neon_superuser_bypass_rls.sql"),
|
||||
include_str!("./migrations/0001-alter_roles.sql"),
|
||||
include_str!("./migrations/0002-grant_pg_create_subscription_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0003-grant_pg_monitor_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0004-grant_all_on_tables_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0005-grant_all_on_sequences_to_neon_superuser.sql"),
|
||||
include_str!(
|
||||
"./migrations/0006-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
|
||||
),
|
||||
include_str!(
|
||||
"./migrations/0007-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
|
||||
),
|
||||
include_str!("./migrations/0008-revoke_replication_for_previously_allowed_roles.sql"),
|
||||
"ALTER ROLE neon_superuser BYPASSRLS",
|
||||
r#"
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name text;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
|
||||
END LOOP;
|
||||
|
||||
FOR role_name IN SELECT rolname FROM pg_roles
|
||||
WHERE
|
||||
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
|
||||
END LOOP;
|
||||
END $$;
|
||||
"#,
|
||||
r#"
|
||||
DO $$
|
||||
BEGIN
|
||||
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
|
||||
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
|
||||
END IF;
|
||||
END
|
||||
$$;"#,
|
||||
"GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION",
|
||||
// Don't remove: these are some SQLs that we originally applied in migrations but turned out to execute somewhere else.
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
// Add new migrations below.
|
||||
];
|
||||
|
||||
let mut func = || {
|
||||
@@ -824,13 +847,10 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
|
||||
|
||||
while current_migration < migrations.len() {
|
||||
let migration = &migrations[current_migration];
|
||||
if migration.starts_with("-- SKIP") {
|
||||
info!("Skipping migration id={}", current_migration);
|
||||
if migration.is_empty() {
|
||||
info!("Skip migration id={}", current_migration);
|
||||
} else {
|
||||
info!(
|
||||
"Running migration id={}:\n{}\n",
|
||||
current_migration, migration
|
||||
);
|
||||
info!("Running migration:\n{}\n", migration);
|
||||
client.simple_query(migration).with_context(|| {
|
||||
format!("handle_migrations current_migration={}", current_migration)
|
||||
})?;
|
||||
|
||||
@@ -9,8 +9,6 @@ license.workspace = true
|
||||
anyhow.workspace = true
|
||||
clap.workspace = true
|
||||
comfy-table.workspace = true
|
||||
futures.workspace = true
|
||||
humantime.workspace = true
|
||||
hyper.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
pageserver_client.workspace = true
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use futures::StreamExt;
|
||||
use std::{collections::HashMap, str::FromStr, time::Duration};
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
@@ -8,9 +7,8 @@ use pageserver_api::{
|
||||
TenantDescribeResponse, TenantPolicyRequest,
|
||||
},
|
||||
models::{
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
|
||||
ShardParameters, TenantConfig, TenantConfigRequest, TenantCreateRequest,
|
||||
TenantShardSplitRequest, TenantShardSplitResponse,
|
||||
LocationConfigSecondary, ShardParameters, TenantConfig, TenantConfigRequest,
|
||||
TenantCreateRequest, TenantShardSplitRequest, TenantShardSplitResponse,
|
||||
},
|
||||
shard::{ShardStripeSize, TenantShardId},
|
||||
};
|
||||
@@ -127,44 +125,6 @@ enum Command {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
/// Uncleanly drop a tenant from the storage controller: this doesn't delete anything from pageservers. Appropriate
|
||||
/// if you e.g. used `tenant-warmup` by mistake on a tenant ID that doesn't really exist, or is in some other region.
|
||||
TenantDrop {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
unclean: bool,
|
||||
},
|
||||
NodeDrop {
|
||||
#[arg(long)]
|
||||
node_id: NodeId,
|
||||
#[arg(long)]
|
||||
unclean: bool,
|
||||
},
|
||||
TenantSetTimeBasedEviction {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
period: humantime::Duration,
|
||||
#[arg(long)]
|
||||
threshold: humantime::Duration,
|
||||
},
|
||||
// Drain a set of specified pageservers by moving the primary attachments to pageservers
|
||||
// outside of the specified set.
|
||||
Drain {
|
||||
// Set of pageserver node ids to drain.
|
||||
#[arg(long)]
|
||||
nodes: Vec<NodeId>,
|
||||
// Optional: migration concurrency (default is 8)
|
||||
#[arg(long)]
|
||||
concurrency: Option<usize>,
|
||||
// Optional: maximum number of shards to migrate
|
||||
#[arg(long)]
|
||||
max_shards: Option<usize>,
|
||||
// Optional: when set to true, nothing is migrated, but the plan is printed to stdout
|
||||
#[arg(long)]
|
||||
dry_run: Option<bool>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -714,234 +674,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
}
|
||||
Command::TenantDrop { tenant_id, unclean } => {
|
||||
if !unclean {
|
||||
anyhow::bail!("This command is not a tenant deletion, and uncleanly drops all controller state for the tenant. If you know what you're doing, add `--unclean` to proceed.")
|
||||
}
|
||||
storcon_client
|
||||
.dispatch::<(), ()>(
|
||||
Method::POST,
|
||||
format!("debug/v1/tenant/{tenant_id}/drop"),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::NodeDrop { node_id, unclean } => {
|
||||
if !unclean {
|
||||
anyhow::bail!("This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it. If you know what you're doing, add `--unclean` to proceed.")
|
||||
}
|
||||
storcon_client
|
||||
.dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantSetTimeBasedEviction {
|
||||
tenant_id,
|
||||
period,
|
||||
threshold,
|
||||
} => {
|
||||
vps_client
|
||||
.tenant_config(&TenantConfigRequest {
|
||||
tenant_id,
|
||||
config: TenantConfig {
|
||||
eviction_policy: Some(EvictionPolicy::LayerAccessThreshold(
|
||||
EvictionPolicyLayerAccessThreshold {
|
||||
period: period.into(),
|
||||
threshold: threshold.into(),
|
||||
},
|
||||
)),
|
||||
..Default::default()
|
||||
},
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
Command::Drain {
|
||||
nodes,
|
||||
concurrency,
|
||||
max_shards,
|
||||
dry_run,
|
||||
} => {
|
||||
// Load the list of nodes, split them up into the drained and filled sets,
|
||||
// and validate that draining is possible.
|
||||
let node_descs = storcon_client
|
||||
.dispatch::<(), Vec<NodeDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/node".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut node_to_drain_descs = Vec::new();
|
||||
let mut node_to_fill_descs = Vec::new();
|
||||
|
||||
for desc in node_descs {
|
||||
let to_drain = nodes.iter().any(|id| *id == desc.id);
|
||||
if to_drain {
|
||||
node_to_drain_descs.push(desc);
|
||||
} else {
|
||||
node_to_fill_descs.push(desc);
|
||||
}
|
||||
}
|
||||
|
||||
if nodes.len() != node_to_drain_descs.len() {
|
||||
anyhow::bail!("Drain requested for node which doesn't exist.")
|
||||
}
|
||||
|
||||
node_to_fill_descs.retain(|desc| {
|
||||
matches!(desc.availability, NodeAvailabilityWrapper::Active)
|
||||
&& matches!(
|
||||
desc.scheduling,
|
||||
NodeSchedulingPolicy::Active | NodeSchedulingPolicy::Filling
|
||||
)
|
||||
});
|
||||
|
||||
if node_to_fill_descs.is_empty() {
|
||||
anyhow::bail!("There are no nodes to drain to")
|
||||
}
|
||||
|
||||
// Set the node scheduling policy to draining for the nodes which
|
||||
// we plan to drain.
|
||||
for node_desc in node_to_drain_descs.iter() {
|
||||
let req = NodeConfigureRequest {
|
||||
node_id: node_desc.id,
|
||||
availability: None,
|
||||
scheduling: Some(NodeSchedulingPolicy::Draining),
|
||||
};
|
||||
|
||||
storcon_client
|
||||
.dispatch::<_, ()>(
|
||||
Method::PUT,
|
||||
format!("control/v1/node/{}/config", node_desc.id),
|
||||
Some(req),
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Perform the drain: move each tenant shard scheduled on a node to
|
||||
// be drained to a node which is being filled. A simple round robin
|
||||
// strategy is used to pick the new node.
|
||||
let tenants = storcon_client
|
||||
.dispatch::<(), Vec<TenantDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/tenant".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut selected_node_idx = 0;
|
||||
|
||||
struct DrainMove {
|
||||
tenant_shard_id: TenantShardId,
|
||||
from: NodeId,
|
||||
to: NodeId,
|
||||
}
|
||||
|
||||
let mut moves: Vec<DrainMove> = Vec::new();
|
||||
|
||||
let shards = tenants
|
||||
.into_iter()
|
||||
.flat_map(|tenant| tenant.shards.into_iter());
|
||||
for shard in shards {
|
||||
if let Some(max_shards) = max_shards {
|
||||
if moves.len() >= max_shards {
|
||||
println!(
|
||||
"Stop planning shard moves since the requested maximum was reached"
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let should_migrate = {
|
||||
if let Some(attached_to) = shard.node_attached {
|
||||
node_to_drain_descs
|
||||
.iter()
|
||||
.map(|desc| desc.id)
|
||||
.any(|id| id == attached_to)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
if !should_migrate {
|
||||
continue;
|
||||
}
|
||||
|
||||
moves.push(DrainMove {
|
||||
tenant_shard_id: shard.tenant_shard_id,
|
||||
from: shard
|
||||
.node_attached
|
||||
.expect("We only migrate attached tenant shards"),
|
||||
to: node_to_fill_descs[selected_node_idx].id,
|
||||
});
|
||||
selected_node_idx = (selected_node_idx + 1) % node_to_fill_descs.len();
|
||||
}
|
||||
|
||||
let total_moves = moves.len();
|
||||
|
||||
if dry_run == Some(true) {
|
||||
println!("Dryrun requested. Planned {total_moves} moves:");
|
||||
for mv in &moves {
|
||||
println!("{}: {} -> {}", mv.tenant_shard_id, mv.from, mv.to)
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
const DEFAULT_MIGRATE_CONCURRENCY: usize = 8;
|
||||
let mut stream = futures::stream::iter(moves)
|
||||
.map(|mv| {
|
||||
let client = Client::new(cli.api.clone(), cli.jwt.clone());
|
||||
async move {
|
||||
client
|
||||
.dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
|
||||
Method::PUT,
|
||||
format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
|
||||
Some(TenantShardMigrateRequest {
|
||||
tenant_shard_id: mv.tenant_shard_id,
|
||||
node_id: mv.to,
|
||||
}),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))
|
||||
}
|
||||
})
|
||||
.buffered(concurrency.unwrap_or(DEFAULT_MIGRATE_CONCURRENCY));
|
||||
|
||||
let mut success = 0;
|
||||
let mut failure = 0;
|
||||
|
||||
while let Some(res) = stream.next().await {
|
||||
match res {
|
||||
Ok(_) => {
|
||||
success += 1;
|
||||
}
|
||||
Err((tenant_shard_id, from, to, error)) => {
|
||||
failure += 1;
|
||||
println!(
|
||||
"Failed to migrate {} from node {} to node {}: {}",
|
||||
tenant_shard_id, from, to, error
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (success + failure) % 20 == 0 {
|
||||
println!(
|
||||
"Processed {}/{} shards: {} succeeded, {} failed",
|
||||
success + failure,
|
||||
total_moves,
|
||||
success,
|
||||
failure
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
println!(
|
||||
"Processed {}/{} shards: {} succeeded, {} failed",
|
||||
success + failure,
|
||||
total_moves,
|
||||
success,
|
||||
failure
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -99,13 +99,6 @@ name = "async-executor"
|
||||
[[bans.deny]]
|
||||
name = "smol"
|
||||
|
||||
[[bans.deny]]
|
||||
# We want to use rustls instead of the platform's native tls implementation.
|
||||
name = "native-tls"
|
||||
|
||||
[[bans.deny]]
|
||||
name = "openssl"
|
||||
|
||||
# This section is considered when running `cargo deny check sources`.
|
||||
# More documentation about the 'sources' section can be found here:
|
||||
# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html
|
||||
|
||||
@@ -8,11 +8,6 @@ USER root
|
||||
RUN apt-get update && \
|
||||
apt-get install -y curl \
|
||||
jq \
|
||||
python3-pip \
|
||||
netcat
|
||||
#Faker is required for the pg_anon test
|
||||
RUN pip3 install Faker
|
||||
#This is required for the pg_hintplan test
|
||||
RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src
|
||||
|
||||
USER postgres
|
||||
USER postgres
|
||||
|
||||
@@ -95,7 +95,7 @@
|
||||
},
|
||||
{
|
||||
"name": "shared_preload_libraries",
|
||||
"value": "neon,pg_cron,timescaledb,pg_stat_statements",
|
||||
"value": "neon",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
@@ -127,16 +127,6 @@
|
||||
"name": "max_replication_flush_lag",
|
||||
"value": "10GB",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "cron.database",
|
||||
"value": "postgres",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "session_preload_libraries",
|
||||
"value": "anon",
|
||||
"vartype": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
version: '3'
|
||||
|
||||
services:
|
||||
minio:
|
||||
restart: always
|
||||
@@ -159,12 +161,12 @@ services:
|
||||
context: ./compute_wrapper/
|
||||
args:
|
||||
- REPOSITORY=${REPOSITORY:-neondatabase}
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-16}
|
||||
- COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}
|
||||
- TAG=${TAG:-latest}
|
||||
- http_proxy=$http_proxy
|
||||
- https_proxy=$https_proxy
|
||||
environment:
|
||||
- PG_VERSION=${PG_VERSION:-16}
|
||||
- PG_VERSION=${PG_VERSION:-14}
|
||||
#- RUST_BACKTRACE=1
|
||||
# Mount the test files directly, for faster editing cycle.
|
||||
volumes:
|
||||
@@ -192,14 +194,3 @@ services:
|
||||
done"
|
||||
depends_on:
|
||||
- compute
|
||||
|
||||
neon-test-extensions:
|
||||
profiles: ["test-extensions"]
|
||||
image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
|
||||
entrypoint:
|
||||
- "/bin/bash"
|
||||
- "-c"
|
||||
command:
|
||||
- sleep 1800
|
||||
depends_on:
|
||||
- compute
|
||||
|
||||
@@ -7,94 +7,52 @@
|
||||
# Implicitly accepts `REPOSITORY` and `TAG` env vars that are passed into the compose file
|
||||
# Their defaults point at DockerHub `neondatabase/neon:latest` image.`,
|
||||
# to verify custom image builds (e.g pre-published ones).
|
||||
#
|
||||
# A test script for postgres extensions
|
||||
# Currently supports only v16
|
||||
#
|
||||
|
||||
set -eux -o pipefail
|
||||
|
||||
COMPOSE_FILE='docker-compose.yml'
|
||||
cd $(dirname $0)
|
||||
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
COMPOSE_FILE=$SCRIPT_DIR/docker-compose.yml
|
||||
|
||||
COMPUTE_CONTAINER_NAME=docker-compose-compute-1
|
||||
TEST_CONTAINER_NAME=docker-compose-neon-test-extensions-1
|
||||
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"
|
||||
: ${http_proxy:=}
|
||||
: ${https_proxy:=}
|
||||
export http_proxy https_proxy
|
||||
SQL="CREATE TABLE t(key int primary key, value text); insert into t values(1,1); select * from t;"
|
||||
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -c '$SQL' postgres"
|
||||
|
||||
cleanup() {
|
||||
echo "show container information"
|
||||
docker ps
|
||||
docker compose --profile test-extensions -f $COMPOSE_FILE logs
|
||||
docker compose -f $COMPOSE_FILE logs
|
||||
echo "stop containers..."
|
||||
docker compose --profile test-extensions -f $COMPOSE_FILE down
|
||||
docker compose -f $COMPOSE_FILE down
|
||||
}
|
||||
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
|
||||
for pg_version in 14 15 16; do
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
PG_TEST_VERSION=$(($pg_version < 16 ? 16 : $pg_version))
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d
|
||||
echo "start containers (pg_version=$pg_version)."
|
||||
PG_VERSION=$pg_version docker compose -f $COMPOSE_FILE up --build -d
|
||||
|
||||
echo "wait until the compute is ready. timeout after 60s. "
|
||||
cnt=0
|
||||
while sleep 3; do
|
||||
while sleep 1; do
|
||||
# check timeout
|
||||
cnt=`expr $cnt + 3`
|
||||
cnt=`expr $cnt + 1`
|
||||
if [ $cnt -gt 60 ]; then
|
||||
echo "timeout before the compute is ready."
|
||||
cleanup
|
||||
exit 1
|
||||
fi
|
||||
if docker compose --profile test-extensions -f $COMPOSE_FILE logs "compute_is_ready" | grep -q "accepting connections"; then
|
||||
|
||||
# check if the compute is ready
|
||||
set +o pipefail
|
||||
result=`docker compose -f $COMPOSE_FILE logs "compute_is_ready" | grep "accepting connections" | wc -l`
|
||||
set -o pipefail
|
||||
if [ $result -eq 1 ]; then
|
||||
echo "OK. The compute is ready to connect."
|
||||
echo "execute simple queries."
|
||||
docker exec $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
|
||||
cleanup
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $pg_version -ge 16 ]
|
||||
then
|
||||
echo Enabling trust connection
|
||||
docker exec $COMPUTE_CONTAINER_NAME bash -c "sed -i '\$d' /var/db/postgres/compute/pg_hba.conf && echo -e 'host\t all\t all\t all\t trust' >> /var/db/postgres/compute/pg_hba.conf && psql $PSQL_OPTION -c 'select pg_reload_conf()' "
|
||||
echo Adding postgres role
|
||||
docker exec $COMPUTE_CONTAINER_NAME psql $PSQL_OPTION -c "CREATE ROLE postgres SUPERUSER LOGIN"
|
||||
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
|
||||
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
|
||||
echo Adding dummy config
|
||||
docker exec $COMPUTE_CONTAINER_NAME touch /var/db/postgres/compute/compute_ctl_temp_override.conf
|
||||
# This block is required for the pg_anon extension test.
|
||||
# The test assumes that it is running on the same host with the postgres engine.
|
||||
# In our case it's not true, that's why we are copying files to the compute node
|
||||
TMPDIR=$(mktemp -d)
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
|
||||
echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
|
||||
rm -rf $TMPDIR
|
||||
TMPDIR=$(mktemp -d)
|
||||
# The following block does the same for the pg_hintplan test
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
|
||||
docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
|
||||
rm -rf $TMPDIR
|
||||
# We are running tests now
|
||||
if docker exec -e SKIP=rum-src,timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
|
||||
$TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
|
||||
then
|
||||
cleanup
|
||||
else
|
||||
FAILED=$(tail -1 testout.txt)
|
||||
for d in $FAILED
|
||||
do
|
||||
mkdir $d
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.diffs $d || true
|
||||
docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.out $d || true
|
||||
cat $d/regression.out $d/regression.diffs || true
|
||||
done
|
||||
rm -rf $FAILED
|
||||
cleanup
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
cleanup
|
||||
done
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -x
|
||||
|
||||
cd /ext-src
|
||||
FAILED=
|
||||
LIST=$((echo ${SKIP} | sed 's/,/\n/g'; ls -d *-src) | sort | uniq -u)
|
||||
for d in ${LIST}
|
||||
do
|
||||
[ -d ${d} ] || continue
|
||||
psql -c "select 1" >/dev/null || break
|
||||
make -C ${d} installcheck || FAILED="${d} ${FAILED}"
|
||||
done
|
||||
[ -z "${FAILED}" ] && exit 0
|
||||
echo ${FAILED}
|
||||
exit 1
|
||||
@@ -4,18 +4,18 @@
|
||||
|
||||
Currently we build two main images:
|
||||
|
||||
- [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14.
|
||||
- [neondatabase/neon](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
|
||||
- [neondatabase/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres).
|
||||
|
||||
And additional intermediate image:
|
||||
|
||||
- [neondatabase/compute-tools](https://hub.docker.com/repository/docker/neondatabase/compute-tools) — compute node configuration management tools.
|
||||
|
||||
## Build pipeline
|
||||
## Building pipeline
|
||||
|
||||
We build all images after a successful `release` tests run and push automatically to Docker Hub with two parallel CI jobs
|
||||
|
||||
1. `neondatabase/compute-tools` and `neondatabase/compute-node-v16` (and -v15 and -v14)
|
||||
1. `neondatabase/compute-tools` and `neondatabase/compute-node`
|
||||
|
||||
2. `neondatabase/neon`
|
||||
|
||||
@@ -34,12 +34,12 @@ You can see a [docker compose](https://docs.docker.com/compose/) example to crea
|
||||
1. create containers
|
||||
|
||||
You can specify version of neon cluster using following environment values.
|
||||
- PG_VERSION: postgres version for compute (default is 16 as of this writing)
|
||||
- TAG: the tag version of [docker image](https://registry.hub.docker.com/r/neondatabase/neon/tags), which is tagged in [CI test](/.github/workflows/build_and_test.yml). Default is 'latest'
|
||||
- PG_VERSION: postgres version for compute (default is 14)
|
||||
- TAG: the tag version of [docker image](https://registry.hub.docker.com/r/neondatabase/neon/tags) (default is latest), which is tagged in [CI test](/.github/workflows/build_and_test.yml)
|
||||
```
|
||||
$ cd docker-compose/
|
||||
$ docker-compose down # remove the containers if exists
|
||||
$ PG_VERSION=16 TAG=latest docker-compose up --build -d # You can specify the postgres and image version
|
||||
$ PG_VERSION=15 TAG=2937 docker-compose up --build -d # You can specify the postgres and image version
|
||||
Creating network "dockercompose_default" with the default driver
|
||||
Creating docker-compose_storage_broker_1 ... done
|
||||
(...omit...)
|
||||
@@ -47,31 +47,29 @@ Creating docker-compose_storage_broker_1 ... done
|
||||
|
||||
2. connect compute node
|
||||
```
|
||||
$ psql postgresql://cloud_admin:cloud_admin@localhost:55433/postgres
|
||||
psql (16.3)
|
||||
Type "help" for help.
|
||||
|
||||
$ echo "localhost:55433:postgres:cloud_admin:cloud_admin" >> ~/.pgpass
|
||||
$ chmod 600 ~/.pgpass
|
||||
$ psql -h localhost -p 55433 -U cloud_admin
|
||||
postgres=# CREATE TABLE t(key int primary key, value text);
|
||||
CREATE TABLE
|
||||
postgres=# insert into t values(1, 1);
|
||||
postgres=# insert into t values(1,1);
|
||||
INSERT 0 1
|
||||
postgres=# select * from t;
|
||||
key | value
|
||||
key | value
|
||||
-----+-------
|
||||
1 | 1
|
||||
(1 row)
|
||||
|
||||
```
|
||||
|
||||
3. If you want to see the log, you can use `docker-compose logs` command.
|
||||
```
|
||||
# check the container name you want to see
|
||||
$ docker ps
|
||||
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
|
||||
3582f6d76227 docker-compose_compute "/shell/compute.sh" 2 minutes ago Up 2 minutes 0.0.0.0:3080->3080/tcp, :::3080->3080/tcp, 0.0.0.0:55433->55433/tcp, :::55433->55433/tcp docker-compose_compute_1
|
||||
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
|
||||
d6968a5ae912 dockercompose_compute "/shell/compute.sh" 5 minutes ago Up 5 minutes 0.0.0.0:3080->3080/tcp, 0.0.0.0:55433->55433/tcp dockercompose_compute_1
|
||||
(...omit...)
|
||||
|
||||
$ docker logs -f docker-compose_compute_1
|
||||
$ docker logs -f dockercompose_compute_1
|
||||
2022-10-21 06:15:48.757 GMT [56] LOG: connection authorized: user=cloud_admin database=postgres application_name=psql
|
||||
2022-10-21 06:17:00.307 GMT [56] LOG: [NEON_SMGR] libpagestore: connected to 'host=pageserver port=6400'
|
||||
(...omit...)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use anyhow::{bail, Result};
|
||||
use byteorder::{ByteOrder, BE};
|
||||
use bytes::BufMut;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::RepOriginId;
|
||||
use postgres_ffi::{Oid, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{fmt, ops::Range};
|
||||
@@ -39,9 +39,6 @@ pub const RELATION_SIZE_PREFIX: u8 = 0x61;
|
||||
/// The key prefix of AUX file keys.
|
||||
pub const AUX_KEY_PREFIX: u8 = 0x62;
|
||||
|
||||
/// The key prefix of ReplOrigin keys.
|
||||
pub const REPL_ORIGIN_KEY_PREFIX: u8 = 0x63;
|
||||
|
||||
/// Check if the key falls in the range of metadata keys.
|
||||
pub const fn is_metadata_key_slice(key: &[u8]) -> bool {
|
||||
key[0] >= METADATA_KEY_BEGIN_PREFIX && key[0] < METADATA_KEY_END_PREFIX
|
||||
@@ -56,8 +53,14 @@ impl Key {
|
||||
/// Encode a metadata key to a storage key.
|
||||
pub fn from_metadata_key_fixed_size(key: &[u8; METADATA_KEY_SIZE]) -> Self {
|
||||
assert!(is_metadata_key_slice(key), "key not in metadata key range");
|
||||
// Metadata key space ends at 0x7F so it's fine to directly convert it to i128.
|
||||
Self::from_i128(i128::from_be_bytes(*key))
|
||||
Key {
|
||||
field1: key[0],
|
||||
field2: u16::from_be_bytes(key[1..3].try_into().unwrap()) as u32,
|
||||
field3: u32::from_be_bytes(key[3..7].try_into().unwrap()),
|
||||
field4: u32::from_be_bytes(key[7..11].try_into().unwrap()),
|
||||
field5: key[11],
|
||||
field6: u32::from_be_bytes(key[12..16].try_into().unwrap()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode a metadata key to a storage key.
|
||||
@@ -65,6 +68,17 @@ impl Key {
|
||||
Self::from_metadata_key_fixed_size(key.try_into().expect("expect 16 byte metadata key"))
|
||||
}
|
||||
|
||||
/// Extract a metadata key to a writer. The result should always be 16 bytes.
|
||||
pub fn extract_metadata_key_to_writer(&self, mut writer: impl BufMut) {
|
||||
writer.put_u8(self.field1);
|
||||
assert!(self.field2 <= 0xFFFF);
|
||||
writer.put_u16(self.field2 as u16);
|
||||
writer.put_u32(self.field3);
|
||||
writer.put_u32(self.field4);
|
||||
writer.put_u8(self.field5);
|
||||
writer.put_u32(self.field6);
|
||||
}
|
||||
|
||||
/// Get the range of metadata keys.
|
||||
pub const fn metadata_key_range() -> Range<Self> {
|
||||
Key {
|
||||
@@ -107,7 +121,7 @@ impl Key {
|
||||
/// As long as Neon does not support tablespace (because of lack of access to local file system),
|
||||
/// we can assume that only some predefined namespace OIDs are used which can fit in u16
|
||||
pub fn to_i128(&self) -> i128 {
|
||||
assert!(self.field2 <= 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222);
|
||||
assert!(self.field2 < 0xFFFF || self.field2 == 0xFFFFFFFF || self.field2 == 0x22222222);
|
||||
(((self.field1 & 0x7F) as i128) << 120)
|
||||
| (((self.field2 & 0xFFFF) as i128) << 104)
|
||||
| ((self.field3 as i128) << 72)
|
||||
@@ -161,7 +175,7 @@ impl Key {
|
||||
}
|
||||
|
||||
/// Convert a 18B slice to a key. This function should not be used for metadata keys because field2 is handled differently.
|
||||
/// Use [`Key::from_i128`] instead if you want to handle 16B keys (i.e., metadata keys).
|
||||
/// Use [`Key::from_metadata_key`] instead.
|
||||
pub fn from_slice(b: &[u8]) -> Self {
|
||||
Key {
|
||||
field1: b[0],
|
||||
@@ -174,7 +188,7 @@ impl Key {
|
||||
}
|
||||
|
||||
/// Convert a key to a 18B slice. This function should not be used for metadata keys because field2 is handled differently.
|
||||
/// Use [`Key::to_i128`] instead if you want to get a 16B key (i.e., metadata keys).
|
||||
/// Use [`Key::extract_metadata_key_to_writer`] instead.
|
||||
pub fn write_to_byte_slice(&self, buf: &mut [u8]) {
|
||||
buf[0] = self.field1;
|
||||
BE::write_u32(&mut buf[1..5], self.field2);
|
||||
@@ -385,14 +399,7 @@ pub fn rel_size_to_key(rel: RelTag) -> Key {
|
||||
field3: rel.dbnode,
|
||||
field4: rel.relnode,
|
||||
field5: rel.forknum,
|
||||
field6: 0xffff_ffff,
|
||||
}
|
||||
}
|
||||
|
||||
impl Key {
|
||||
#[inline(always)]
|
||||
pub fn is_rel_size_key(&self) -> bool {
|
||||
self.field1 == 0 && self.field6 == u32::MAX
|
||||
field6: 0xffffffff,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -433,25 +440,6 @@ pub fn slru_dir_to_key(kind: SlruKind) -> Key {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn slru_dir_kind(key: &Key) -> Option<Result<SlruKind, u32>> {
|
||||
if key.field1 == 0x01
|
||||
&& key.field3 == 0
|
||||
&& key.field4 == 0
|
||||
&& key.field5 == 0
|
||||
&& key.field6 == 0
|
||||
{
|
||||
match key.field2 {
|
||||
0 => Some(Ok(SlruKind::Clog)),
|
||||
1 => Some(Ok(SlruKind::MultiXactMembers)),
|
||||
2 => Some(Ok(SlruKind::MultiXactOffsets)),
|
||||
x => Some(Err(x)),
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
|
||||
Key {
|
||||
@@ -480,17 +468,7 @@ pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
|
||||
field3: 1,
|
||||
field4: segno,
|
||||
field5: 0,
|
||||
field6: 0xffff_ffff,
|
||||
}
|
||||
}
|
||||
|
||||
impl Key {
|
||||
pub fn is_slru_segment_size_key(&self) -> bool {
|
||||
self.field1 == 0x01
|
||||
&& self.field2 < 0x03
|
||||
&& self.field3 == 0x01
|
||||
&& self.field5 == 0
|
||||
&& self.field6 == u32::MAX
|
||||
field6: 0xffffffff,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -591,37 +569,6 @@ pub const AUX_FILES_KEY: Key = Key {
|
||||
field6: 2,
|
||||
};
|
||||
|
||||
#[inline(always)]
|
||||
pub fn repl_origin_key(origin_id: RepOriginId) -> Key {
|
||||
Key {
|
||||
field1: REPL_ORIGIN_KEY_PREFIX,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: origin_id as u32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the range of replorigin keys.
|
||||
pub fn repl_origin_key_range() -> Range<Key> {
|
||||
Key {
|
||||
field1: REPL_ORIGIN_KEY_PREFIX,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: 0,
|
||||
}..Key {
|
||||
field1: REPL_ORIGIN_KEY_PREFIX,
|
||||
field2: 0,
|
||||
field3: 0,
|
||||
field4: 0,
|
||||
field5: 0,
|
||||
field6: 0x10000,
|
||||
}
|
||||
}
|
||||
|
||||
// Reverse mappings for a few Keys.
|
||||
// These are needed by WAL redo manager.
|
||||
|
||||
@@ -630,78 +577,73 @@ pub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();
|
||||
/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.
|
||||
pub const NON_INHERITED_SPARSE_RANGE: Range<Key> = Key::metadata_key_range();
|
||||
|
||||
impl Key {
|
||||
// AUX_FILES currently stores only data for logical replication (slots etc), and
|
||||
// we don't preserve these on a branch because safekeepers can't follow timeline
|
||||
// switch (and generally it likely should be optional), so ignore these.
|
||||
#[inline(always)]
|
||||
pub fn is_inherited_key(self) -> bool {
|
||||
!NON_INHERITED_RANGE.contains(&self) && !NON_INHERITED_SPARSE_RANGE.contains(&self)
|
||||
}
|
||||
// AUX_FILES currently stores only data for logical replication (slots etc), and
|
||||
// we don't preserve these on a branch because safekeepers can't follow timeline
|
||||
// switch (and generally it likely should be optional), so ignore these.
|
||||
#[inline(always)]
|
||||
pub fn is_inherited_key(key: Key) -> bool {
|
||||
!NON_INHERITED_RANGE.contains(&key) && !NON_INHERITED_SPARSE_RANGE.contains(&key)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_rel_fsm_block_key(self) -> bool {
|
||||
self.field1 == 0x00
|
||||
&& self.field4 != 0
|
||||
&& self.field5 == FSM_FORKNUM
|
||||
&& self.field6 != 0xffffffff
|
||||
}
|
||||
#[inline(always)]
|
||||
pub fn is_rel_fsm_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_rel_vm_block_key(self) -> bool {
|
||||
self.field1 == 0x00
|
||||
&& self.field4 != 0
|
||||
&& self.field5 == VISIBILITYMAP_FORKNUM
|
||||
&& self.field6 != 0xffffffff
|
||||
}
|
||||
#[inline(always)]
|
||||
pub fn is_rel_vm_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x00
|
||||
&& key.field4 != 0
|
||||
&& key.field5 == VISIBILITYMAP_FORKNUM
|
||||
&& key.field6 != 0xffffffff
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn to_slru_block(self) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
|
||||
Ok(match self.field1 {
|
||||
0x01 => {
|
||||
let kind = match self.field2 {
|
||||
0x00 => SlruKind::Clog,
|
||||
0x01 => SlruKind::MultiXactMembers,
|
||||
0x02 => SlruKind::MultiXactOffsets,
|
||||
_ => anyhow::bail!("unrecognized slru kind 0x{:02x}", self.field2),
|
||||
};
|
||||
let segno = self.field4;
|
||||
let blknum = self.field6;
|
||||
#[inline(always)]
|
||||
pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
|
||||
Ok(match key.field1 {
|
||||
0x01 => {
|
||||
let kind = match key.field2 {
|
||||
0x00 => SlruKind::Clog,
|
||||
0x01 => SlruKind::MultiXactMembers,
|
||||
0x02 => SlruKind::MultiXactOffsets,
|
||||
_ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
|
||||
};
|
||||
let segno = key.field4;
|
||||
let blknum = key.field6;
|
||||
|
||||
(kind, segno, blknum)
|
||||
}
|
||||
_ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
|
||||
})
|
||||
}
|
||||
(kind, segno, blknum)
|
||||
}
|
||||
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
|
||||
})
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_slru_block_key(self) -> bool {
|
||||
self.field1 == 0x01 // SLRU-related
|
||||
&& self.field3 == 0x00000001 // but not SlruDir
|
||||
&& self.field6 != 0xffffffff // and not SlruSegSize
|
||||
}
|
||||
#[inline(always)]
|
||||
pub fn is_slru_block_key(key: Key) -> bool {
|
||||
key.field1 == 0x01 // SLRU-related
|
||||
&& key.field3 == 0x00000001 // but not SlruDir
|
||||
&& key.field6 != 0xffffffff // and not SlruSegSize
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_rel_block_key(&self) -> bool {
|
||||
self.field1 == 0x00 && self.field4 != 0 && self.field6 != 0xffffffff
|
||||
}
|
||||
#[inline(always)]
|
||||
pub fn is_rel_block_key(key: &Key) -> bool {
|
||||
key.field1 == 0x00 && key.field4 != 0 && key.field6 != 0xffffffff
|
||||
}
|
||||
|
||||
/// Guaranteed to return `Ok()` if [`Self::is_rel_block_key`] returns `true` for `key`.
|
||||
#[inline(always)]
|
||||
pub fn to_rel_block(self) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
Ok(match self.field1 {
|
||||
0x00 => (
|
||||
RelTag {
|
||||
spcnode: self.field2,
|
||||
dbnode: self.field3,
|
||||
relnode: self.field4,
|
||||
forknum: self.field5,
|
||||
},
|
||||
self.field6,
|
||||
),
|
||||
_ => anyhow::bail!("unexpected value kind 0x{:02x}", self.field1),
|
||||
})
|
||||
}
|
||||
/// Guaranteed to return `Ok()` if [[is_rel_block_key]] returns `true` for `key`.
|
||||
#[inline(always)]
|
||||
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
|
||||
Ok(match key.field1 {
|
||||
0x00 => (
|
||||
RelTag {
|
||||
spcnode: key.field2,
|
||||
dbnode: key.field3,
|
||||
relnode: key.field4,
|
||||
forknum: key.field5,
|
||||
},
|
||||
key.field6,
|
||||
),
|
||||
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
|
||||
})
|
||||
}
|
||||
|
||||
impl std::str::FromStr for Key {
|
||||
@@ -745,15 +687,10 @@ mod tests {
|
||||
let mut metadata_key = vec![AUX_KEY_PREFIX];
|
||||
metadata_key.extend_from_slice(&[0xFF; 15]);
|
||||
let encoded_key = Key::from_metadata_key(&metadata_key);
|
||||
let output_key = encoded_key.to_i128().to_be_bytes();
|
||||
let mut output_key = Vec::new();
|
||||
encoded_key.extract_metadata_key_to_writer(&mut output_key);
|
||||
assert_eq!(metadata_key, output_key);
|
||||
assert!(encoded_key.is_metadata_key());
|
||||
assert!(is_metadata_key_slice(&metadata_key));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_possible_largest_key() {
|
||||
Key::from_i128(0x7FFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF_FFFF);
|
||||
// TODO: put this key into the system and see if anything breaks.
|
||||
}
|
||||
}
|
||||
|
||||
@@ -558,12 +558,6 @@ impl KeySpaceRandomAccum {
|
||||
self.ranges.push(range);
|
||||
}
|
||||
|
||||
pub fn add_keyspace(&mut self, keyspace: KeySpace) {
|
||||
for range in keyspace.ranges {
|
||||
self.add_range(range);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_keyspace(mut self) -> KeySpace {
|
||||
let mut ranges = Vec::new();
|
||||
if !self.ranges.is_empty() {
|
||||
|
||||
@@ -451,6 +451,8 @@ impl EvictionPolicy {
|
||||
)]
|
||||
#[strum(serialize_all = "kebab-case")]
|
||||
pub enum CompactionAlgorithm {
|
||||
#[strum(disabled)]
|
||||
NotSpecified,
|
||||
Legacy,
|
||||
Tiered,
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
|
||||
use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
|
||||
use postgres_ffi::relfile_utils::{forkname_to_number, forknumber_to_name, MAIN_FORKNUM};
|
||||
use postgres_ffi::relfile_utils::forknumber_to_name;
|
||||
use postgres_ffi::Oid;
|
||||
|
||||
///
|
||||
@@ -68,57 +68,6 @@ impl fmt::Display for RelTag {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ParseRelTagError {
|
||||
#[error("invalid forknum")]
|
||||
InvalidForknum(#[source] std::num::ParseIntError),
|
||||
#[error("missing triplet member {}", .0)]
|
||||
MissingTripletMember(usize),
|
||||
#[error("invalid triplet member {}", .0)]
|
||||
InvalidTripletMember(usize, #[source] std::num::ParseIntError),
|
||||
}
|
||||
|
||||
impl std::str::FromStr for RelTag {
|
||||
type Err = ParseRelTagError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
use ParseRelTagError::*;
|
||||
|
||||
// FIXME: in postgres logs this separator is dot
|
||||
// Example:
|
||||
// could not read block 2 in rel 1663/208101/2620.1 from page server at lsn 0/2431E6F0
|
||||
// with a regex we could get this more painlessly
|
||||
let (triplet, forknum) = match s.split_once('_').or_else(|| s.split_once('.')) {
|
||||
Some((t, f)) => {
|
||||
let forknum = forkname_to_number(Some(f));
|
||||
let forknum = if let Ok(f) = forknum {
|
||||
f
|
||||
} else {
|
||||
f.parse::<u8>().map_err(InvalidForknum)?
|
||||
};
|
||||
|
||||
(t, Some(forknum))
|
||||
}
|
||||
None => (s, None),
|
||||
};
|
||||
|
||||
let mut split = triplet
|
||||
.splitn(3, '/')
|
||||
.enumerate()
|
||||
.map(|(i, s)| s.parse::<u32>().map_err(|e| InvalidTripletMember(i, e)));
|
||||
let spcnode = split.next().ok_or(MissingTripletMember(0))??;
|
||||
let dbnode = split.next().ok_or(MissingTripletMember(1))??;
|
||||
let relnode = split.next().ok_or(MissingTripletMember(2))??;
|
||||
|
||||
Ok(RelTag {
|
||||
spcnode,
|
||||
forknum: forknum.unwrap_or(MAIN_FORKNUM),
|
||||
dbnode,
|
||||
relnode,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl RelTag {
|
||||
pub fn to_segfile_name(&self, segno: u32) -> String {
|
||||
let mut name = if self.spcnode == GLOBALTABLESPACE_OID {
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
use std::{ops::RangeInclusive, str::FromStr};
|
||||
|
||||
use crate::{key::Key, models::ShardParameters};
|
||||
use crate::{
|
||||
key::{is_rel_block_key, Key},
|
||||
models::ShardParameters,
|
||||
};
|
||||
use hex::FromHex;
|
||||
use postgres_ffi::relfile_utils::INIT_FORKNUM;
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -425,12 +428,6 @@ impl<'de> Deserialize<'de> for TenantShardId {
|
||||
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
|
||||
pub struct ShardStripeSize(pub u32);
|
||||
|
||||
impl Default for ShardStripeSize {
|
||||
fn default() -> Self {
|
||||
DEFAULT_STRIPE_SIZE
|
||||
}
|
||||
}
|
||||
|
||||
/// Layout version: for future upgrades where we might change how the key->shard mapping works
|
||||
#[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)]
|
||||
pub struct ShardLayout(u8);
|
||||
@@ -669,7 +666,7 @@ fn key_is_shard0(key: &Key) -> bool {
|
||||
// because they must be included in basebackups.
|
||||
let is_initfork = key.field5 == INIT_FORKNUM;
|
||||
|
||||
!key.is_rel_block_key() || is_initfork
|
||||
!is_rel_block_key(key) || is_initfork
|
||||
}
|
||||
|
||||
/// Provide the same result as the function in postgres `hashfn.h` with the same name
|
||||
@@ -716,25 +713,6 @@ fn key_to_shard_number(count: ShardCount, stripe_size: ShardStripeSize, key: &Ke
|
||||
ShardNumber((hash % count.0 as u32) as u8)
|
||||
}
|
||||
|
||||
/// For debugging, while not exposing the internals.
|
||||
#[derive(Debug)]
|
||||
#[allow(unused)] // used by debug formatting by pagectl
|
||||
struct KeyShardingInfo {
|
||||
shard0: bool,
|
||||
shard_number: ShardNumber,
|
||||
}
|
||||
|
||||
pub fn describe(
|
||||
key: &Key,
|
||||
shard_count: ShardCount,
|
||||
stripe_size: ShardStripeSize,
|
||||
) -> impl std::fmt::Debug {
|
||||
KeyShardingInfo {
|
||||
shard0: key_is_shard0(key),
|
||||
shard_number: key_to_shard_number(shard_count, stripe_size, key),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use utils::Hex;
|
||||
|
||||
@@ -144,7 +144,20 @@ impl PgConnectionConfig {
|
||||
// implement and this function is hardly a bottleneck. The function is only called around
|
||||
// establishing a new connection.
|
||||
#[allow(unstable_name_collisions)]
|
||||
config.options(&encode_options(&self.options));
|
||||
config.options(
|
||||
&self
|
||||
.options
|
||||
.iter()
|
||||
.map(|s| {
|
||||
if s.contains(['\\', ' ']) {
|
||||
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
|
||||
} else {
|
||||
Cow::Borrowed(s.as_str())
|
||||
}
|
||||
})
|
||||
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
|
||||
.collect::<String>(),
|
||||
);
|
||||
}
|
||||
config
|
||||
}
|
||||
@@ -165,21 +178,6 @@ impl PgConnectionConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unstable_name_collisions)]
|
||||
fn encode_options(options: &[String]) -> String {
|
||||
options
|
||||
.iter()
|
||||
.map(|s| {
|
||||
if s.contains(['\\', ' ']) {
|
||||
Cow::Owned(s.replace('\\', "\\\\").replace(' ', "\\ "))
|
||||
} else {
|
||||
Cow::Borrowed(s.as_str())
|
||||
}
|
||||
})
|
||||
.intersperse(Cow::Borrowed(" ")) // TODO: use impl from std once it's stabilized
|
||||
.collect::<String>()
|
||||
}
|
||||
|
||||
impl fmt::Display for PgConnectionConfig {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
// The password is intentionally hidden and not part of this display string.
|
||||
@@ -208,7 +206,7 @@ impl fmt::Debug for PgConnectionConfig {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests_pg_connection_config {
|
||||
use crate::{encode_options, PgConnectionConfig};
|
||||
use crate::PgConnectionConfig;
|
||||
use once_cell::sync::Lazy;
|
||||
use url::Host;
|
||||
|
||||
@@ -257,12 +255,18 @@ mod tests_pg_connection_config {
|
||||
|
||||
#[test]
|
||||
fn test_with_options() {
|
||||
let options = encode_options(&[
|
||||
"hello".to_owned(),
|
||||
"world".to_owned(),
|
||||
"with space".to_owned(),
|
||||
"and \\ backslashes".to_owned(),
|
||||
let cfg = PgConnectionConfig::new_host_port(STUB_HOST.clone(), 123).extend_options([
|
||||
"hello",
|
||||
"world",
|
||||
"with space",
|
||||
"and \\ backslashes",
|
||||
]);
|
||||
assert_eq!(options, "hello world with\\ space and\\ \\\\\\ backslashes");
|
||||
assert_eq!(cfg.host(), &*STUB_HOST);
|
||||
assert_eq!(cfg.port(), 123);
|
||||
assert_eq!(cfg.raw_address(), "stub.host.example:123");
|
||||
assert_eq!(
|
||||
cfg.to_tokio_postgres_config().get_options(),
|
||||
Some("hello world with\\ space and\\ \\\\\\ backslashes")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -126,7 +126,6 @@ fn main() -> anyhow::Result<()> {
|
||||
.allowlist_type("PageHeaderData")
|
||||
.allowlist_type("DBState")
|
||||
.allowlist_type("RelMapFile")
|
||||
.allowlist_type("RepOriginId")
|
||||
// Because structs are used for serialization, tell bindgen to emit
|
||||
// explicit padding fields.
|
||||
.explicit_padding(true)
|
||||
|
||||
@@ -110,7 +110,6 @@ pub mod pg_constants;
|
||||
pub mod relfile_utils;
|
||||
|
||||
// Export some widely used datatypes that are unlikely to change across Postgres versions
|
||||
pub use v14::bindings::RepOriginId;
|
||||
pub use v14::bindings::{uint32, uint64, Oid};
|
||||
pub use v14::bindings::{BlockNumber, OffsetNumber};
|
||||
pub use v14::bindings::{MultiXactId, TransactionId};
|
||||
|
||||
@@ -102,7 +102,7 @@ pub const XACT_XINFO_HAS_SUBXACTS: u32 = 1u32 << 1;
|
||||
pub const XACT_XINFO_HAS_RELFILENODES: u32 = 1u32 << 2;
|
||||
pub const XACT_XINFO_HAS_INVALS: u32 = 1u32 << 3;
|
||||
pub const XACT_XINFO_HAS_TWOPHASE: u32 = 1u32 << 4;
|
||||
pub const XACT_XINFO_HAS_ORIGIN: u32 = 1u32 << 5;
|
||||
// pub const XACT_XINFO_HAS_ORIGIN: u32 = 1u32 << 5;
|
||||
// pub const XACT_XINFO_HAS_AE_LOCKS: u32 = 1u32 << 6;
|
||||
// pub const XACT_XINFO_HAS_GID: u32 = 1u32 << 7;
|
||||
|
||||
@@ -167,7 +167,6 @@ pub const RM_RELMAP_ID: u8 = 7;
|
||||
pub const RM_STANDBY_ID: u8 = 8;
|
||||
pub const RM_HEAP2_ID: u8 = 9;
|
||||
pub const RM_HEAP_ID: u8 = 10;
|
||||
pub const RM_REPLORIGIN_ID: u8 = 19;
|
||||
pub const RM_LOGICALMSG_ID: u8 = 21;
|
||||
|
||||
// from neon_rmgr.h
|
||||
@@ -224,10 +223,6 @@ pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
|
||||
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
|
||||
pub const XLP_LONG_HEADER: u16 = 0x0002;
|
||||
|
||||
/* From xlog.h */
|
||||
pub const XLOG_REPLORIGIN_SET: u8 = 0x00;
|
||||
pub const XLOG_REPLORIGIN_DROP: u8 = 0x10;
|
||||
|
||||
/* From replication/slot.h */
|
||||
pub const REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN: usize = 4*4 /* offset of `slotdata` in ReplicationSlotOnDisk */
|
||||
+ 64 /* NameData */ + 4*4;
|
||||
@@ -242,9 +237,6 @@ pub const SLOTS_PER_FSM_PAGE: u32 = FSM_LEAF_NODES_PER_PAGE as u32;
|
||||
pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
|
||||
(BLCKSZ as usize - SIZEOF_PAGE_HEADER_DATA) as u32 * (8 / 2); // MAPSIZE * (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
|
||||
|
||||
/* From origin.c */
|
||||
pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;
|
||||
|
||||
// List of subdirectories inside pgdata.
|
||||
// Copied from src/bin/initdb/initdb.c
|
||||
pub const PGDATA_SUBDIRS: [&str; 22] = [
|
||||
|
||||
@@ -7,7 +7,6 @@ license.workspace = true
|
||||
[dependencies]
|
||||
bytes.workspace = true
|
||||
byteorder.workspace = true
|
||||
itertools.workspace = true
|
||||
pin-project-lite.workspace = true
|
||||
postgres-protocol.workspace = true
|
||||
rand.workspace = true
|
||||
|
||||
@@ -7,9 +7,8 @@ pub mod framed;
|
||||
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use itertools::Itertools;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{borrow::Cow, fmt, io, str};
|
||||
use std::{borrow::Cow, collections::HashMap, fmt, io, str};
|
||||
|
||||
// re-export for use in utils pageserver_feedback.rs
|
||||
pub use postgres_protocol::PG_EPOCH;
|
||||
@@ -51,37 +50,15 @@ pub enum FeStartupPacket {
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct StartupMessageParamsBuilder {
|
||||
params: BytesMut,
|
||||
}
|
||||
|
||||
impl StartupMessageParamsBuilder {
|
||||
/// Set parameter's value by its name.
|
||||
/// name and value must not contain a \0 byte
|
||||
pub fn insert(&mut self, name: &str, value: &str) {
|
||||
self.params.put(name.as_bytes());
|
||||
self.params.put(&b"\0"[..]);
|
||||
self.params.put(value.as_bytes());
|
||||
self.params.put(&b"\0"[..]);
|
||||
}
|
||||
|
||||
pub fn freeze(self) -> StartupMessageParams {
|
||||
StartupMessageParams {
|
||||
params: self.params.freeze(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
#[derive(Debug)]
|
||||
pub struct StartupMessageParams {
|
||||
params: Bytes,
|
||||
params: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl StartupMessageParams {
|
||||
/// Get parameter's value by its name.
|
||||
pub fn get(&self, name: &str) -> Option<&str> {
|
||||
self.iter().find_map(|(k, v)| (k == name).then_some(v))
|
||||
self.params.get(name).map(|s| s.as_str())
|
||||
}
|
||||
|
||||
/// Split command-line options according to PostgreSQL's logic,
|
||||
@@ -135,19 +112,15 @@ impl StartupMessageParams {
|
||||
|
||||
/// Iterate through key-value pairs in an arbitrary order.
|
||||
pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
|
||||
let params =
|
||||
std::str::from_utf8(&self.params).expect("should be validated as utf8 already");
|
||||
params.split_terminator('\0').tuples()
|
||||
self.params.iter().map(|(k, v)| (k.as_str(), v.as_str()))
|
||||
}
|
||||
|
||||
// This function is mostly useful in tests.
|
||||
#[doc(hidden)]
|
||||
pub fn new<'a, const N: usize>(pairs: [(&'a str, &'a str); N]) -> Self {
|
||||
let mut b = StartupMessageParamsBuilder::default();
|
||||
for (k, v) in pairs {
|
||||
b.insert(k, v)
|
||||
Self {
|
||||
params: pairs.map(|(k, v)| (k.to_owned(), v.to_owned())).into(),
|
||||
}
|
||||
b.freeze()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -372,21 +345,35 @@ impl FeStartupPacket {
|
||||
(major_version, minor_version) => {
|
||||
// StartupMessage
|
||||
|
||||
let s = str::from_utf8(&msg).map_err(|_e| {
|
||||
ProtocolError::BadMessage("StartupMessage params: invalid utf-8".to_owned())
|
||||
})?;
|
||||
let s = s.strip_suffix('\0').ok_or_else(|| {
|
||||
ProtocolError::Protocol(
|
||||
"StartupMessage params: missing null terminator".to_string(),
|
||||
)
|
||||
})?;
|
||||
// Parse pairs of null-terminated strings (key, value).
|
||||
// See `postgres: ProcessStartupPacket, build_startup_packet`.
|
||||
let mut tokens = str::from_utf8(&msg)
|
||||
.map_err(|_e| {
|
||||
ProtocolError::BadMessage("StartupMessage params: invalid utf-8".to_owned())
|
||||
})?
|
||||
.strip_suffix('\0') // drop packet's own null
|
||||
.ok_or_else(|| {
|
||||
ProtocolError::Protocol(
|
||||
"StartupMessage params: missing null terminator".to_string(),
|
||||
)
|
||||
})?
|
||||
.split_terminator('\0');
|
||||
|
||||
let mut params = HashMap::new();
|
||||
while let Some(name) = tokens.next() {
|
||||
let value = tokens.next().ok_or_else(|| {
|
||||
ProtocolError::Protocol(
|
||||
"StartupMessage params: key without value".to_string(),
|
||||
)
|
||||
})?;
|
||||
|
||||
params.insert(name.to_owned(), value.to_owned());
|
||||
}
|
||||
|
||||
FeStartupPacket::StartupMessage {
|
||||
major_version,
|
||||
minor_version,
|
||||
params: StartupMessageParams {
|
||||
params: msg.slice_ref(s.as_bytes()),
|
||||
},
|
||||
params: StartupMessageParams { params },
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fmt::Display;
|
||||
use std::io;
|
||||
use std::num::NonZeroU32;
|
||||
use std::pin::Pin;
|
||||
@@ -27,15 +26,13 @@ use futures::stream::Stream;
|
||||
use futures_util::StreamExt;
|
||||
use futures_util::TryStreamExt;
|
||||
use http_types::{StatusCode, Url};
|
||||
use scopeguard::ScopeGuard;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::debug;
|
||||
use utils::backoff;
|
||||
|
||||
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
|
||||
use crate::{
|
||||
error::Cancelled, AzureConfig, ConcurrencyLimiter, Download, DownloadError, Listing,
|
||||
ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel,
|
||||
error::Cancelled, s3_bucket::RequestKind, AzureConfig, ConcurrencyLimiter, Download,
|
||||
DownloadError, Listing, ListingMode, RemotePath, RemoteStorage, StorageMetadata,
|
||||
TimeTravelError, TimeoutOrCancel,
|
||||
};
|
||||
|
||||
pub struct AzureBlobStorage {
|
||||
@@ -140,8 +137,6 @@ impl AzureBlobStorage {
|
||||
let mut last_modified = None;
|
||||
let mut metadata = HashMap::new();
|
||||
|
||||
let started_at = start_measuring_requests(kind);
|
||||
|
||||
let download = async {
|
||||
let response = builder
|
||||
// convert to concrete Pageable
|
||||
@@ -205,22 +200,13 @@ impl AzureBlobStorage {
|
||||
})
|
||||
};
|
||||
|
||||
let download = tokio::select! {
|
||||
tokio::select! {
|
||||
bufs = download => bufs,
|
||||
cancel_or_timeout = cancel_or_timeout => match cancel_or_timeout {
|
||||
TimeoutOrCancel::Timeout => return Err(DownloadError::Timeout),
|
||||
TimeoutOrCancel::Cancel => return Err(DownloadError::Cancelled),
|
||||
TimeoutOrCancel::Timeout => Err(DownloadError::Timeout),
|
||||
TimeoutOrCancel::Cancel => Err(DownloadError::Cancelled),
|
||||
},
|
||||
};
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
let outcome = match &download {
|
||||
Ok(_) => AttemptOutcome::Ok,
|
||||
Err(_) => AttemptOutcome::Err,
|
||||
};
|
||||
crate::metrics::BUCKET_METRICS
|
||||
.req_seconds
|
||||
.observe_elapsed(kind, outcome, started_at);
|
||||
download
|
||||
}
|
||||
}
|
||||
|
||||
async fn permit(
|
||||
@@ -354,10 +340,7 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
metadata: Option<StorageMetadata>,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let kind = RequestKind::Put;
|
||||
let _permit = self.permit(kind, cancel).await?;
|
||||
|
||||
let started_at = start_measuring_requests(kind);
|
||||
let _permit = self.permit(RequestKind::Put, cancel).await?;
|
||||
|
||||
let op = async {
|
||||
let blob_client = self.client.blob_client(self.relative_path_to_name(to));
|
||||
@@ -381,25 +364,14 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
match fut.await {
|
||||
Ok(Ok(_response)) => Ok(()),
|
||||
Ok(Err(azure)) => Err(azure.into()),
|
||||
Err(_timeout) => Err(TimeoutOrCancel::Timeout.into()),
|
||||
Err(_timeout) => Err(TimeoutOrCancel::Cancel.into()),
|
||||
}
|
||||
};
|
||||
|
||||
let res = tokio::select! {
|
||||
tokio::select! {
|
||||
res = op => res,
|
||||
_ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),
|
||||
};
|
||||
|
||||
let outcome = match res {
|
||||
Ok(_) => AttemptOutcome::Ok,
|
||||
Err(_) => AttemptOutcome::Err,
|
||||
};
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
crate::metrics::BUCKET_METRICS
|
||||
.req_seconds
|
||||
.observe_elapsed(kind, outcome, started_at);
|
||||
|
||||
res
|
||||
_ = cancel.cancelled() => Err(TimeoutOrCancel::Cancel.into()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn download(
|
||||
@@ -445,79 +417,40 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
paths: &'a [RemotePath],
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let kind = RequestKind::Delete;
|
||||
let _permit = self.permit(kind, cancel).await?;
|
||||
let started_at = start_measuring_requests(kind);
|
||||
let _permit = self.permit(RequestKind::Delete, cancel).await?;
|
||||
|
||||
let op = async {
|
||||
// TODO batch requests are not supported by the SDK
|
||||
// TODO batch requests are also not supported by the SDK
|
||||
// https://github.com/Azure/azure-sdk-for-rust/issues/1068
|
||||
// https://github.com/Azure/azure-sdk-for-rust/issues/1249
|
||||
for path in paths {
|
||||
#[derive(Debug)]
|
||||
enum AzureOrTimeout {
|
||||
AzureError(azure_core::Error),
|
||||
Timeout,
|
||||
Cancel,
|
||||
}
|
||||
impl Display for AzureOrTimeout {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{self:?}")
|
||||
}
|
||||
}
|
||||
let warn_threshold = 3;
|
||||
let max_retries = 5;
|
||||
backoff::retry(
|
||||
|| async {
|
||||
let blob_client = self.client.blob_client(self.relative_path_to_name(path));
|
||||
let blob_client = self.client.blob_client(self.relative_path_to_name(path));
|
||||
|
||||
let request = blob_client.delete().into_future();
|
||||
let request = blob_client.delete().into_future();
|
||||
|
||||
let res = tokio::time::timeout(self.timeout, request).await;
|
||||
let res = tokio::time::timeout(self.timeout, request).await;
|
||||
|
||||
match res {
|
||||
Ok(Ok(_v)) => Ok(()),
|
||||
Ok(Err(azure_err)) => {
|
||||
if let Some(http_err) = azure_err.as_http_error() {
|
||||
if http_err.status() == StatusCode::NotFound {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(AzureOrTimeout::AzureError(azure_err))
|
||||
match res {
|
||||
Ok(Ok(_response)) => continue,
|
||||
Ok(Err(e)) => {
|
||||
if let Some(http_err) = e.as_http_error() {
|
||||
if http_err.status() == StatusCode::NotFound {
|
||||
continue;
|
||||
}
|
||||
Err(_elapsed) => Err(AzureOrTimeout::Timeout),
|
||||
}
|
||||
},
|
||||
|err| match err {
|
||||
AzureOrTimeout::AzureError(_) | AzureOrTimeout::Timeout => false,
|
||||
AzureOrTimeout::Cancel => true,
|
||||
},
|
||||
warn_threshold,
|
||||
max_retries,
|
||||
"deleting remote object",
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.ok_or_else(|| AzureOrTimeout::Cancel)
|
||||
.and_then(|x| x)
|
||||
.map_err(|e| match e {
|
||||
AzureOrTimeout::AzureError(err) => anyhow::Error::from(err),
|
||||
AzureOrTimeout::Timeout => TimeoutOrCancel::Timeout.into(),
|
||||
AzureOrTimeout::Cancel => TimeoutOrCancel::Cancel.into(),
|
||||
})?;
|
||||
return Err(e.into());
|
||||
}
|
||||
Err(_elapsed) => return Err(TimeoutOrCancel::Timeout.into()),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
};
|
||||
|
||||
let res = tokio::select! {
|
||||
tokio::select! {
|
||||
res = op => res,
|
||||
_ = cancel.cancelled() => return Err(TimeoutOrCancel::Cancel.into()),
|
||||
};
|
||||
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
crate::metrics::BUCKET_METRICS
|
||||
.req_seconds
|
||||
.observe_elapsed(kind, &res, started_at);
|
||||
res
|
||||
_ = cancel.cancelled() => Err(TimeoutOrCancel::Cancel.into()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn copy(
|
||||
@@ -526,9 +459,7 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
to: &RemotePath,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let kind = RequestKind::Copy;
|
||||
let _permit = self.permit(kind, cancel).await?;
|
||||
let started_at = start_measuring_requests(kind);
|
||||
let _permit = self.permit(RequestKind::Copy, cancel).await?;
|
||||
|
||||
let timeout = tokio::time::sleep(self.timeout);
|
||||
|
||||
@@ -572,21 +503,15 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
}
|
||||
};
|
||||
|
||||
let res = tokio::select! {
|
||||
tokio::select! {
|
||||
res = op => res,
|
||||
_ = cancel.cancelled() => return Err(anyhow::Error::new(TimeoutOrCancel::Cancel)),
|
||||
_ = cancel.cancelled() => Err(anyhow::Error::new(TimeoutOrCancel::Cancel)),
|
||||
_ = timeout => {
|
||||
let e = anyhow::Error::new(TimeoutOrCancel::Timeout);
|
||||
let e = e.context(format!("Timeout, last status: {copy_status:?}"));
|
||||
Err(e)
|
||||
},
|
||||
};
|
||||
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
crate::metrics::BUCKET_METRICS
|
||||
.req_seconds
|
||||
.observe_elapsed(kind, &res, started_at);
|
||||
res
|
||||
}
|
||||
}
|
||||
|
||||
async fn time_travel_recover(
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
mod azure_blob;
|
||||
mod error;
|
||||
mod local_fs;
|
||||
mod metrics;
|
||||
mod s3_bucket;
|
||||
mod simulate_failures;
|
||||
mod support;
|
||||
@@ -122,8 +121,8 @@ impl RemotePath {
|
||||
self.0.file_name()
|
||||
}
|
||||
|
||||
pub fn join(&self, path: impl AsRef<Utf8Path>) -> Self {
|
||||
Self(self.0.join(path))
|
||||
pub fn join(&self, segment: &Utf8Path) -> Self {
|
||||
Self(self.0.join(segment))
|
||||
}
|
||||
|
||||
pub fn get_path(&self) -> &Utf8PathBuf {
|
||||
|
||||
@@ -46,16 +46,15 @@ use utils::backoff;
|
||||
|
||||
use super::StorageMetadata;
|
||||
use crate::{
|
||||
error::Cancelled,
|
||||
metrics::{start_counting_cancelled_wait, start_measuring_requests},
|
||||
support::PermitCarrying,
|
||||
ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, RemotePath, RemoteStorage,
|
||||
S3Config, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
|
||||
REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
error::Cancelled, support::PermitCarrying, ConcurrencyLimiter, Download, DownloadError,
|
||||
Listing, ListingMode, RemotePath, RemoteStorage, S3Config, TimeTravelError, TimeoutOrCancel,
|
||||
MAX_KEYS_PER_DELETE, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
};
|
||||
|
||||
use crate::metrics::AttemptOutcome;
|
||||
pub(super) use crate::metrics::RequestKind;
|
||||
pub(super) mod metrics;
|
||||
|
||||
use self::metrics::AttemptOutcome;
|
||||
pub(super) use self::metrics::RequestKind;
|
||||
|
||||
/// AWS S3 storage.
|
||||
pub struct S3Bucket {
|
||||
@@ -228,7 +227,7 @@ impl S3Bucket {
|
||||
};
|
||||
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
crate::metrics::BUCKET_METRICS
|
||||
metrics::BUCKET_METRICS
|
||||
.wait_seconds
|
||||
.observe_elapsed(kind, started_at);
|
||||
|
||||
@@ -249,7 +248,7 @@ impl S3Bucket {
|
||||
};
|
||||
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
crate::metrics::BUCKET_METRICS
|
||||
metrics::BUCKET_METRICS
|
||||
.wait_seconds
|
||||
.observe_elapsed(kind, started_at);
|
||||
Ok(permit)
|
||||
@@ -288,7 +287,7 @@ impl S3Bucket {
|
||||
// Count this in the AttemptOutcome::Ok bucket, because 404 is not
|
||||
// an error: we expect to sometimes fetch an object and find it missing,
|
||||
// e.g. when probing for timeline indices.
|
||||
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
|
||||
metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
|
||||
kind,
|
||||
AttemptOutcome::Ok,
|
||||
started_at,
|
||||
@@ -296,7 +295,7 @@ impl S3Bucket {
|
||||
return Err(DownloadError::NotFound);
|
||||
}
|
||||
Err(e) => {
|
||||
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
|
||||
metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
|
||||
kind,
|
||||
AttemptOutcome::Err,
|
||||
started_at,
|
||||
@@ -372,12 +371,12 @@ impl S3Bucket {
|
||||
};
|
||||
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
crate::metrics::BUCKET_METRICS
|
||||
metrics::BUCKET_METRICS
|
||||
.req_seconds
|
||||
.observe_elapsed(kind, &resp, started_at);
|
||||
|
||||
let resp = resp.context("request deletion")?;
|
||||
crate::metrics::BUCKET_METRICS
|
||||
metrics::BUCKET_METRICS
|
||||
.deleted_objects_total
|
||||
.inc_by(chunk.len() as u64);
|
||||
|
||||
@@ -436,14 +435,14 @@ pin_project_lite::pin_project! {
|
||||
/// Times and tracks the outcome of the request.
|
||||
struct TimedDownload<S> {
|
||||
started_at: std::time::Instant,
|
||||
outcome: AttemptOutcome,
|
||||
outcome: metrics::AttemptOutcome,
|
||||
#[pin]
|
||||
inner: S
|
||||
}
|
||||
|
||||
impl<S> PinnedDrop for TimedDownload<S> {
|
||||
fn drop(mut this: Pin<&mut Self>) {
|
||||
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(RequestKind::Get, this.outcome, this.started_at);
|
||||
metrics::BUCKET_METRICS.req_seconds.observe_elapsed(RequestKind::Get, this.outcome, this.started_at);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -452,7 +451,7 @@ impl<S> TimedDownload<S> {
|
||||
fn new(started_at: std::time::Instant, inner: S) -> Self {
|
||||
TimedDownload {
|
||||
started_at,
|
||||
outcome: AttemptOutcome::Cancelled,
|
||||
outcome: metrics::AttemptOutcome::Cancelled,
|
||||
inner,
|
||||
}
|
||||
}
|
||||
@@ -469,8 +468,8 @@ impl<S: Stream<Item = std::io::Result<Bytes>>> Stream for TimedDownload<S> {
|
||||
let res = ready!(this.inner.poll_next(cx));
|
||||
match &res {
|
||||
Some(Ok(_)) => {}
|
||||
Some(Err(_)) => *this.outcome = AttemptOutcome::Err,
|
||||
None => *this.outcome = AttemptOutcome::Ok,
|
||||
Some(Err(_)) => *this.outcome = metrics::AttemptOutcome::Err,
|
||||
None => *this.outcome = metrics::AttemptOutcome::Ok,
|
||||
}
|
||||
|
||||
Poll::Ready(res)
|
||||
@@ -544,7 +543,7 @@ impl RemoteStorage for S3Bucket {
|
||||
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
|
||||
crate::metrics::BUCKET_METRICS
|
||||
metrics::BUCKET_METRICS
|
||||
.req_seconds
|
||||
.observe_elapsed(kind, &response, started_at);
|
||||
|
||||
@@ -626,7 +625,7 @@ impl RemoteStorage for S3Bucket {
|
||||
if let Ok(inner) = &res {
|
||||
// do not incl. timeouts as errors in metrics but cancellations
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
crate::metrics::BUCKET_METRICS
|
||||
metrics::BUCKET_METRICS
|
||||
.req_seconds
|
||||
.observe_elapsed(kind, inner, started_at);
|
||||
}
|
||||
@@ -674,7 +673,7 @@ impl RemoteStorage for S3Bucket {
|
||||
};
|
||||
|
||||
let started_at = ScopeGuard::into_inner(started_at);
|
||||
crate::metrics::BUCKET_METRICS
|
||||
metrics::BUCKET_METRICS
|
||||
.req_seconds
|
||||
.observe_elapsed(kind, &res, started_at);
|
||||
|
||||
@@ -978,6 +977,28 @@ impl RemoteStorage for S3Bucket {
|
||||
}
|
||||
}
|
||||
|
||||
/// On drop (cancellation) count towards [`metrics::BucketMetrics::cancelled_waits`].
|
||||
fn start_counting_cancelled_wait(
|
||||
kind: RequestKind,
|
||||
) -> ScopeGuard<std::time::Instant, impl FnOnce(std::time::Instant), scopeguard::OnSuccess> {
|
||||
scopeguard::guard_on_success(std::time::Instant::now(), move |_| {
|
||||
metrics::BUCKET_METRICS.cancelled_waits.get(kind).inc()
|
||||
})
|
||||
}
|
||||
|
||||
/// On drop (cancellation) add time to [`metrics::BucketMetrics::req_seconds`].
|
||||
fn start_measuring_requests(
|
||||
kind: RequestKind,
|
||||
) -> ScopeGuard<std::time::Instant, impl FnOnce(std::time::Instant), scopeguard::OnSuccess> {
|
||||
scopeguard::guard_on_success(std::time::Instant::now(), move |started_at| {
|
||||
metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
|
||||
kind,
|
||||
AttemptOutcome::Cancelled,
|
||||
started_at,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// Save RAM and only store the needed data instead of the entire ObjectVersion/DeleteMarkerEntry
|
||||
struct VerOrDelete {
|
||||
kind: VerOrDeleteKind,
|
||||
|
||||
@@ -15,7 +15,6 @@ pub(crate) enum RequestKind {
|
||||
TimeTravel = 5,
|
||||
}
|
||||
|
||||
use scopeguard::ScopeGuard;
|
||||
use RequestKind::*;
|
||||
|
||||
impl RequestKind {
|
||||
@@ -34,10 +33,10 @@ impl RequestKind {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct RequestTyped<C>([C; 6]);
|
||||
pub(super) struct RequestTyped<C>([C; 6]);
|
||||
|
||||
impl<C> RequestTyped<C> {
|
||||
pub(crate) fn get(&self, kind: RequestKind) -> &C {
|
||||
pub(super) fn get(&self, kind: RequestKind) -> &C {
|
||||
&self.0[kind.as_index()]
|
||||
}
|
||||
|
||||
@@ -59,19 +58,19 @@ impl<C> RequestTyped<C> {
|
||||
}
|
||||
|
||||
impl RequestTyped<Histogram> {
|
||||
pub(crate) fn observe_elapsed(&self, kind: RequestKind, started_at: std::time::Instant) {
|
||||
pub(super) fn observe_elapsed(&self, kind: RequestKind, started_at: std::time::Instant) {
|
||||
self.get(kind).observe(started_at.elapsed().as_secs_f64())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct PassFailCancelledRequestTyped<C> {
|
||||
pub(super) struct PassFailCancelledRequestTyped<C> {
|
||||
success: RequestTyped<C>,
|
||||
fail: RequestTyped<C>,
|
||||
cancelled: RequestTyped<C>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub(crate) enum AttemptOutcome {
|
||||
pub(super) enum AttemptOutcome {
|
||||
Ok,
|
||||
Err,
|
||||
Cancelled,
|
||||
@@ -87,7 +86,7 @@ impl<T, E> From<&Result<T, E>> for AttemptOutcome {
|
||||
}
|
||||
|
||||
impl AttemptOutcome {
|
||||
pub(crate) fn as_str(&self) -> &'static str {
|
||||
pub(super) fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
AttemptOutcome::Ok => "ok",
|
||||
AttemptOutcome::Err => "err",
|
||||
@@ -97,7 +96,7 @@ impl AttemptOutcome {
|
||||
}
|
||||
|
||||
impl<C> PassFailCancelledRequestTyped<C> {
|
||||
pub(crate) fn get(&self, kind: RequestKind, outcome: AttemptOutcome) -> &C {
|
||||
pub(super) fn get(&self, kind: RequestKind, outcome: AttemptOutcome) -> &C {
|
||||
let target = match outcome {
|
||||
AttemptOutcome::Ok => &self.success,
|
||||
AttemptOutcome::Err => &self.fail,
|
||||
@@ -120,7 +119,7 @@ impl<C> PassFailCancelledRequestTyped<C> {
|
||||
}
|
||||
|
||||
impl PassFailCancelledRequestTyped<Histogram> {
|
||||
pub(crate) fn observe_elapsed(
|
||||
pub(super) fn observe_elapsed(
|
||||
&self,
|
||||
kind: RequestKind,
|
||||
outcome: impl Into<AttemptOutcome>,
|
||||
@@ -131,44 +130,19 @@ impl PassFailCancelledRequestTyped<Histogram> {
|
||||
}
|
||||
}
|
||||
|
||||
/// On drop (cancellation) count towards [`BucketMetrics::cancelled_waits`].
|
||||
pub(crate) fn start_counting_cancelled_wait(
|
||||
kind: RequestKind,
|
||||
) -> ScopeGuard<std::time::Instant, impl FnOnce(std::time::Instant), scopeguard::OnSuccess> {
|
||||
scopeguard::guard_on_success(std::time::Instant::now(), move |_| {
|
||||
crate::metrics::BUCKET_METRICS
|
||||
.cancelled_waits
|
||||
.get(kind)
|
||||
.inc()
|
||||
})
|
||||
}
|
||||
|
||||
/// On drop (cancellation) add time to [`BucketMetrics::req_seconds`].
|
||||
pub(crate) fn start_measuring_requests(
|
||||
kind: RequestKind,
|
||||
) -> ScopeGuard<std::time::Instant, impl FnOnce(std::time::Instant), scopeguard::OnSuccess> {
|
||||
scopeguard::guard_on_success(std::time::Instant::now(), move |started_at| {
|
||||
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
|
||||
kind,
|
||||
AttemptOutcome::Cancelled,
|
||||
started_at,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) struct BucketMetrics {
|
||||
pub(super) struct BucketMetrics {
|
||||
/// Full request duration until successful completion, error or cancellation.
|
||||
pub(crate) req_seconds: PassFailCancelledRequestTyped<Histogram>,
|
||||
pub(super) req_seconds: PassFailCancelledRequestTyped<Histogram>,
|
||||
/// Total amount of seconds waited on queue.
|
||||
pub(crate) wait_seconds: RequestTyped<Histogram>,
|
||||
pub(super) wait_seconds: RequestTyped<Histogram>,
|
||||
|
||||
/// Track how many semaphore awaits were cancelled per request type.
|
||||
///
|
||||
/// This is in case cancellations are happening more than expected.
|
||||
pub(crate) cancelled_waits: RequestTyped<IntCounter>,
|
||||
pub(super) cancelled_waits: RequestTyped<IntCounter>,
|
||||
|
||||
/// Total amount of deleted objects in batches or single requests.
|
||||
pub(crate) deleted_objects_total: IntCounter,
|
||||
pub(super) deleted_objects_total: IntCounter,
|
||||
}
|
||||
|
||||
impl Default for BucketMetrics {
|
||||
@@ -78,10 +78,6 @@ where
|
||||
let e = Err(std::io::Error::from(e));
|
||||
return Poll::Ready(Some(e));
|
||||
}
|
||||
} else {
|
||||
// this would be perfectly valid behaviour for doing a graceful completion on the
|
||||
// download for example, but not one we expect to do right now.
|
||||
tracing::warn!("continuing polling after having cancelled or timeouted");
|
||||
}
|
||||
|
||||
this.inner.poll_next(cx)
|
||||
@@ -93,22 +89,13 @@ where
|
||||
}
|
||||
|
||||
/// Fires only on the first cancel or timeout, not on both.
|
||||
pub(crate) fn cancel_or_timeout(
|
||||
pub(crate) async fn cancel_or_timeout(
|
||||
timeout: Duration,
|
||||
cancel: CancellationToken,
|
||||
) -> impl std::future::Future<Output = TimeoutOrCancel> + 'static {
|
||||
// futures are lazy, they don't do anything before being polled.
|
||||
//
|
||||
// "precalculate" the wanted deadline before returning the future, so that we can use pause
|
||||
// failpoint to trigger a timeout in test.
|
||||
let deadline = tokio::time::Instant::now() + timeout;
|
||||
async move {
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep_until(deadline) => TimeoutOrCancel::Timeout,
|
||||
_ = cancel.cancelled() => {
|
||||
TimeoutOrCancel::Cancel
|
||||
},
|
||||
}
|
||||
) -> TimeoutOrCancel {
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(timeout) => TimeoutOrCancel::Timeout,
|
||||
_ = cancel.cancelled() => TimeoutOrCancel::Cancel,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -185,31 +172,4 @@ mod tests {
|
||||
_ = tokio::time::sleep(Duration::from_secs(121)) => {},
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn notified_but_pollable_after() {
|
||||
let inner = futures::stream::once(futures::future::ready(Ok(bytes::Bytes::from_static(
|
||||
b"hello world",
|
||||
))));
|
||||
let timeout = Duration::from_secs(120);
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
cancel.cancel();
|
||||
let stream = DownloadStream::new(cancel_or_timeout(timeout, cancel.clone()), inner);
|
||||
let mut stream = std::pin::pin!(stream);
|
||||
|
||||
let next = stream.next().await;
|
||||
let ioe = next.unwrap().unwrap_err();
|
||||
assert!(
|
||||
matches!(
|
||||
ioe.get_ref().unwrap().downcast_ref::<DownloadError>(),
|
||||
Some(&DownloadError::Cancelled)
|
||||
),
|
||||
"{ioe:?}"
|
||||
);
|
||||
|
||||
let next = stream.next().await;
|
||||
let bytes = next.unwrap().unwrap();
|
||||
assert_eq!(&b"hello world"[..], bytes);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
hyper.workspace = true
|
||||
opentelemetry = { workspace = true, features=["rt-tokio"] }
|
||||
opentelemetry-otlp = { workspace = true, default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-otlp = { workspace = true, default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
|
||||
opentelemetry-semantic-conventions.workspace = true
|
||||
reqwest = { workspace = true, default-features = false, features = ["rustls-tls"] }
|
||||
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
|
||||
|
||||
@@ -3,9 +3,6 @@ use std::{fs, io, path::Path};
|
||||
|
||||
use anyhow::Context;
|
||||
|
||||
mod rename_noreplace;
|
||||
pub use rename_noreplace::rename_noreplace;
|
||||
|
||||
pub trait PathExt {
|
||||
/// Returns an error if `self` is not a directory.
|
||||
fn is_empty_dir(&self) -> io::Result<bool>;
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
use nix::NixPath;
|
||||
|
||||
/// Rename a file without replacing an existing file.
|
||||
///
|
||||
/// This is a wrapper around platform-specific APIs.
|
||||
pub fn rename_noreplace<P1: ?Sized + NixPath, P2: ?Sized + NixPath>(
|
||||
src: &P1,
|
||||
dst: &P2,
|
||||
) -> nix::Result<()> {
|
||||
{
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
nix::fcntl::renameat2(
|
||||
None,
|
||||
src,
|
||||
None,
|
||||
dst,
|
||||
nix::fcntl::RenameFlags::RENAME_NOREPLACE,
|
||||
)
|
||||
}
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
let res = src.with_nix_path(|src| {
|
||||
dst.with_nix_path(|dst|
|
||||
// SAFETY: `src` and `dst` are valid C strings as per the NixPath trait and they outlive the call to renamex_np.
|
||||
unsafe {
|
||||
nix::libc::renamex_np(src.as_ptr(), dst.as_ptr(), nix::libc::RENAME_EXCL)
|
||||
})
|
||||
})??;
|
||||
nix::errno::Errno::result(res).map(drop)
|
||||
}
|
||||
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
|
||||
{
|
||||
std::compile_error!("OS does not support no-replace renames");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::{fs, path::PathBuf};
|
||||
|
||||
use super::*;
|
||||
|
||||
fn testdir() -> camino_tempfile::Utf8TempDir {
|
||||
match crate::env::var("NEON_UTILS_RENAME_NOREPLACE_TESTDIR") {
|
||||
Some(path) => {
|
||||
let path: camino::Utf8PathBuf = path;
|
||||
camino_tempfile::tempdir_in(path).unwrap()
|
||||
}
|
||||
None => camino_tempfile::tempdir().unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_absolute_paths() {
|
||||
let testdir = testdir();
|
||||
println!("testdir: {}", testdir.path());
|
||||
|
||||
let src = testdir.path().join("src");
|
||||
let dst = testdir.path().join("dst");
|
||||
|
||||
fs::write(&src, b"").unwrap();
|
||||
fs::write(&dst, b"").unwrap();
|
||||
|
||||
let src = src.canonicalize().unwrap();
|
||||
assert!(src.is_absolute());
|
||||
let dst = dst.canonicalize().unwrap();
|
||||
assert!(dst.is_absolute());
|
||||
|
||||
let result = rename_noreplace(&src, &dst);
|
||||
assert_eq!(result.unwrap_err(), nix::Error::EEXIST);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_relative_paths() {
|
||||
let testdir = testdir();
|
||||
println!("testdir: {}", testdir.path());
|
||||
|
||||
// this is fine because we run in nextest => process per test
|
||||
std::env::set_current_dir(testdir.path()).unwrap();
|
||||
|
||||
let src = PathBuf::from("src");
|
||||
let dst = PathBuf::from("dst");
|
||||
|
||||
fs::write(&src, b"").unwrap();
|
||||
fs::write(&dst, b"").unwrap();
|
||||
|
||||
let result = rename_noreplace(&src, &dst);
|
||||
assert_eq!(result.unwrap_err(), nix::Error::EEXIST);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_works_when_not_exists() {
|
||||
let testdir = testdir();
|
||||
println!("testdir: {}", testdir.path());
|
||||
|
||||
let src = testdir.path().join("src");
|
||||
let dst = testdir.path().join("dst");
|
||||
|
||||
fs::write(&src, b"content").unwrap();
|
||||
|
||||
rename_noreplace(src.as_std_path(), dst.as_std_path()).unwrap();
|
||||
assert_eq!(
|
||||
"content",
|
||||
String::from_utf8(std::fs::read(&dst).unwrap()).unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -19,13 +19,13 @@
|
||||
/// // right: [0x68; 1]
|
||||
/// # fn serialize_something() -> Vec<u8> { "hello world".as_bytes().to_vec() }
|
||||
/// ```
|
||||
pub struct Hex<S>(pub S);
|
||||
#[derive(PartialEq)]
|
||||
pub struct Hex<'a>(pub &'a [u8]);
|
||||
|
||||
impl<S: AsRef<[u8]>> std::fmt::Debug for Hex<S> {
|
||||
impl std::fmt::Debug for Hex<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "[")?;
|
||||
let chunks = self.0.as_ref().chunks(16);
|
||||
for (i, c) in chunks.enumerate() {
|
||||
for (i, c) in self.0.chunks(16).enumerate() {
|
||||
if i > 0 && !c.is_empty() {
|
||||
writeln!(f, ", ")?;
|
||||
}
|
||||
@@ -36,15 +36,6 @@ impl<S: AsRef<[u8]>> std::fmt::Debug for Hex<S> {
|
||||
write!(f, "0x{b:02x}")?;
|
||||
}
|
||||
}
|
||||
write!(f, "; {}]", self.0.as_ref().len())
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: AsRef<[u8]>, L: AsRef<[u8]>> PartialEq<Hex<R>> for Hex<L> {
|
||||
fn eq(&self, other: &Hex<R>) -> bool {
|
||||
let left = self.0.as_ref();
|
||||
let right = other.0.as_ref();
|
||||
|
||||
left == right
|
||||
write!(f, "; {}]", self.0.len())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,9 +34,6 @@ pub enum ApiError {
|
||||
#[error("Timeout")]
|
||||
Timeout(Cow<'static, str>),
|
||||
|
||||
#[error("Request cancelled")]
|
||||
Cancelled,
|
||||
|
||||
#[error(transparent)]
|
||||
InternalServerError(anyhow::Error),
|
||||
}
|
||||
@@ -77,10 +74,6 @@ impl ApiError {
|
||||
err.to_string(),
|
||||
StatusCode::REQUEST_TIMEOUT,
|
||||
),
|
||||
ApiError::Cancelled => HttpErrorBody::response_from_msg_and_status(
|
||||
self.to_string(),
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
),
|
||||
ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status(
|
||||
err.to_string(),
|
||||
StatusCode::INTERNAL_SERVER_ERROR,
|
||||
@@ -140,7 +133,6 @@ pub fn api_error_handler(api_error: ApiError) -> Response<Body> {
|
||||
ApiError::InternalServerError(_) => error!("Error processing HTTP request: {api_error:?}"),
|
||||
ApiError::ShuttingDown => info!("Shut down while processing HTTP request"),
|
||||
ApiError::Timeout(_) => info!("Timeout while processing HTTP request: {api_error:#}"),
|
||||
ApiError::Cancelled => info!("Request cancelled while processing HTTP request"),
|
||||
_ => info!("Error processing HTTP request: {api_error:#}"),
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,6 @@ pageserver = { path = ".." }
|
||||
pageserver_api.workspace = true
|
||||
remote_storage = { path = "../../libs/remote_storage" }
|
||||
postgres_ffi.workspace = true
|
||||
thiserror.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-util.workspace = true
|
||||
toml_edit.workspace = true
|
||||
|
||||
@@ -1,6 +1,11 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver::tenant::IndexPart;
|
||||
use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
|
||||
use pageserver::tenant::storage_layer::LayerName;
|
||||
use pageserver::tenant::{metadata::TimelineMetadata, IndexPart};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
#[derive(clap::Subcommand)]
|
||||
pub(crate) enum IndexPartCmd {
|
||||
@@ -12,7 +17,20 @@ pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
|
||||
IndexPartCmd::Dump { path } => {
|
||||
let bytes = tokio::fs::read(path).await.context("read file")?;
|
||||
let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
|
||||
let output = serde_json::to_string_pretty(&des).context("serialize output")?;
|
||||
#[derive(serde::Serialize)]
|
||||
struct Output<'a> {
|
||||
layer_metadata: &'a HashMap<LayerName, LayerFileMetadata>,
|
||||
disk_consistent_lsn: Lsn,
|
||||
timeline_metadata: &'a TimelineMetadata,
|
||||
}
|
||||
|
||||
let output = Output {
|
||||
layer_metadata: &des.layer_metadata,
|
||||
disk_consistent_lsn: des.get_disk_consistent_lsn(),
|
||||
timeline_metadata: &des.metadata,
|
||||
};
|
||||
|
||||
let output = serde_json::to_string_pretty(&output).context("serialize output")?;
|
||||
println!("{output}");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1,475 +0,0 @@
|
||||
use anyhow::Context;
|
||||
use clap::Parser;
|
||||
use pageserver_api::{
|
||||
key::Key,
|
||||
reltag::{BlockNumber, RelTag, SlruKind},
|
||||
shard::{ShardCount, ShardStripeSize},
|
||||
};
|
||||
use std::str::FromStr;
|
||||
|
||||
#[derive(Parser)]
|
||||
pub(super) struct DescribeKeyCommand {
|
||||
/// Key material in one of the forms: hex, span attributes captured from log, reltag blocknum
|
||||
input: Vec<String>,
|
||||
|
||||
/// The number of shards to calculate what Keys placement would be.
|
||||
#[arg(long)]
|
||||
shard_count: Option<CustomShardCount>,
|
||||
|
||||
/// The sharding stripe size.
|
||||
///
|
||||
/// The default is hardcoded. It makes no sense to provide this without providing
|
||||
/// `--shard-count`.
|
||||
#[arg(long, requires = "shard_count")]
|
||||
stripe_size: Option<u32>,
|
||||
}
|
||||
|
||||
/// Sharded shard count without unsharded count, which the actual ShardCount supports.
|
||||
#[derive(Clone, Copy)]
|
||||
pub(super) struct CustomShardCount(std::num::NonZeroU8);
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(super) enum InvalidShardCount {
|
||||
#[error(transparent)]
|
||||
ParsingFailed(#[from] std::num::ParseIntError),
|
||||
#[error("too few shards")]
|
||||
TooFewShards,
|
||||
}
|
||||
|
||||
impl FromStr for CustomShardCount {
|
||||
type Err = InvalidShardCount;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
let inner: std::num::NonZeroU8 = s.parse()?;
|
||||
if inner.get() < 2 {
|
||||
Err(InvalidShardCount::TooFewShards)
|
||||
} else {
|
||||
Ok(CustomShardCount(inner))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CustomShardCount> for ShardCount {
|
||||
fn from(value: CustomShardCount) -> Self {
|
||||
ShardCount::new(value.0.get())
|
||||
}
|
||||
}
|
||||
|
||||
impl DescribeKeyCommand {
|
||||
pub(super) fn execute(self) {
|
||||
let DescribeKeyCommand {
|
||||
input,
|
||||
shard_count,
|
||||
stripe_size,
|
||||
} = self;
|
||||
|
||||
let material = KeyMaterial::try_from(input.as_slice()).unwrap();
|
||||
let kind = material.kind();
|
||||
let key = Key::from(material);
|
||||
|
||||
println!("parsed from {kind}: {key}:");
|
||||
println!();
|
||||
println!("{key:?}");
|
||||
|
||||
macro_rules! kind_query {
|
||||
([$($name:ident),*$(,)?]) => {{[$(kind_query!($name)),*]}};
|
||||
($name:ident) => {{
|
||||
let s: &'static str = stringify!($name);
|
||||
let s = s.strip_prefix("is_").unwrap_or(s);
|
||||
let s = s.strip_suffix("_key").unwrap_or(s);
|
||||
|
||||
#[allow(clippy::needless_borrow)]
|
||||
(s, key.$name())
|
||||
}};
|
||||
}
|
||||
|
||||
// the current characterization is a mess of these boolean queries and separate
|
||||
// "recognization". I think it accurately represents how strictly we model the Key
|
||||
// right now, but could of course be made less confusing.
|
||||
|
||||
let queries = kind_query!([
|
||||
is_rel_block_key,
|
||||
is_rel_vm_block_key,
|
||||
is_rel_fsm_block_key,
|
||||
is_slru_block_key,
|
||||
is_inherited_key,
|
||||
is_rel_size_key,
|
||||
is_slru_segment_size_key,
|
||||
]);
|
||||
|
||||
let recognized_kind = "recognized kind";
|
||||
let metadata_key = "metadata key";
|
||||
let shard_placement = "shard placement";
|
||||
|
||||
let longest = queries
|
||||
.iter()
|
||||
.map(|t| t.0)
|
||||
.chain([recognized_kind, metadata_key, shard_placement])
|
||||
.map(|s| s.len())
|
||||
.max()
|
||||
.unwrap();
|
||||
|
||||
let colon = 1;
|
||||
let padding = 1;
|
||||
|
||||
for (name, is) in queries {
|
||||
let width = longest - name.len() + colon + padding;
|
||||
println!("{}{:width$}{}", name, ":", is);
|
||||
}
|
||||
|
||||
let width = longest - recognized_kind.len() + colon + padding;
|
||||
println!(
|
||||
"{}{:width$}{:?}",
|
||||
recognized_kind,
|
||||
":",
|
||||
RecognizedKeyKind::new(key),
|
||||
);
|
||||
|
||||
if let Some(shard_count) = shard_count {
|
||||
// seeing the sharding placement might be confusing, so leave it out unless shard
|
||||
// count was given.
|
||||
|
||||
let stripe_size = stripe_size.map(ShardStripeSize).unwrap_or_default();
|
||||
println!(
|
||||
"# placement with shard_count: {} and stripe_size: {}:",
|
||||
shard_count.0, stripe_size.0
|
||||
);
|
||||
let width = longest - shard_placement.len() + colon + padding;
|
||||
println!(
|
||||
"{}{:width$}{:?}",
|
||||
shard_placement,
|
||||
":",
|
||||
pageserver_api::shard::describe(&key, shard_count.into(), stripe_size)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hand-wavy "inputs we accept" for a key.
|
||||
#[derive(Debug)]
|
||||
pub(super) enum KeyMaterial {
|
||||
Hex(Key),
|
||||
String(SpanAttributesFromLogs),
|
||||
Split(RelTag, BlockNumber),
|
||||
}
|
||||
|
||||
impl KeyMaterial {
|
||||
fn kind(&self) -> &'static str {
|
||||
match self {
|
||||
KeyMaterial::Hex(_) => "hex",
|
||||
KeyMaterial::String(_) | KeyMaterial::Split(_, _) => "split",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<KeyMaterial> for Key {
|
||||
fn from(value: KeyMaterial) -> Self {
|
||||
match value {
|
||||
KeyMaterial::Hex(key) => key,
|
||||
KeyMaterial::String(SpanAttributesFromLogs(rt, blocknum))
|
||||
| KeyMaterial::Split(rt, blocknum) => {
|
||||
pageserver_api::key::rel_block_to_key(rt, blocknum)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: AsRef<str>> TryFrom<&[S]> for KeyMaterial {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(value: &[S]) -> Result<Self, Self::Error> {
|
||||
match value {
|
||||
[] => anyhow::bail!(
|
||||
"need 1..N positional arguments describing the key, try hex or a log line"
|
||||
),
|
||||
[one] => {
|
||||
let one = one.as_ref();
|
||||
|
||||
let key = Key::from_hex(one).map(KeyMaterial::Hex);
|
||||
|
||||
let attrs = SpanAttributesFromLogs::from_str(one).map(KeyMaterial::String);
|
||||
|
||||
match (key, attrs) {
|
||||
(Ok(key), _) => Ok(key),
|
||||
(_, Ok(s)) => Ok(s),
|
||||
(Err(e1), Err(e2)) => anyhow::bail!(
|
||||
"failed to parse {one:?} as hex or span attributes:\n- {e1:#}\n- {e2:#}"
|
||||
),
|
||||
}
|
||||
}
|
||||
more => {
|
||||
// assume going left to right one of these is a reltag and then we find a blocknum
|
||||
// this works, because we don't have plain numbers at least right after reltag in
|
||||
// logs. for some definition of "works".
|
||||
|
||||
let Some((reltag_at, reltag)) = more
|
||||
.iter()
|
||||
.map(AsRef::as_ref)
|
||||
.enumerate()
|
||||
.find_map(|(i, s)| {
|
||||
s.split_once("rel=")
|
||||
.map(|(_garbage, actual)| actual)
|
||||
.unwrap_or(s)
|
||||
.parse::<RelTag>()
|
||||
.ok()
|
||||
.map(|rt| (i, rt))
|
||||
})
|
||||
else {
|
||||
anyhow::bail!("found no RelTag in arguments");
|
||||
};
|
||||
|
||||
let Some(blocknum) = more
|
||||
.iter()
|
||||
.map(AsRef::as_ref)
|
||||
.skip(reltag_at)
|
||||
.find_map(|s| {
|
||||
s.split_once("blkno=")
|
||||
.map(|(_garbage, actual)| actual)
|
||||
.unwrap_or(s)
|
||||
.parse::<BlockNumber>()
|
||||
.ok()
|
||||
})
|
||||
else {
|
||||
anyhow::bail!("found no blocknum in arguments");
|
||||
};
|
||||
|
||||
Ok(KeyMaterial::Split(reltag, blocknum))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct SpanAttributesFromLogs(RelTag, BlockNumber);
|
||||
|
||||
impl std::str::FromStr for SpanAttributesFromLogs {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
// accept the span separator but do not require or fail if either is missing
|
||||
// "whatever{rel=1663/16389/24615 blkno=1052204 req_lsn=FFFFFFFF/FFFFFFFF}"
|
||||
let (_, reltag) = s
|
||||
.split_once("rel=")
|
||||
.ok_or_else(|| anyhow::anyhow!("cannot find 'rel='"))?;
|
||||
let reltag = reltag.split_whitespace().next().unwrap();
|
||||
|
||||
let (_, blocknum) = s
|
||||
.split_once("blkno=")
|
||||
.ok_or_else(|| anyhow::anyhow!("cannot find 'blkno='"))?;
|
||||
let blocknum = blocknum.split_whitespace().next().unwrap();
|
||||
|
||||
let reltag = reltag
|
||||
.parse()
|
||||
.with_context(|| format!("parse reltag from {reltag:?}"))?;
|
||||
let blocknum = blocknum
|
||||
.parse()
|
||||
.with_context(|| format!("parse blocknum from {blocknum:?}"))?;
|
||||
|
||||
Ok(Self(reltag, blocknum))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)] // debug print is used
|
||||
enum RecognizedKeyKind {
|
||||
DbDir,
|
||||
ControlFile,
|
||||
Checkpoint,
|
||||
AuxFilesV1,
|
||||
SlruDir(Result<SlruKind, u32>),
|
||||
RelMap(RelTagish<2>),
|
||||
RelDir(RelTagish<2>),
|
||||
AuxFileV2(Result<AuxFileV2, utils::Hex<[u8; 16]>>),
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[allow(unused)]
|
||||
enum AuxFileV2 {
|
||||
Recognized(&'static str, utils::Hex<[u8; 13]>),
|
||||
OtherWithPrefix(&'static str, utils::Hex<[u8; 13]>),
|
||||
Other(utils::Hex<[u8; 13]>),
|
||||
}
|
||||
|
||||
impl RecognizedKeyKind {
|
||||
fn new(key: Key) -> Option<Self> {
|
||||
use RecognizedKeyKind::{
|
||||
AuxFilesV1, Checkpoint, ControlFile, DbDir, RelDir, RelMap, SlruDir,
|
||||
};
|
||||
|
||||
let slru_dir_kind = pageserver_api::key::slru_dir_kind(&key);
|
||||
|
||||
Some(match key {
|
||||
pageserver_api::key::DBDIR_KEY => DbDir,
|
||||
pageserver_api::key::CONTROLFILE_KEY => ControlFile,
|
||||
pageserver_api::key::CHECKPOINT_KEY => Checkpoint,
|
||||
pageserver_api::key::AUX_FILES_KEY => AuxFilesV1,
|
||||
_ if slru_dir_kind.is_some() => SlruDir(slru_dir_kind.unwrap()),
|
||||
_ if key.field1 == 0 && key.field4 == 0 && key.field5 == 0 && key.field6 == 0 => {
|
||||
RelMap([key.field2, key.field3].into())
|
||||
}
|
||||
_ if key.field1 == 0 && key.field4 == 0 && key.field5 == 0 && key.field6 == 1 => {
|
||||
RelDir([key.field2, key.field3].into())
|
||||
}
|
||||
_ if key.is_metadata_key() => RecognizedKeyKind::AuxFileV2(
|
||||
AuxFileV2::new(key).ok_or_else(|| utils::Hex(key.to_i128().to_be_bytes())),
|
||||
),
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl AuxFileV2 {
|
||||
fn new(key: Key) -> Option<AuxFileV2> {
|
||||
const EMPTY_HASH: [u8; 13] = {
|
||||
let mut out = [0u8; 13];
|
||||
let hash = pageserver::aux_file::fnv_hash(b"").to_be_bytes();
|
||||
let mut i = 3;
|
||||
while i < 16 {
|
||||
out[i - 3] = hash[i];
|
||||
i += 1;
|
||||
}
|
||||
out
|
||||
};
|
||||
|
||||
let bytes = key.to_i128().to_be_bytes();
|
||||
let hash = utils::Hex(<[u8; 13]>::try_from(&bytes[3..]).unwrap());
|
||||
|
||||
assert_eq!(EMPTY_HASH.len(), hash.0.len());
|
||||
|
||||
// TODO: we could probably find the preimages for the hashes
|
||||
|
||||
Some(match (bytes[1], bytes[2]) {
|
||||
(1, 1) => AuxFileV2::Recognized("pg_logical/mappings/", hash),
|
||||
(1, 2) => AuxFileV2::Recognized("pg_logical/snapshots/", hash),
|
||||
(1, 3) if hash.0 == EMPTY_HASH => {
|
||||
AuxFileV2::Recognized("pg_logical/replorigin_checkpoint", hash)
|
||||
}
|
||||
(2, 1) => AuxFileV2::Recognized("pg_replslot/", hash),
|
||||
(1, 0xff) => AuxFileV2::OtherWithPrefix("pg_logical/", hash),
|
||||
(0xff, 0xff) => AuxFileV2::Other(hash),
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Prefix of RelTag, currently only known use cases are the two item versions.
|
||||
///
|
||||
/// Renders like a reltag with `/`, nothing else.
|
||||
struct RelTagish<const N: usize>([u32; N]);
|
||||
|
||||
impl<const N: usize> From<[u32; N]> for RelTagish<N> {
|
||||
fn from(val: [u32; N]) -> Self {
|
||||
RelTagish(val)
|
||||
}
|
||||
}
|
||||
|
||||
impl<const N: usize> std::fmt::Debug for RelTagish<N> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
use std::fmt::Write as _;
|
||||
let mut first = true;
|
||||
self.0.iter().try_for_each(|x| {
|
||||
if !first {
|
||||
f.write_char('/')?;
|
||||
}
|
||||
first = false;
|
||||
write!(f, "{}", x)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pageserver::aux_file::encode_aux_file_key;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn hex_is_key_material() {
|
||||
let m = KeyMaterial::try_from(&["000000067F0000400200DF927900FFFFFFFF"][..]).unwrap();
|
||||
assert!(matches!(m, KeyMaterial::Hex(_)), "{m:?}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn single_positional_spanalike_is_key_material() {
|
||||
// why is this needed? if you are checking many, then copypaste starts to appeal
|
||||
let strings = [
|
||||
(line!(), "2024-05-15T15:33:49.873906Z ERROR page_service_conn_main{peer_addr=A:B}:process_query{tenant_id=C timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm blkno=2 req_lsn=0/238D98C8}: error reading relation or page version: Read error: could not find data for key 000000067F00032CE5000000000000000001 (shard ShardNumber(0)) at LSN 0/1D0A16C1, request LSN 0/238D98C8, ancestor 0/0"),
|
||||
(line!(), "rel=1663/208101/2620_fsm blkno=2"),
|
||||
(line!(), "rel=1663/208101/2620.1 blkno=2"),
|
||||
];
|
||||
|
||||
let mut first: Option<Key> = None;
|
||||
|
||||
for (line, example) in strings {
|
||||
let m = KeyMaterial::try_from(&[example][..])
|
||||
.unwrap_or_else(|e| panic!("failed to parse example from line {line}: {e:?}"));
|
||||
let key = Key::from(m);
|
||||
if let Some(first) = first {
|
||||
assert_eq!(first, key);
|
||||
} else {
|
||||
first = Some(key);
|
||||
}
|
||||
}
|
||||
|
||||
// not supporting this is rather accidential, but I think the input parsing is lenient
|
||||
// enough already
|
||||
KeyMaterial::try_from(&["1663/208101/2620_fsm 2"][..]).unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_spanlike_args() {
|
||||
let strings = [
|
||||
(line!(), &["process_query{tenant_id=C", "timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm", "blkno=2", "req_lsn=0/238D98C8}"][..]),
|
||||
(line!(), &["rel=1663/208101/2620_fsm", "blkno=2"][..]),
|
||||
(line!(), &["1663/208101/2620_fsm", "2"][..]),
|
||||
];
|
||||
|
||||
let mut first: Option<Key> = None;
|
||||
|
||||
for (line, example) in strings {
|
||||
let m = KeyMaterial::try_from(example)
|
||||
.unwrap_or_else(|e| panic!("failed to parse example from line {line}: {e:?}"));
|
||||
let key = Key::from(m);
|
||||
if let Some(first) = first {
|
||||
assert_eq!(first, key);
|
||||
} else {
|
||||
first = Some(key);
|
||||
}
|
||||
}
|
||||
}
|
||||
#[test]
|
||||
fn recognized_auxfiles() {
|
||||
use AuxFileV2::*;
|
||||
|
||||
let empty = [
|
||||
0x2e, 0x07, 0xbb, 0x01, 0x42, 0x62, 0xb8, 0x21, 0x75, 0x62, 0x95, 0xc5, 0x8d,
|
||||
];
|
||||
let foobar = [
|
||||
0x62, 0x79, 0x3c, 0x64, 0xbf, 0x6f, 0x0d, 0x35, 0x97, 0xba, 0x44, 0x6f, 0x18,
|
||||
];
|
||||
|
||||
#[rustfmt::skip]
|
||||
let examples = [
|
||||
(line!(), "pg_logical/mappings/foobar", Recognized("pg_logical/mappings/", utils::Hex(foobar))),
|
||||
(line!(), "pg_logical/snapshots/foobar", Recognized("pg_logical/snapshots/", utils::Hex(foobar))),
|
||||
(line!(), "pg_logical/replorigin_checkpoint", Recognized("pg_logical/replorigin_checkpoint", utils::Hex(empty))),
|
||||
(line!(), "pg_logical/foobar", OtherWithPrefix("pg_logical/", utils::Hex(foobar))),
|
||||
(line!(), "pg_replslot/foobar", Recognized("pg_replslot/", utils::Hex(foobar))),
|
||||
(line!(), "foobar", Other(utils::Hex(foobar))),
|
||||
];
|
||||
|
||||
for (line, path, expected) in examples {
|
||||
let key = encode_aux_file_key(path);
|
||||
let recognized =
|
||||
AuxFileV2::new(key).unwrap_or_else(|| panic!("line {line} example failed"));
|
||||
|
||||
assert_eq!(recognized, expected);
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
AuxFileV2::new(Key::from_hex("600000102000000000000000000000000000").unwrap()),
|
||||
None,
|
||||
"example key has one too few 0 after 6 before 1"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,6 @@
|
||||
|
||||
mod draw_timeline_dir;
|
||||
mod index_part;
|
||||
mod key;
|
||||
mod layer_map_analyzer;
|
||||
mod layers;
|
||||
|
||||
@@ -62,8 +61,6 @@ enum Commands {
|
||||
AnalyzeLayerMap(AnalyzeLayerMapCmd),
|
||||
#[command(subcommand)]
|
||||
Layer(LayerCmd),
|
||||
/// Debug print a hex key found from logs
|
||||
Key(key::DescribeKeyCommand),
|
||||
}
|
||||
|
||||
/// Read and update pageserver metadata file
|
||||
@@ -186,7 +183,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
.time_travel_recover(Some(&prefix), timestamp, done_if_after, &cancel)
|
||||
.await?;
|
||||
}
|
||||
Commands::Key(dkc) => dkc.execute(),
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ use utils::lsn::Lsn;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
/// Ingest aux files into the pageserver.
|
||||
#[derive(clap::Parser)]
|
||||
@@ -89,17 +88,11 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
println!("ingested {file_cnt} files");
|
||||
}
|
||||
|
||||
for _ in 0..100 {
|
||||
let start = Instant::now();
|
||||
let files = mgmt_api_client
|
||||
.list_aux_files(tenant_shard_id, timeline_id, Lsn(Lsn::MAX.0 - 1))
|
||||
.await?;
|
||||
println!(
|
||||
"{} files found in {}s",
|
||||
files.len(),
|
||||
start.elapsed().as_secs_f64()
|
||||
);
|
||||
}
|
||||
let files = mgmt_api_client
|
||||
.list_aux_files(tenant_shard_id, timeline_id, Lsn(Lsn::MAX.0 - 1))
|
||||
.await?;
|
||||
|
||||
println!("{} files found", files.len());
|
||||
|
||||
anyhow::Ok(())
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::key::{is_rel_block_key, key_to_rel_block, Key};
|
||||
use pageserver_api::keyspace::KeySpaceAccum;
|
||||
use pageserver_api::models::PagestreamGetPageRequest;
|
||||
|
||||
@@ -187,7 +187,7 @@ async fn main_impl(
|
||||
for r in partitioning.keys.ranges.iter() {
|
||||
let mut i = r.start;
|
||||
while i != r.end {
|
||||
if i.is_rel_block_key() {
|
||||
if is_rel_block_key(&i) {
|
||||
filtered.add_key(i);
|
||||
}
|
||||
i = i.next();
|
||||
@@ -308,10 +308,9 @@ async fn main_impl(
|
||||
let r = &ranges[weights.sample(&mut rng)];
|
||||
let key: i128 = rng.gen_range(r.start..r.end);
|
||||
let key = Key::from_i128(key);
|
||||
assert!(key.is_rel_block_key());
|
||||
let (rel_tag, block_no) = key
|
||||
.to_rel_block()
|
||||
.expect("we filter non-rel-block keys out above");
|
||||
assert!(is_rel_block_key(&key));
|
||||
let (rel_tag, block_no) =
|
||||
key_to_rel_block(key).expect("we filter non-rel-block keys out above");
|
||||
PagestreamGetPageRequest {
|
||||
request_lsn: if rng.gen_bool(args.req_latest_probability) {
|
||||
Lsn::MAX
|
||||
|
||||
@@ -178,8 +178,7 @@ impl AuxFileSizeEstimator {
|
||||
}
|
||||
}
|
||||
|
||||
/// When generating base backup or doing initial logical size calculation
|
||||
pub fn on_initial(&self, new_size: usize) {
|
||||
pub fn on_base_backup(&self, new_size: usize) {
|
||||
let mut guard = self.size.lock().unwrap();
|
||||
*guard = Some(new_size as isize);
|
||||
self.report(new_size as isize);
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
use anyhow::{anyhow, Context};
|
||||
use bytes::{BufMut, Bytes, BytesMut};
|
||||
use fail::fail_point;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::key::{key_to_slru_block, Key};
|
||||
use postgres_ffi::pg_constants;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::time::SystemTime;
|
||||
@@ -170,7 +170,7 @@ where
|
||||
}
|
||||
|
||||
async fn add_block(&mut self, key: &Key, block: Bytes) -> Result<(), BasebackupError> {
|
||||
let (kind, segno, _) = key.to_slru_block()?;
|
||||
let (kind, segno, _) = key_to_slru_block(*key)?;
|
||||
|
||||
match kind {
|
||||
SlruKind::Clog => {
|
||||
@@ -362,13 +362,6 @@ where
|
||||
));
|
||||
info!("Replication slot {} restart LSN={}", path, restart_lsn);
|
||||
min_restart_lsn = Lsn::min(min_restart_lsn, restart_lsn);
|
||||
} else if path == "pg_logical/replorigin_checkpoint" {
|
||||
// replorigin_checkoint is written only on compute shutdown, so it contains
|
||||
// deteriorated values. So we generate our own version of this file for the particular LSN
|
||||
// based on information about replorigins extracted from transaction commit records.
|
||||
// In future we will not generate AUX record for "pg_logical/replorigin_checkpoint" at all,
|
||||
// but now we should handle (skip) it for backward compatibility.
|
||||
continue;
|
||||
}
|
||||
let header = new_tar_header(&path, content.len() as u64)?;
|
||||
self.ar
|
||||
@@ -397,32 +390,6 @@ where
|
||||
{
|
||||
self.add_twophase_file(xid).await?;
|
||||
}
|
||||
let repl_origins = self
|
||||
.timeline
|
||||
.get_replorigins(self.lsn, self.ctx)
|
||||
.await
|
||||
.map_err(|e| BasebackupError::Server(e.into()))?;
|
||||
let n_origins = repl_origins.len();
|
||||
if n_origins != 0 {
|
||||
//
|
||||
// Construct "pg_logical/replorigin_checkpoint" file based on information about replication origins
|
||||
// extracted from transaction commit record. We are using this file to pass information about replication
|
||||
// origins to compute to allow logical replication to restart from proper point.
|
||||
//
|
||||
let mut content = Vec::with_capacity(n_origins * 16 + 8);
|
||||
content.extend_from_slice(&pg_constants::REPLICATION_STATE_MAGIC.to_le_bytes());
|
||||
for (origin_id, origin_lsn) in repl_origins {
|
||||
content.extend_from_slice(&origin_id.to_le_bytes());
|
||||
content.extend_from_slice(&[0u8; 6]); // align to 8 bytes
|
||||
content.extend_from_slice(&origin_lsn.0.to_le_bytes());
|
||||
}
|
||||
let crc32 = crc32c::crc32c(&content);
|
||||
content.extend_from_slice(&crc32.to_le_bytes());
|
||||
let header = new_tar_header("pg_logical/replorigin_checkpoint", content.len() as u64)?;
|
||||
self.ar.append(&header, &*content).await.context(
|
||||
"could not add pg_logical/replorigin_checkpoint file to basebackup tarball",
|
||||
)?;
|
||||
}
|
||||
|
||||
fail_point!("basebackup-before-control-file", |_| {
|
||||
Err(BasebackupError::Server(anyhow!(
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
//! See also `settings.md` for better description on every parameter.
|
||||
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::{models::CompactionAlgorithm, shard::TenantShardId};
|
||||
use remote_storage::{RemotePath, RemoteStorageConfig};
|
||||
use serde;
|
||||
use serde::de::IntoDeserializer;
|
||||
@@ -15,7 +15,7 @@ use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::ConnectionId;
|
||||
use utils::logging::SecretString;
|
||||
|
||||
use once_cell::sync::OnceCell;
|
||||
use once_cell::sync::{Lazy, OnceCell};
|
||||
use reqwest::Url;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::str::FromStr;
|
||||
@@ -99,6 +99,8 @@ pub mod defaults {
|
||||
|
||||
pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
|
||||
|
||||
pub const DEFAULT_WALREDO_PROCESS_KIND: &str = "async";
|
||||
|
||||
///
|
||||
/// Default built-in configuration file.
|
||||
///
|
||||
@@ -144,6 +146,8 @@ pub mod defaults {
|
||||
|
||||
#validate_vectored_get = '{DEFAULT_VALIDATE_VECTORED_GET}'
|
||||
|
||||
#walredo_process_kind = '{DEFAULT_WALREDO_PROCESS_KIND}'
|
||||
|
||||
[tenant_config]
|
||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
||||
@@ -296,6 +300,8 @@ pub struct PageServerConf {
|
||||
///
|
||||
/// Setting this to zero disables limits on total ephemeral layer size.
|
||||
pub ephemeral_bytes_per_memory_kb: usize,
|
||||
|
||||
pub walredo_process_kind: crate::walredo::ProcessKind,
|
||||
}
|
||||
|
||||
/// We do not want to store this in a PageServerConf because the latter may be logged
|
||||
@@ -401,6 +407,8 @@ struct PageServerConfigBuilder {
|
||||
validate_vectored_get: BuilderValue<bool>,
|
||||
|
||||
ephemeral_bytes_per_memory_kb: BuilderValue<usize>,
|
||||
|
||||
walredo_process_kind: BuilderValue<crate::walredo::ProcessKind>,
|
||||
}
|
||||
|
||||
impl PageServerConfigBuilder {
|
||||
@@ -489,6 +497,8 @@ impl PageServerConfigBuilder {
|
||||
)),
|
||||
validate_vectored_get: Set(DEFAULT_VALIDATE_VECTORED_GET),
|
||||
ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
|
||||
|
||||
walredo_process_kind: Set(DEFAULT_WALREDO_PROCESS_KIND.parse().unwrap()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -676,6 +686,10 @@ impl PageServerConfigBuilder {
|
||||
self.ephemeral_bytes_per_memory_kb = BuilderValue::Set(value);
|
||||
}
|
||||
|
||||
pub fn get_walredo_process_kind(&mut self, value: crate::walredo::ProcessKind) {
|
||||
self.walredo_process_kind = BuilderValue::Set(value);
|
||||
}
|
||||
|
||||
pub fn build(self) -> anyhow::Result<PageServerConf> {
|
||||
let default = Self::default_values();
|
||||
|
||||
@@ -733,6 +747,7 @@ impl PageServerConfigBuilder {
|
||||
max_vectored_read_bytes,
|
||||
validate_vectored_get,
|
||||
ephemeral_bytes_per_memory_kb,
|
||||
walredo_process_kind,
|
||||
}
|
||||
CUSTOM LOGIC
|
||||
{
|
||||
@@ -1029,6 +1044,9 @@ impl PageServerConf {
|
||||
"ephemeral_bytes_per_memory_kb" => {
|
||||
builder.get_ephemeral_bytes_per_memory_kb(parse_toml_u64("ephemeral_bytes_per_memory_kb", item)? as usize)
|
||||
}
|
||||
"walredo_process_kind" => {
|
||||
builder.get_walredo_process_kind(parse_toml_from_str("walredo_process_kind", item)?)
|
||||
}
|
||||
_ => bail!("unrecognized pageserver option '{key}'"),
|
||||
}
|
||||
}
|
||||
@@ -1049,6 +1067,19 @@ impl PageServerConf {
|
||||
|
||||
conf.default_tenant_conf = t_conf.merge(TenantConf::default());
|
||||
|
||||
{
|
||||
const VAR_NAME: &str = "NEON_PAGESERVER_PANIC_ON_UNSPECIFIED_COMPACTION_ALGORITHM";
|
||||
static VAR: Lazy<Option<bool>> = Lazy::new(|| utils::env::var(VAR_NAME));
|
||||
if VAR.unwrap_or(false)
|
||||
&& conf.default_tenant_conf.compaction_algorithm.kind
|
||||
== CompactionAlgorithm::NotSpecified
|
||||
{
|
||||
panic!(
|
||||
"Unspecified compaction algorithm in default tenant configuration. \
|
||||
Set the algorithm explicitly in the pageserver.toml's `tenant_config` field or unset the environment variable {VAR_NAME}");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(conf)
|
||||
}
|
||||
|
||||
@@ -1112,6 +1143,7 @@ impl PageServerConf {
|
||||
),
|
||||
validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
|
||||
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
||||
walredo_process_kind: defaults::DEFAULT_WALREDO_PROCESS_KIND.parse().unwrap(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1351,6 +1383,7 @@ background_task_maximum_delay = '334 s'
|
||||
),
|
||||
validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
|
||||
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
||||
walredo_process_kind: defaults::DEFAULT_WALREDO_PROCESS_KIND.parse().unwrap(),
|
||||
},
|
||||
"Correct defaults should be used when no config values are provided"
|
||||
);
|
||||
@@ -1424,6 +1457,7 @@ background_task_maximum_delay = '334 s'
|
||||
),
|
||||
validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET,
|
||||
ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB,
|
||||
walredo_process_kind: defaults::DEFAULT_WALREDO_PROCESS_KIND.parse().unwrap(),
|
||||
},
|
||||
"Should be able to parse all basic config values correctly"
|
||||
);
|
||||
|
||||
@@ -2,9 +2,10 @@
|
||||
//! and push them to a HTTP endpoint.
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
|
||||
use crate::tenant::size::CalculateSyntheticSizeError;
|
||||
use crate::tenant::tasks::BackgroundLoopKind;
|
||||
use crate::tenant::{mgr::TenantManager, LogicalSizeCalculationCause, Tenant};
|
||||
use crate::tenant::{
|
||||
mgr::TenantManager, LogicalSizeCalculationCause, PageReconstructError, Tenant,
|
||||
};
|
||||
use camino::Utf8PathBuf;
|
||||
use consumption_metrics::EventType;
|
||||
use pageserver_api::models::TenantState;
|
||||
@@ -349,12 +350,19 @@ async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &Re
|
||||
// Same for the loop that fetches computed metrics.
|
||||
// By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
|
||||
// which turns out is really handy to understand the system.
|
||||
match tenant.calculate_synthetic_size(CAUSE, cancel, ctx).await {
|
||||
Ok(_) => {}
|
||||
Err(CalculateSyntheticSizeError::Cancelled) => {}
|
||||
Err(e) => {
|
||||
let tenant_shard_id = tenant.tenant_shard_id();
|
||||
error!("failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}");
|
||||
}
|
||||
let Err(e) = tenant.calculate_synthetic_size(CAUSE, cancel, ctx).await else {
|
||||
return;
|
||||
};
|
||||
|
||||
// this error can be returned if timeline is shutting down, but it does not
|
||||
// mean the synthetic size worker should terminate.
|
||||
let shutting_down = matches!(
|
||||
e.downcast_ref::<PageReconstructError>(),
|
||||
Some(PageReconstructError::Cancelled | PageReconstructError::AncestorStopping(_))
|
||||
);
|
||||
|
||||
if !shutting_down {
|
||||
let tenant_shard_id = tenant.tenant_shard_id();
|
||||
error!("failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -311,7 +311,7 @@ impl DeletionList {
|
||||
result.extend(
|
||||
timeline_layers
|
||||
.into_iter()
|
||||
.map(|l| timeline_remote_path.join(Utf8PathBuf::from(l))),
|
||||
.map(|l| timeline_remote_path.join(&Utf8PathBuf::from(l))),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,10 +81,8 @@ paths:
|
||||
Attempts to delete specified tenant. 500, 503 and 409 errors should be retried until 404 is retrieved.
|
||||
404 means that deletion successfully finished"
|
||||
responses:
|
||||
"200":
|
||||
description: Tenant was successfully deleted, or was already not found.
|
||||
"404":
|
||||
description: Tenant not found. This is a success result, equivalent to 200.
|
||||
description: Tenant not found. This is the success path.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
|
||||
@@ -74,7 +74,6 @@ use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::CompactFlags;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::timeline::Timeline;
|
||||
use crate::tenant::GetTimelineError;
|
||||
use crate::tenant::SpawnMode;
|
||||
@@ -181,7 +180,12 @@ impl From<PageReconstructError> for ApiError {
|
||||
PageReconstructError::MissingKey(e) => {
|
||||
ApiError::InternalServerError(anyhow::anyhow!("{e}"))
|
||||
}
|
||||
PageReconstructError::Cancelled => ApiError::Cancelled,
|
||||
PageReconstructError::Cancelled => {
|
||||
ApiError::InternalServerError(anyhow::anyhow!("request was cancelled"))
|
||||
}
|
||||
PageReconstructError::AncestorStopping(_) => {
|
||||
ApiError::ResourceUnavailable(format!("{pre}").into())
|
||||
}
|
||||
PageReconstructError::AncestorLsnTimeout(e) => ApiError::Timeout(format!("{e}").into()),
|
||||
PageReconstructError::WalRedo(pre) => ApiError::InternalServerError(pre),
|
||||
}
|
||||
@@ -1071,7 +1075,7 @@ async fn tenant_delete_handler(
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let status = state
|
||||
state
|
||||
.tenant_manager
|
||||
.delete_tenant(tenant_shard_id, ACTIVE_TENANT_TIMEOUT)
|
||||
.instrument(info_span!("tenant_delete_handler",
|
||||
@@ -1080,14 +1084,7 @@ async fn tenant_delete_handler(
|
||||
))
|
||||
.await?;
|
||||
|
||||
// Callers use 404 as success for deletions, for historical reasons.
|
||||
if status == StatusCode::NOT_FOUND {
|
||||
return Err(ApiError::NotFound(
|
||||
anyhow::anyhow!("Deletion complete").into(),
|
||||
));
|
||||
}
|
||||
|
||||
json_response(status, ())
|
||||
json_response(StatusCode::ACCEPTED, ())
|
||||
}
|
||||
|
||||
/// HTTP endpoint to query the current tenant_size of a tenant.
|
||||
@@ -1135,10 +1132,7 @@ async fn tenant_size_handler(
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| match e {
|
||||
crate::tenant::size::CalculateSyntheticSizeError::Cancelled => ApiError::ShuttingDown,
|
||||
other => ApiError::InternalServerError(anyhow::anyhow!(other)),
|
||||
})?;
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
let mut sizes = None;
|
||||
let accepts_html = headers
|
||||
@@ -1146,7 +1140,9 @@ async fn tenant_size_handler(
|
||||
.map(|v| v == "text/html")
|
||||
.unwrap_or_default();
|
||||
if !inputs_only.unwrap_or(false) {
|
||||
let storage_model = inputs.calculate_model();
|
||||
let storage_model = inputs
|
||||
.calculate_model()
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let size = storage_model.calculate();
|
||||
|
||||
// If request header expects html, return html
|
||||
@@ -1817,22 +1813,11 @@ async fn timeline_checkpoint_handler(
|
||||
timeline
|
||||
.freeze_and_flush()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
match e {
|
||||
tenant::timeline::FlushLayerError::Cancelled => ApiError::ShuttingDown,
|
||||
other => ApiError::InternalServerError(other.into()),
|
||||
|
||||
}
|
||||
})?;
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
timeline
|
||||
.compact(&cancel, flags, &ctx)
|
||||
.await
|
||||
.map_err(|e|
|
||||
match e {
|
||||
CompactionError::ShuttingDown => ApiError::ShuttingDown,
|
||||
CompactionError::Other(e) => ApiError::InternalServerError(e)
|
||||
}
|
||||
)?;
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
||||
|
||||
if wait_until_uploaded {
|
||||
timeline.remote_client.wait_completion().await.map_err(ApiError::InternalServerError)?;
|
||||
@@ -2188,7 +2173,7 @@ async fn tenant_scan_remote_handler(
|
||||
{
|
||||
Ok((index_part, index_generation)) => {
|
||||
tracing::info!("Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)",
|
||||
index_part.layer_metadata.len(), index_part.metadata.disk_consistent_lsn());
|
||||
index_part.layer_metadata.len(), index_part.get_disk_consistent_lsn());
|
||||
generation = std::cmp::max(generation, index_generation);
|
||||
}
|
||||
Err(DownloadError::NotFound) => {
|
||||
@@ -2430,25 +2415,6 @@ async fn list_aux_files(
|
||||
json_response(StatusCode::OK, files)
|
||||
}
|
||||
|
||||
async fn perf_info(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let timeline =
|
||||
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
|
||||
.await?;
|
||||
|
||||
let result = timeline.perf_info().await;
|
||||
|
||||
json_response(StatusCode::OK, result)
|
||||
}
|
||||
|
||||
async fn ingest_aux_files(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
@@ -2876,9 +2842,5 @@ pub fn make_router(
|
||||
|r| testing_api_handler("list_aux_files", r, list_aux_files),
|
||||
)
|
||||
.post("/v1/top_tenants", |r| api_handler(r, post_top_tenants))
|
||||
.post(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/perf_info",
|
||||
|r| testing_api_handler("perf_info", r, perf_info),
|
||||
)
|
||||
.any(handler_404))
|
||||
}
|
||||
|
||||
@@ -2108,7 +2108,6 @@ pub(crate) struct TimelineMetrics {
|
||||
pub directory_entries_count_gauge: Lazy<UIntGauge, Box<dyn Send + Fn() -> UIntGauge>>,
|
||||
pub evictions: IntCounter,
|
||||
pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
|
||||
shutdown: std::sync::atomic::AtomicBool,
|
||||
}
|
||||
|
||||
impl TimelineMetrics {
|
||||
@@ -2228,7 +2227,6 @@ impl TimelineMetrics {
|
||||
evictions_with_low_residence_duration: std::sync::RwLock::new(
|
||||
evictions_with_low_residence_duration,
|
||||
),
|
||||
shutdown: std::sync::atomic::AtomicBool::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2251,17 +2249,6 @@ impl TimelineMetrics {
|
||||
}
|
||||
|
||||
pub(crate) fn shutdown(&self) {
|
||||
let was_shutdown = self
|
||||
.shutdown
|
||||
.swap(true, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
if was_shutdown {
|
||||
// this happens on tenant deletion because tenant first shuts down timelines, then
|
||||
// invokes timeline deletion which first shuts down the timeline again.
|
||||
// TODO: this can be removed once https://github.com/neondatabase/neon/issues/5080
|
||||
return;
|
||||
}
|
||||
|
||||
let tenant_id = &self.tenant_id;
|
||||
let timeline_id = &self.timeline_id;
|
||||
let shard_id = &self.shard_id;
|
||||
|
||||
@@ -66,7 +66,6 @@ use crate::tenant::mgr::GetTenantError;
|
||||
use crate::tenant::mgr::ShardResolveResult;
|
||||
use crate::tenant::mgr::ShardSelector;
|
||||
use crate::tenant::mgr::TenantManager;
|
||||
use crate::tenant::timeline::FlushLayerError;
|
||||
use crate::tenant::timeline::WaitLsnError;
|
||||
use crate::tenant::GetTimelineError;
|
||||
use crate::tenant::PageReconstructError;
|
||||
@@ -373,7 +372,7 @@ impl From<WaitLsnError> for PageStreamError {
|
||||
match value {
|
||||
e @ WaitLsnError::Timeout(_) => Self::LsnTimeout(e),
|
||||
WaitLsnError::Shutdown => Self::Shutdown,
|
||||
e @ WaitLsnError::BadState { .. } => Self::Reconnect(format!("{e}").into()),
|
||||
WaitLsnError::BadState => Self::Reconnect("Timeline is not active".into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -383,7 +382,7 @@ impl From<WaitLsnError> for QueryError {
|
||||
match value {
|
||||
e @ WaitLsnError::Timeout(_) => Self::Other(anyhow::Error::new(e)),
|
||||
WaitLsnError::Shutdown => Self::Shutdown,
|
||||
WaitLsnError::BadState { .. } => Self::Reconnect,
|
||||
WaitLsnError::BadState => Self::Reconnect,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -831,10 +830,7 @@ impl PageServerHandler {
|
||||
// We only want to persist the data, and it doesn't matter if it's in the
|
||||
// shape of deltas or images.
|
||||
info!("flushing layers");
|
||||
timeline.freeze_and_flush().await.map_err(|e| match e {
|
||||
FlushLayerError::Cancelled => QueryError::Shutdown,
|
||||
other => QueryError::Other(other.into()),
|
||||
})?;
|
||||
timeline.freeze_and_flush().await?;
|
||||
|
||||
info!("done");
|
||||
Ok(())
|
||||
|
||||
@@ -17,8 +17,8 @@ use bytes::{Buf, Bytes, BytesMut};
|
||||
use enum_map::Enum;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::key::{
|
||||
dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key,
|
||||
relmap_file_key, repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key,
|
||||
dbdir_key_range, is_rel_block_key, is_slru_block_key, rel_block_to_key, rel_dir_to_key,
|
||||
rel_key_range, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
|
||||
slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
|
||||
AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
|
||||
};
|
||||
@@ -27,7 +27,7 @@ use pageserver_api::models::AuxFilePolicy;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId};
|
||||
use postgres_ffi::{Oid, TimestampTz, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{hash_map, HashMap, HashSet};
|
||||
use std::ops::ControlFlow;
|
||||
@@ -36,7 +36,6 @@ use strum::IntoEnumIterator;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info, trace, warn};
|
||||
use utils::bin_ser::DeserializeError;
|
||||
use utils::pausable_failpoint;
|
||||
use utils::vec_map::{VecMap, VecMapOrdering};
|
||||
use utils::{bin_ser::BeSer, lsn::Lsn};
|
||||
|
||||
@@ -79,19 +78,11 @@ pub enum LsnForTimestamp {
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum CalculateLogicalSizeError {
|
||||
pub enum CalculateLogicalSizeError {
|
||||
#[error("cancelled")]
|
||||
Cancelled,
|
||||
|
||||
/// Something went wrong while reading the metadata we use to calculate logical size
|
||||
/// Note that cancellation variants of `PageReconstructError` are transformed to [`Self::Cancelled`]
|
||||
/// in the `From` implementation for this variant.
|
||||
#[error(transparent)]
|
||||
PageRead(PageReconstructError),
|
||||
|
||||
/// Something went wrong deserializing metadata that we read to calculate logical size
|
||||
#[error("decode error: {0}")]
|
||||
Decode(#[from] DeserializeError),
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
@@ -116,8 +107,10 @@ impl From<PageReconstructError> for CollectKeySpaceError {
|
||||
impl From<PageReconstructError> for CalculateLogicalSizeError {
|
||||
fn from(pre: PageReconstructError) -> Self {
|
||||
match pre {
|
||||
PageReconstructError::Cancelled => Self::Cancelled,
|
||||
_ => Self::PageRead(pre),
|
||||
PageReconstructError::AncestorStopping(_) | PageReconstructError::Cancelled => {
|
||||
Self::Cancelled
|
||||
}
|
||||
_ => Self::Other(pre.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -410,8 +403,6 @@ impl Timeline {
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<LsnForTimestamp, PageReconstructError> {
|
||||
pausable_failpoint!("find-lsn-for-timestamp-pausable");
|
||||
|
||||
let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
|
||||
// We use this method to figure out the branching LSN for the new branch, but the
|
||||
// GC cutoff could be before the branching point and we cannot create a new branch
|
||||
@@ -427,7 +418,6 @@ impl Timeline {
|
||||
|
||||
let mut found_smaller = false;
|
||||
let mut found_larger = false;
|
||||
|
||||
while low < high {
|
||||
if cancel.is_cancelled() {
|
||||
return Err(PageReconstructError::Cancelled);
|
||||
@@ -722,22 +712,10 @@ impl Timeline {
|
||||
result.insert(fname, content);
|
||||
}
|
||||
}
|
||||
self.aux_file_size_estimator.on_initial(sz);
|
||||
self.aux_file_size_estimator.on_base_backup(sz);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub(crate) async fn trigger_aux_file_size_computation(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), PageReconstructError> {
|
||||
let current_policy = self.last_aux_file_policy.load();
|
||||
if let Some(AuxFilePolicy::V2) | Some(AuxFilePolicy::CrossValidation) = current_policy {
|
||||
self.list_aux_files_v2(lsn, ctx).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn list_aux_files(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
@@ -776,27 +754,6 @@ impl Timeline {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn get_replorigins(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<HashMap<RepOriginId, Lsn>, PageReconstructError> {
|
||||
let kv = self
|
||||
.scan(KeySpace::single(repl_origin_key_range()), lsn, ctx)
|
||||
.await
|
||||
.context("scan")?;
|
||||
let mut result = HashMap::new();
|
||||
for (k, v) in kv {
|
||||
let v = v.context("get value")?;
|
||||
let origin_id = k.field6 as RepOriginId;
|
||||
let origin_lsn = Lsn::des(&v).unwrap();
|
||||
if origin_lsn != Lsn::INVALID {
|
||||
result.insert(origin_id, origin_lsn);
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Does the same as get_current_logical_size but counted on demand.
|
||||
/// Used to initialize the logical size tracking on startup.
|
||||
///
|
||||
@@ -806,7 +763,7 @@ impl Timeline {
|
||||
/// # Cancel-Safety
|
||||
///
|
||||
/// This method is cancellation-safe.
|
||||
pub(crate) async fn get_current_logical_size_non_incremental(
|
||||
pub async fn get_current_logical_size_non_incremental(
|
||||
&self,
|
||||
lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
@@ -815,7 +772,7 @@ impl Timeline {
|
||||
|
||||
// Fetch list of database dirs and iterate them
|
||||
let buf = self.get(DBDIR_KEY, lsn, ctx).await?;
|
||||
let dbdir = DbDirectory::des(&buf)?;
|
||||
let dbdir = DbDirectory::des(&buf).context("deserialize db directory")?;
|
||||
|
||||
let mut total_size: u64 = 0;
|
||||
for (spcnode, dbnode) in dbdir.dbdirs.keys() {
|
||||
@@ -919,20 +876,10 @@ impl Timeline {
|
||||
result.add_key(AUX_FILES_KEY);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
{
|
||||
let guard = self.extra_test_dense_keyspace.load();
|
||||
for kr in &guard.ranges {
|
||||
result.add_range(kr.clone());
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
result.to_keyspace(),
|
||||
/* AUX sparse key space */
|
||||
SparseKeySpace(KeySpace {
|
||||
ranges: vec![repl_origin_key_range(), Key::metadata_aux_key_range()],
|
||||
}),
|
||||
SparseKeySpace(KeySpace::single(Key::metadata_aux_key_range())),
|
||||
))
|
||||
}
|
||||
|
||||
@@ -1201,20 +1148,6 @@ impl<'a> DatadirModification<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn set_replorigin(
|
||||
&mut self,
|
||||
origin_id: RepOriginId,
|
||||
origin_lsn: Lsn,
|
||||
) -> anyhow::Result<()> {
|
||||
let key = repl_origin_key(origin_id);
|
||||
self.put(key, Value::Image(origin_lsn.ser().unwrap().into()));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn drop_replorigin(&mut self, origin_id: RepOriginId) -> anyhow::Result<()> {
|
||||
self.set_replorigin(origin_id, Lsn::INVALID).await
|
||||
}
|
||||
|
||||
pub fn put_control_file(&mut self, img: Bytes) -> anyhow::Result<()> {
|
||||
self.put(CONTROLFILE_KEY, Value::Image(img));
|
||||
Ok(())
|
||||
@@ -1619,7 +1552,7 @@ impl<'a> DatadirModification<'a> {
|
||||
self.tline.aux_file_size_estimator.on_add(content.len());
|
||||
new_files.push((path, content));
|
||||
}
|
||||
(None, true) => warn!("removing non-existing aux file: {}", path),
|
||||
(None, true) => anyhow::bail!("removing non-existing aux file: {}", path),
|
||||
}
|
||||
let new_val = aux_file::encode_file_value(&new_files)?;
|
||||
self.put(key, Value::Image(new_val.into()));
|
||||
@@ -1673,7 +1606,8 @@ impl<'a> DatadirModification<'a> {
|
||||
aux_files.dir = Some(dir);
|
||||
}
|
||||
Err(
|
||||
e @ (PageReconstructError::Cancelled
|
||||
e @ (PageReconstructError::AncestorStopping(_)
|
||||
| PageReconstructError::Cancelled
|
||||
| PageReconstructError::AncestorLsnTimeout(_)),
|
||||
) => {
|
||||
// Important that we do not interpret a shutdown error as "not found" and thereby
|
||||
@@ -1745,7 +1679,7 @@ impl<'a> DatadirModification<'a> {
|
||||
let mut retained_pending_updates = HashMap::<_, Vec<_>>::new();
|
||||
for (key, values) in self.pending_updates.drain() {
|
||||
for (lsn, value) in values {
|
||||
if key.is_rel_block_key() || key.is_slru_block_key() {
|
||||
if is_rel_block_key(&key) || is_slru_block_key(key) {
|
||||
// This bails out on first error without modifying pending_updates.
|
||||
// That's Ok, cf this function's doc comment.
|
||||
writer.put(key, lsn, &value, ctx).await?;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -40,8 +40,6 @@ pub mod defaults {
|
||||
|
||||
pub const DEFAULT_COMPACTION_PERIOD: &str = "20 s";
|
||||
pub const DEFAULT_COMPACTION_THRESHOLD: usize = 10;
|
||||
pub const DEFAULT_COMPACTION_ALGORITHM: super::CompactionAlgorithm =
|
||||
super::CompactionAlgorithm::Legacy;
|
||||
|
||||
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
|
||||
|
||||
@@ -554,7 +552,13 @@ impl Default for TenantConf {
|
||||
.expect("cannot parse default compaction period"),
|
||||
compaction_threshold: DEFAULT_COMPACTION_THRESHOLD,
|
||||
compaction_algorithm: CompactionAlgorithmSettings {
|
||||
kind: DEFAULT_COMPACTION_ALGORITHM,
|
||||
kind: if cfg!(test) {
|
||||
// Rust tests rely on a valid implicit default (TODO: fix this)
|
||||
CompactionAlgorithm::Legacy
|
||||
} else {
|
||||
// Python tests are subject to NotSpecified handling
|
||||
CompactionAlgorithm::NotSpecified
|
||||
},
|
||||
},
|
||||
gc_horizon: DEFAULT_GC_HORIZON,
|
||||
gc_period: humantime::parse_duration(DEFAULT_GC_PERIOD)
|
||||
|
||||
@@ -16,7 +16,6 @@ use crate::{
|
||||
task_mgr::{self, TaskKind},
|
||||
tenant::{
|
||||
mgr::{TenantSlot, TenantsMapRemoveResult},
|
||||
remote_timeline_client::remote_heatmap_path,
|
||||
timeline::ShutdownMode,
|
||||
},
|
||||
};
|
||||
@@ -532,25 +531,6 @@ impl DeleteTenantFlow {
|
||||
}
|
||||
}
|
||||
|
||||
// Remove top-level tenant objects that don't belong to a timeline, such as heatmap
|
||||
let heatmap_path = remote_heatmap_path(&tenant.tenant_shard_id());
|
||||
if let Some(Err(e)) = backoff::retry(
|
||||
|| async {
|
||||
remote_storage
|
||||
.delete(&heatmap_path, &task_mgr::shutdown_token())
|
||||
.await
|
||||
},
|
||||
TimeoutOrCancel::caused_by_cancel,
|
||||
FAILED_UPLOAD_WARN_THRESHOLD,
|
||||
FAILED_REMOTE_OP_RETRIES,
|
||||
"remove_remote_tenant_heatmap",
|
||||
&task_mgr::shutdown_token(),
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::warn!("Failed to delete heatmap at {heatmap_path}: {e}");
|
||||
}
|
||||
|
||||
let timelines_path = conf.timelines_path(&tenant.tenant_shard_id);
|
||||
// May not exist if we fail in cleanup_remaining_fs_traces after removing it
|
||||
if timelines_path.exists() {
|
||||
|
||||
@@ -1,23 +1,15 @@
|
||||
//! Describes the legacy now hopefully no longer modified per-timeline metadata stored in
|
||||
//! `index_part.json` managed by [`remote_timeline_client`]. For many tenants and their timelines,
|
||||
//! this struct and it's original serialization format is still needed because they were written a
|
||||
//! long time ago.
|
||||
//! Every image of a certain timeline from [`crate::tenant::Tenant`]
|
||||
//! has a metadata that needs to be stored persistently.
|
||||
//!
|
||||
//! Instead of changing and adding versioning to this, just change [`IndexPart`] with soft json
|
||||
//! versioning.
|
||||
//! Later, the file gets used in [`remote_timeline_client`] as a part of
|
||||
//! external storage import and export operations.
|
||||
//!
|
||||
//! To clean up this module we need to migrate all index_part.json files to a later version.
|
||||
//! While doing this, we need to be mindful about s3 based recovery as well, so it might take
|
||||
//! however long we keep the old versions to be able to delete the old code. After that, we can
|
||||
//! remove everything else than [`TimelineMetadataBodyV2`], rename it as `TimelineMetadata` and
|
||||
//! move it to `index.rs`. Before doing all of this, we need to keep the structures for backwards
|
||||
//! compatibility.
|
||||
//! The module contains all structs and related helper methods related to timeline metadata.
|
||||
//!
|
||||
//! [`remote_timeline_client`]: super::remote_timeline_client
|
||||
//! [`IndexPart`]: super::remote_timeline_client::index::IndexPart
|
||||
|
||||
use anyhow::ensure;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::{de::Error, Deserialize, Serialize, Serializer};
|
||||
use utils::bin_ser::SerializeError;
|
||||
use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn};
|
||||
|
||||
@@ -25,37 +17,17 @@ use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn};
|
||||
const METADATA_FORMAT_VERSION: u16 = 4;
|
||||
|
||||
/// Previous supported format versions.
|
||||
///
|
||||
/// In practice, none of these should remain, all are [`METADATA_FORMAT_VERSION`], but confirming
|
||||
/// that requires a scrubber run which is yet to be done.
|
||||
const METADATA_OLD_FORMAT_VERSION: u16 = 3;
|
||||
|
||||
/// When the file existed on disk we assumed that a write of up to METADATA_MAX_SIZE bytes is atomic.
|
||||
/// We assume that a write of up to METADATA_MAX_SIZE bytes is atomic.
|
||||
///
|
||||
/// This is the same assumption that PostgreSQL makes with the control file,
|
||||
///
|
||||
/// see PG_CONTROL_MAX_SAFE_SIZE
|
||||
const METADATA_MAX_SIZE: usize = 512;
|
||||
|
||||
/// Legacy metadata stored as a component of `index_part.json` per timeline.
|
||||
/// Metadata stored on disk for each timeline
|
||||
///
|
||||
/// Do not make new changes to this type or the module. In production, we have two different kinds
|
||||
/// of serializations of this type: bincode and json. Bincode version reflects what used to be
|
||||
/// stored on disk in earlier versions and does internal crc32 checksumming.
|
||||
///
|
||||
/// This type should not implement `serde::Serialize` or `serde::Deserialize` because there would
|
||||
/// be a confusion whether you want the old version ([`TimelineMetadata::from_bytes`]) or the modern
|
||||
/// as-exists in `index_part.json` ([`self::modern_serde`]).
|
||||
///
|
||||
/// ```compile_fail
|
||||
/// #[derive(serde::Serialize)]
|
||||
/// struct DoNotDoThis(pageserver::tenant::metadata::TimelineMetadata);
|
||||
/// ```
|
||||
///
|
||||
/// ```compile_fail
|
||||
/// #[derive(serde::Deserialize)]
|
||||
/// struct NeitherDoThis(pageserver::tenant::metadata::TimelineMetadata);
|
||||
/// ```
|
||||
/// The fields correspond to the values we hold in memory, in Timeline.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct TimelineMetadata {
|
||||
hdr: TimelineMetadataHeader,
|
||||
@@ -68,49 +40,6 @@ struct TimelineMetadataHeader {
|
||||
size: u16, // size of serialized metadata
|
||||
format_version: u16, // metadata format version (used for compatibility checks)
|
||||
}
|
||||
|
||||
impl TryFrom<&TimelineMetadataBodyV2> for TimelineMetadataHeader {
|
||||
type Error = Crc32CalculationFailed;
|
||||
|
||||
fn try_from(value: &TimelineMetadataBodyV2) -> Result<Self, Self::Error> {
|
||||
#[derive(Default)]
|
||||
struct Crc32Sink {
|
||||
crc: u32,
|
||||
count: usize,
|
||||
}
|
||||
|
||||
impl std::io::Write for Crc32Sink {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
self.crc = crc32c::crc32c_append(self.crc, buf);
|
||||
self.count += buf.len();
|
||||
Ok(buf.len())
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// jump through hoops to calculate the crc32 so that TimelineMetadata::ne works
|
||||
// across serialization versions
|
||||
let mut sink = Crc32Sink::default();
|
||||
<TimelineMetadataBodyV2 as utils::bin_ser::BeSer>::ser_into(value, &mut sink)
|
||||
.map_err(Crc32CalculationFailed)?;
|
||||
|
||||
let size = METADATA_HDR_SIZE + sink.count;
|
||||
|
||||
Ok(TimelineMetadataHeader {
|
||||
checksum: sink.crc,
|
||||
size: size as u16,
|
||||
format_version: METADATA_FORMAT_VERSION,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
#[error("re-serializing for crc32 failed")]
|
||||
struct Crc32CalculationFailed(#[source] utils::bin_ser::SerializeError);
|
||||
|
||||
const METADATA_HDR_SIZE: usize = std::mem::size_of::<TimelineMetadataHeader>();
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
@@ -182,12 +111,6 @@ impl TimelineMetadata {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn with_recalculated_checksum(mut self) -> anyhow::Result<Self> {
|
||||
self.hdr = TimelineMetadataHeader::try_from(&self.body)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
fn upgrade_timeline_metadata(metadata_bytes: &[u8]) -> anyhow::Result<Self> {
|
||||
let mut hdr = TimelineMetadataHeader::des(&metadata_bytes[0..METADATA_HDR_SIZE])?;
|
||||
|
||||
@@ -338,93 +261,25 @@ impl TimelineMetadata {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) mod modern_serde {
|
||||
use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result<TimelineMetadata, D::Error>
|
||||
impl<'de> Deserialize<'de> for TimelineMetadata {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: serde::de::Deserializer<'de>,
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
// for legacy reasons versions 1-5 had TimelineMetadata serialized as a Vec<u8> field with
|
||||
// BeSer.
|
||||
struct Visitor;
|
||||
|
||||
impl<'d> serde::de::Visitor<'d> for Visitor {
|
||||
type Value = TimelineMetadata;
|
||||
|
||||
fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
f.write_str("BeSer bytes or json structure")
|
||||
}
|
||||
|
||||
fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
|
||||
where
|
||||
A: serde::de::SeqAccess<'d>,
|
||||
{
|
||||
use serde::de::Error;
|
||||
let de = serde::de::value::SeqAccessDeserializer::new(seq);
|
||||
Vec::<u8>::deserialize(de)
|
||||
.map(|v| TimelineMetadata::from_bytes(&v).map_err(A::Error::custom))?
|
||||
}
|
||||
|
||||
fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>
|
||||
where
|
||||
A: serde::de::MapAccess<'d>,
|
||||
{
|
||||
use serde::de::Error;
|
||||
|
||||
let de = serde::de::value::MapAccessDeserializer::new(map);
|
||||
let body = TimelineMetadataBodyV2::deserialize(de)?;
|
||||
let hdr = TimelineMetadataHeader::try_from(&body).map_err(A::Error::custom)?;
|
||||
|
||||
Ok(TimelineMetadata { hdr, body })
|
||||
}
|
||||
}
|
||||
|
||||
deserializer.deserialize_any(Visitor)
|
||||
let bytes = Vec::<u8>::deserialize(deserializer)?;
|
||||
Self::from_bytes(bytes.as_slice()).map_err(|e| D::Error::custom(format!("{e}")))
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn serialize<S>(
|
||||
metadata: &TimelineMetadata,
|
||||
serializer: S,
|
||||
) -> Result<S::Ok, S::Error>
|
||||
impl Serialize for TimelineMetadata {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
S: Serializer,
|
||||
{
|
||||
// header is not needed, upon reading we've upgraded all v1 to v2
|
||||
metadata.body.serialize(serializer)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deserializes_bytes_as_well_as_equivalent_body_v2() {
|
||||
#[derive(serde::Deserialize, serde::Serialize)]
|
||||
struct Wrapper(
|
||||
#[serde(deserialize_with = "deserialize", serialize_with = "serialize")]
|
||||
TimelineMetadata,
|
||||
);
|
||||
|
||||
let too_many_bytes = "[216,111,252,208,0,54,0,4,0,0,0,0,1,73,253,144,1,0,0,0,0,1,73,253,24,0,0,0,0,0,0,0,0,0,0,0,0,0,1,73,253,24,0,0,0,0,1,73,253,24,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]";
|
||||
|
||||
let wrapper_from_bytes = serde_json::from_str::<Wrapper>(too_many_bytes).unwrap();
|
||||
|
||||
let serialized = serde_json::to_value(&wrapper_from_bytes).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
serialized,
|
||||
serde_json::json! {{
|
||||
"disk_consistent_lsn": "0/149FD90",
|
||||
"prev_record_lsn": "0/149FD18",
|
||||
"ancestor_timeline": null,
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/149FD18",
|
||||
"initdb_lsn": "0/149FD18",
|
||||
"pg_version": 15
|
||||
}}
|
||||
);
|
||||
|
||||
let wrapper_from_json = serde_json::value::from_value::<Wrapper>(serialized).unwrap();
|
||||
|
||||
assert_eq!(wrapper_from_bytes.0, wrapper_from_json.0);
|
||||
let bytes = self
|
||||
.to_bytes()
|
||||
.map_err(|e| serde::ser::Error::custom(format!("{e}")))?;
|
||||
bytes.serialize(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -548,6 +403,59 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_metadata_bincode_serde() {
|
||||
let original_metadata = TimelineMetadata::new(
|
||||
Lsn(0x200),
|
||||
Some(Lsn(0x100)),
|
||||
Some(TIMELINE_ID),
|
||||
Lsn(0),
|
||||
Lsn(0),
|
||||
Lsn(0),
|
||||
// Any version will do here, so use the default
|
||||
crate::DEFAULT_PG_VERSION,
|
||||
);
|
||||
let metadata_bytes = original_metadata
|
||||
.to_bytes()
|
||||
.expect("Cannot create bytes array from metadata");
|
||||
|
||||
let metadata_bincode_be_bytes = original_metadata
|
||||
.ser()
|
||||
.expect("Cannot serialize the metadata");
|
||||
|
||||
// 8 bytes for the length of the vector
|
||||
assert_eq!(metadata_bincode_be_bytes.len(), 8 + metadata_bytes.len());
|
||||
|
||||
let expected_bincode_bytes = {
|
||||
let mut temp = vec![];
|
||||
let len_bytes = metadata_bytes.len().to_be_bytes();
|
||||
temp.extend_from_slice(&len_bytes);
|
||||
temp.extend_from_slice(&metadata_bytes);
|
||||
temp
|
||||
};
|
||||
assert_eq!(metadata_bincode_be_bytes, expected_bincode_bytes);
|
||||
|
||||
let deserialized_metadata = TimelineMetadata::des(&metadata_bincode_be_bytes).unwrap();
|
||||
// Deserialized metadata has the metadata header, which is different from the serialized one.
|
||||
// Reference: TimelineMetaData::to_bytes()
|
||||
let expected_metadata = {
|
||||
let mut temp_metadata = original_metadata;
|
||||
let body_bytes = temp_metadata
|
||||
.body
|
||||
.ser()
|
||||
.expect("Cannot serialize the metadata body");
|
||||
let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
|
||||
let hdr = TimelineMetadataHeader {
|
||||
size: metadata_size as u16,
|
||||
format_version: METADATA_FORMAT_VERSION,
|
||||
checksum: crc32c::crc32c(&body_bytes),
|
||||
};
|
||||
temp_metadata.hdr = hdr;
|
||||
temp_metadata
|
||||
};
|
||||
assert_eq!(deserialized_metadata, expected_metadata);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_metadata_bincode_serde_ensure_roundtrip() {
|
||||
let original_metadata = TimelineMetadata::new(
|
||||
@@ -561,6 +469,8 @@ mod tests {
|
||||
crate::DEFAULT_PG_VERSION,
|
||||
);
|
||||
let expected_bytes = vec![
|
||||
/* bincode length encoding bytes */
|
||||
0, 0, 0, 0, 0, 0, 2, 0, // 8 bytes for the length of the serialized vector
|
||||
/* TimelineMetadataHeader */
|
||||
4, 37, 101, 34, 0, 70, 0, 4, // checksum, size, format_version (4 + 2 + 2)
|
||||
/* TimelineMetadataBodyV2 */
|
||||
@@ -590,7 +500,7 @@ mod tests {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0,
|
||||
];
|
||||
let metadata_ser_bytes = original_metadata.to_bytes().unwrap();
|
||||
let metadata_ser_bytes = original_metadata.ser().unwrap();
|
||||
assert_eq!(metadata_ser_bytes, expected_bytes);
|
||||
|
||||
let expected_metadata = {
|
||||
@@ -608,7 +518,7 @@ mod tests {
|
||||
temp_metadata.hdr = hdr;
|
||||
temp_metadata
|
||||
};
|
||||
let des_metadata = TimelineMetadata::from_bytes(&metadata_ser_bytes).unwrap();
|
||||
let des_metadata = TimelineMetadata::des(&metadata_ser_bytes).unwrap();
|
||||
assert_eq!(des_metadata, expected_metadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
|
||||
use futures::StreamExt;
|
||||
use hyper::StatusCode;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::models::LocationConfigMode;
|
||||
@@ -46,7 +45,7 @@ use crate::tenant::delete::DeleteTenantFlow;
|
||||
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
|
||||
use crate::tenant::storage_layer::inmemory_layer;
|
||||
use crate::tenant::timeline::ShutdownMode;
|
||||
use crate::tenant::{AttachedTenantConf, GcError, SpawnMode, Tenant, TenantState};
|
||||
use crate::tenant::{AttachedTenantConf, SpawnMode, Tenant, TenantState};
|
||||
use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX};
|
||||
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
@@ -55,7 +54,6 @@ use utils::generation::Generation;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use super::delete::DeleteTenantError;
|
||||
use super::remote_timeline_client::remote_tenant_path;
|
||||
use super::secondary::SecondaryTenant;
|
||||
use super::timeline::detach_ancestor::PreparedTimelineDetach;
|
||||
use super::TenantSharedResources;
|
||||
@@ -1371,7 +1369,7 @@ impl TenantManager {
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
activation_timeout: Duration,
|
||||
) -> Result<StatusCode, DeleteTenantError> {
|
||||
) -> Result<(), DeleteTenantError> {
|
||||
super::span::debug_assert_current_span_has_tenant_id();
|
||||
// We acquire a SlotGuard during this function to protect against concurrent
|
||||
// changes while the ::prepare phase of DeleteTenantFlow executes, but then
|
||||
@@ -1384,79 +1382,18 @@ impl TenantManager {
|
||||
//
|
||||
// See https://github.com/neondatabase/neon/issues/5080
|
||||
|
||||
// Tenant deletion can happen two ways:
|
||||
// - Legacy: called on an attached location. The attached Tenant object stays alive in Stopping
|
||||
// state until deletion is complete.
|
||||
// - New: called on a pageserver without an attached location. We proceed with deletion from
|
||||
// remote storage.
|
||||
//
|
||||
// See https://github.com/neondatabase/neon/issues/5080 for more context on this transition.
|
||||
let slot_guard =
|
||||
tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::MustExist)?;
|
||||
|
||||
let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;
|
||||
match &slot_guard.old_value {
|
||||
Some(TenantSlot::Attached(tenant)) => {
|
||||
// Legacy deletion flow: the tenant remains attached, goes to Stopping state, and
|
||||
// deletion will be resumed across restarts.
|
||||
let tenant = tenant.clone();
|
||||
return self
|
||||
.delete_tenant_attached(slot_guard, tenant, activation_timeout)
|
||||
.await;
|
||||
// unwrap is safe because we used MustExist mode when acquiring
|
||||
let tenant = match slot_guard.get_old_value().as_ref().unwrap() {
|
||||
TenantSlot::Attached(tenant) => tenant.clone(),
|
||||
_ => {
|
||||
// Express "not attached" as equivalent to "not found"
|
||||
return Err(DeleteTenantError::NotAttached);
|
||||
}
|
||||
Some(TenantSlot::Secondary(secondary_tenant)) => {
|
||||
secondary_tenant.shutdown().await;
|
||||
let local_tenant_directory = self.conf.tenant_path(&tenant_shard_id);
|
||||
let tmp_dir = safe_rename_tenant_dir(&local_tenant_directory)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("local tenant directory {local_tenant_directory:?} rename")
|
||||
})?;
|
||||
spawn_background_purge(tmp_dir);
|
||||
}
|
||||
Some(TenantSlot::InProgress(_)) => unreachable!(),
|
||||
None => {}
|
||||
};
|
||||
|
||||
// Fall through: local state for this tenant is no longer present, proceed with remote delete
|
||||
let remote_path = remote_tenant_path(&tenant_shard_id);
|
||||
let keys = match self
|
||||
.resources
|
||||
.remote_storage
|
||||
.list(
|
||||
Some(&remote_path),
|
||||
remote_storage::ListingMode::NoDelimiter,
|
||||
None,
|
||||
&self.cancel,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(listing) => listing.keys,
|
||||
Err(remote_storage::DownloadError::Cancelled) => {
|
||||
return Err(DeleteTenantError::Cancelled)
|
||||
}
|
||||
Err(remote_storage::DownloadError::NotFound) => return Ok(StatusCode::NOT_FOUND),
|
||||
Err(other) => return Err(DeleteTenantError::Other(anyhow::anyhow!(other))),
|
||||
};
|
||||
|
||||
if keys.is_empty() {
|
||||
tracing::info!("Remote storage already deleted");
|
||||
} else {
|
||||
tracing::info!("Deleting {} keys from remote storage", keys.len());
|
||||
self.resources
|
||||
.remote_storage
|
||||
.delete_objects(&keys, &self.cancel)
|
||||
.await?;
|
||||
}
|
||||
|
||||
// Callers use 404 as success for deletions, for historical reasons.
|
||||
Ok(StatusCode::NOT_FOUND)
|
||||
}
|
||||
|
||||
async fn delete_tenant_attached(
|
||||
&self,
|
||||
slot_guard: SlotGuard,
|
||||
tenant: Arc<Tenant>,
|
||||
activation_timeout: Duration,
|
||||
) -> Result<StatusCode, DeleteTenantError> {
|
||||
match tenant.current_state() {
|
||||
TenantState::Broken { .. } | TenantState::Stopping { .. } => {
|
||||
// If deletion is already in progress, return success (the semantics of this
|
||||
@@ -1466,7 +1403,7 @@ impl TenantManager {
|
||||
// The `delete_progress` lock is held: deletion is already happening
|
||||
// in the bacckground
|
||||
slot_guard.revert();
|
||||
return Ok(StatusCode::ACCEPTED);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
@@ -1499,8 +1436,7 @@ impl TenantManager {
|
||||
|
||||
// The Tenant goes back into the map in Stopping state, it will eventually be removed by DeleteTenantFLow
|
||||
slot_guard.revert();
|
||||
let () = result?;
|
||||
Ok(StatusCode::ACCEPTED)
|
||||
result
|
||||
}
|
||||
|
||||
#[instrument(skip_all, fields(tenant_id=%tenant.get_tenant_shard_id().tenant_id, shard_id=%tenant.get_tenant_shard_id().shard_slug(), new_shard_count=%new_shard_count.literal()))]
|
||||
@@ -2897,13 +2833,7 @@ pub(crate) async fn immediate_gc(
|
||||
}
|
||||
}
|
||||
|
||||
result.map_err(|e| match e {
|
||||
GcError::TenantCancelled | GcError::TimelineCancelled => ApiError::ShuttingDown,
|
||||
GcError::TimelineNotFound => {
|
||||
ApiError::NotFound(anyhow::anyhow!("Timeline not found").into())
|
||||
}
|
||||
other => ApiError::InternalServerError(anyhow::anyhow!(other)),
|
||||
})
|
||||
result.map_err(ApiError::InternalServerError)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -91,7 +91,8 @@
|
||||
//!
|
||||
//! The *actual* remote state lags behind the *desired* remote state while
|
||||
//! there are in-flight operations.
|
||||
//! We keep track of the desired remote state in [`UploadQueueInitialized::dirty`].
|
||||
//! We keep track of the desired remote state in
|
||||
//! [`UploadQueueInitialized::latest_files`] and [`UploadQueueInitialized::latest_metadata`].
|
||||
//! It is initialized based on the [`IndexPart`] that was passed during init
|
||||
//! and updated with every `schedule_*` function call.
|
||||
//! All this is necessary necessary to compute the future [`IndexPart`]s
|
||||
@@ -114,7 +115,8 @@
|
||||
//!
|
||||
//! # Completion
|
||||
//!
|
||||
//! Once an operation has completed, we update [`UploadQueueInitialized::clean`] immediately,
|
||||
//! Once an operation has completed, we update
|
||||
//! [`UploadQueueInitialized::projected_remote_consistent_lsn`] immediately,
|
||||
//! and submit a request through the DeletionQueue to update
|
||||
//! [`UploadQueueInitialized::visible_remote_consistent_lsn`] after it has
|
||||
//! validated that our generation is not stale. It is this visible value
|
||||
@@ -414,7 +416,6 @@ impl RemoteTimelineClient {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns `None` if nothing is yet uplodaded, `Some(disk_consistent_lsn)` otherwise.
|
||||
pub fn remote_consistent_lsn_projected(&self) -> Option<Lsn> {
|
||||
match &mut *self.upload_queue.lock().unwrap() {
|
||||
UploadQueue::Uninitialized => None,
|
||||
@@ -441,11 +442,13 @@ impl RemoteTimelineClient {
|
||||
/// Returns true if this timeline was previously detached at this Lsn and the remote timeline
|
||||
/// client is currently initialized.
|
||||
pub(crate) fn is_previous_ancestor_lsn(&self, lsn: Lsn) -> bool {
|
||||
// technically this is a dirty read, but given how timeline detach ancestor is implemented
|
||||
// via tenant restart, the lineage has always been uploaded.
|
||||
self.upload_queue
|
||||
.lock()
|
||||
.unwrap()
|
||||
.initialized_mut()
|
||||
.map(|uq| uq.clean.0.lineage.is_previous_ancestor_lsn(lsn))
|
||||
.map(|uq| uq.latest_lineage.is_previous_ancestor_lsn(lsn))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
@@ -454,6 +457,7 @@ impl RemoteTimelineClient {
|
||||
current_remote_index_part
|
||||
.layer_metadata
|
||||
.values()
|
||||
// If we don't have the file size for the layer, don't account for it in the metric.
|
||||
.map(|ilmd| ilmd.file_size)
|
||||
.sum()
|
||||
} else {
|
||||
@@ -581,9 +585,9 @@ impl RemoteTimelineClient {
|
||||
|
||||
// As documented in the struct definition, it's ok for latest_metadata to be
|
||||
// ahead of what's _actually_ on the remote during index upload.
|
||||
upload_queue.dirty.metadata = metadata.clone();
|
||||
upload_queue.latest_metadata = metadata.clone();
|
||||
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
self.schedule_index_upload(upload_queue);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -602,9 +606,9 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
upload_queue.dirty.metadata.apply(update);
|
||||
upload_queue.latest_metadata.apply(update);
|
||||
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
self.schedule_index_upload(upload_queue);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -616,8 +620,8 @@ impl RemoteTimelineClient {
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
upload_queue.dirty.last_aux_file_policy = last_aux_file_policy;
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
upload_queue.last_aux_file_policy = last_aux_file_policy;
|
||||
self.schedule_index_upload(upload_queue);
|
||||
Ok(())
|
||||
}
|
||||
///
|
||||
@@ -635,44 +639,30 @@ impl RemoteTimelineClient {
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
self.schedule_index_upload(upload_queue);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Launch an index-file upload operation in the background (internal function)
|
||||
fn schedule_index_upload(
|
||||
self: &Arc<Self>,
|
||||
upload_queue: &mut UploadQueueInitialized,
|
||||
) -> anyhow::Result<()> {
|
||||
let disk_consistent_lsn = upload_queue.dirty.metadata.disk_consistent_lsn();
|
||||
// fix up the duplicated field
|
||||
upload_queue.dirty.disk_consistent_lsn = disk_consistent_lsn;
|
||||
|
||||
// make sure it serializes before doing it in perform_upload_task so that it doesn't
|
||||
// look like a retryable error
|
||||
let void = std::io::sink();
|
||||
serde_json::to_writer(void, &upload_queue.dirty).context("serialize index_part.json")?;
|
||||
|
||||
let index_part = &upload_queue.dirty;
|
||||
fn schedule_index_upload(self: &Arc<Self>, upload_queue: &mut UploadQueueInitialized) {
|
||||
let disk_consistent_lsn = upload_queue.latest_metadata.disk_consistent_lsn();
|
||||
|
||||
info!(
|
||||
"scheduling metadata upload up to consistent LSN {disk_consistent_lsn} with {} files ({} changed)",
|
||||
index_part.layer_metadata.len(),
|
||||
upload_queue.latest_files.len(),
|
||||
upload_queue.latest_files_changes_since_metadata_upload_scheduled,
|
||||
);
|
||||
|
||||
let op = UploadOp::UploadMetadata {
|
||||
uploaded: Box::new(index_part.clone()),
|
||||
};
|
||||
let index_part = IndexPart::from(&*upload_queue);
|
||||
let op = UploadOp::UploadMetadata(Box::new(index_part), disk_consistent_lsn);
|
||||
self.metric_begin(&op);
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
upload_queue.latest_files_changes_since_metadata_upload_scheduled = 0;
|
||||
|
||||
// Launch the task immediately, if possible
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn schedule_reparenting_and_wait(
|
||||
@@ -685,16 +675,16 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
let Some(prev) = upload_queue.dirty.metadata.ancestor_timeline() else {
|
||||
let Some(prev) = upload_queue.latest_metadata.ancestor_timeline() else {
|
||||
return Err(anyhow::anyhow!(
|
||||
"cannot reparent without a current ancestor"
|
||||
));
|
||||
};
|
||||
|
||||
upload_queue.dirty.metadata.reparent(new_parent);
|
||||
upload_queue.dirty.lineage.record_previous_ancestor(&prev);
|
||||
upload_queue.latest_metadata.reparent(new_parent);
|
||||
upload_queue.latest_lineage.record_previous_ancestor(&prev);
|
||||
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
self.schedule_index_upload(upload_queue);
|
||||
|
||||
self.schedule_barrier0(upload_queue)
|
||||
};
|
||||
@@ -715,17 +705,16 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
upload_queue.dirty.metadata.detach_from_ancestor(&adopted);
|
||||
upload_queue.dirty.lineage.record_detaching(&adopted);
|
||||
upload_queue.latest_metadata.detach_from_ancestor(&adopted);
|
||||
upload_queue.latest_lineage.record_detaching(&adopted);
|
||||
|
||||
for layer in layers {
|
||||
upload_queue
|
||||
.dirty
|
||||
.layer_metadata
|
||||
.latest_files
|
||||
.insert(layer.layer_desc().layer_name(), layer.metadata());
|
||||
}
|
||||
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
self.schedule_index_upload(upload_queue);
|
||||
|
||||
let barrier = self.schedule_barrier0(upload_queue);
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
@@ -757,8 +746,7 @@ impl RemoteTimelineClient {
|
||||
let metadata = layer.metadata();
|
||||
|
||||
upload_queue
|
||||
.dirty
|
||||
.layer_metadata
|
||||
.latest_files
|
||||
.insert(layer.layer_desc().layer_name(), metadata.clone());
|
||||
upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
|
||||
|
||||
@@ -788,8 +776,8 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
let with_metadata = self
|
||||
.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned())?;
|
||||
let with_metadata =
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned());
|
||||
|
||||
self.schedule_deletion_of_unlinked0(upload_queue, with_metadata);
|
||||
|
||||
@@ -813,7 +801,7 @@ impl RemoteTimelineClient {
|
||||
|
||||
let names = gc_layers.iter().map(|x| x.layer_desc().layer_name());
|
||||
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names)?;
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
|
||||
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
|
||||
@@ -826,7 +814,7 @@ impl RemoteTimelineClient {
|
||||
self: &Arc<Self>,
|
||||
upload_queue: &mut UploadQueueInitialized,
|
||||
names: I,
|
||||
) -> anyhow::Result<Vec<(LayerName, LayerFileMetadata)>>
|
||||
) -> Vec<(LayerName, LayerFileMetadata)>
|
||||
where
|
||||
I: IntoIterator<Item = LayerName>,
|
||||
{
|
||||
@@ -836,7 +824,7 @@ impl RemoteTimelineClient {
|
||||
let with_metadata: Vec<_> = names
|
||||
.into_iter()
|
||||
.filter_map(|name| {
|
||||
let meta = upload_queue.dirty.layer_metadata.remove(&name);
|
||||
let meta = upload_queue.latest_files.remove(&name);
|
||||
|
||||
if let Some(meta) = meta {
|
||||
upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;
|
||||
@@ -868,10 +856,10 @@ impl RemoteTimelineClient {
|
||||
// index_part update, because that needs to be uploaded before we can actually delete the
|
||||
// files.
|
||||
if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
self.schedule_index_upload(upload_queue);
|
||||
}
|
||||
|
||||
Ok(with_metadata)
|
||||
with_metadata
|
||||
}
|
||||
|
||||
/// Schedules deletion for layer files which have previously been unlinked from the
|
||||
@@ -962,7 +950,7 @@ impl RemoteTimelineClient {
|
||||
|
||||
let names = compacted_from.iter().map(|x| x.layer_desc().layer_name());
|
||||
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names)?;
|
||||
self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
|
||||
self.launch_queued_tasks(upload_queue);
|
||||
|
||||
Ok(())
|
||||
@@ -1097,7 +1085,7 @@ impl RemoteTimelineClient {
|
||||
let deleted_at = Utc::now().naive_utc();
|
||||
stopped.deleted_at = SetDeletedFlagProgress::InProgress(deleted_at);
|
||||
|
||||
let mut index_part = stopped.upload_queue_for_deletion.dirty.clone();
|
||||
let mut index_part = IndexPart::from(&stopped.upload_queue_for_deletion);
|
||||
index_part.deleted_at = Some(deleted_at);
|
||||
index_part
|
||||
};
|
||||
@@ -1308,8 +1296,7 @@ impl RemoteTimelineClient {
|
||||
|
||||
stopped
|
||||
.upload_queue_for_deletion
|
||||
.dirty
|
||||
.layer_metadata
|
||||
.latest_files
|
||||
.drain()
|
||||
.map(|(file_name, meta)| {
|
||||
remote_layer_path(
|
||||
@@ -1446,7 +1433,7 @@ impl RemoteTimelineClient {
|
||||
// Can always be scheduled.
|
||||
true
|
||||
}
|
||||
UploadOp::UploadMetadata { .. } => {
|
||||
UploadOp::UploadMetadata(_, _) => {
|
||||
// These can only be performed after all the preceding operations
|
||||
// have finished.
|
||||
upload_queue.inprogress_tasks.is_empty()
|
||||
@@ -1488,7 +1475,7 @@ impl RemoteTimelineClient {
|
||||
UploadOp::UploadLayer(_, _) => {
|
||||
upload_queue.num_inprogress_layer_uploads += 1;
|
||||
}
|
||||
UploadOp::UploadMetadata { .. } => {
|
||||
UploadOp::UploadMetadata(_, _) => {
|
||||
upload_queue.num_inprogress_metadata_uploads += 1;
|
||||
}
|
||||
UploadOp::Delete(_) => {
|
||||
@@ -1597,13 +1584,22 @@ impl RemoteTimelineClient {
|
||||
)
|
||||
.await
|
||||
}
|
||||
UploadOp::UploadMetadata { ref uploaded } => {
|
||||
UploadOp::UploadMetadata(ref index_part, _lsn) => {
|
||||
let mention_having_future_layers = if cfg!(feature = "testing") {
|
||||
index_part
|
||||
.layer_metadata
|
||||
.keys()
|
||||
.any(|x| x.is_in_future(*_lsn))
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
let res = upload::upload_index_part(
|
||||
&self.storage_impl,
|
||||
&self.tenant_shard_id,
|
||||
&self.timeline_id,
|
||||
self.generation,
|
||||
uploaded,
|
||||
index_part,
|
||||
&self.cancel,
|
||||
)
|
||||
.measure_remote_op(
|
||||
@@ -1613,21 +1609,10 @@ impl RemoteTimelineClient {
|
||||
)
|
||||
.await;
|
||||
if res.is_ok() {
|
||||
self.update_remote_physical_size_gauge(Some(uploaded));
|
||||
let mention_having_future_layers = if cfg!(feature = "testing") {
|
||||
uploaded
|
||||
.layer_metadata
|
||||
.keys()
|
||||
.any(|x| x.is_in_future(uploaded.metadata.disk_consistent_lsn()))
|
||||
} else {
|
||||
false
|
||||
};
|
||||
self.update_remote_physical_size_gauge(Some(index_part));
|
||||
if mention_having_future_layers {
|
||||
// find rationale near crate::tenant::timeline::init::cleanup_future_layer
|
||||
tracing::info!(
|
||||
disk_consistent_lsn = %uploaded.metadata.disk_consistent_lsn(),
|
||||
"uploaded an index_part.json with future layers -- this is ok! if shutdown now, expect future layer cleanup"
|
||||
);
|
||||
tracing::info!(disk_consistent_lsn=%_lsn, "uploaded an index_part.json with future layers -- this is ok! if shutdown now, expect future layer cleanup");
|
||||
}
|
||||
}
|
||||
res
|
||||
@@ -1728,23 +1713,11 @@ impl RemoteTimelineClient {
|
||||
upload_queue.num_inprogress_layer_uploads -= 1;
|
||||
None
|
||||
}
|
||||
UploadOp::UploadMetadata { ref uploaded } => {
|
||||
UploadOp::UploadMetadata(_, lsn) => {
|
||||
upload_queue.num_inprogress_metadata_uploads -= 1;
|
||||
// XXX monotonicity check?
|
||||
|
||||
// the task id is reused as a monotonicity check for storing the "clean"
|
||||
// IndexPart.
|
||||
let last_updater = upload_queue.clean.1;
|
||||
let is_later = last_updater.is_some_and(|task_id| task_id < task.task_id);
|
||||
let monotone = is_later || last_updater.is_none();
|
||||
|
||||
assert!(monotone, "no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}", task.task_id);
|
||||
|
||||
// not taking ownership is wasteful
|
||||
upload_queue.clean.0.clone_from(uploaded);
|
||||
upload_queue.clean.1 = Some(task.task_id);
|
||||
|
||||
let lsn = upload_queue.clean.0.metadata.disk_consistent_lsn();
|
||||
|
||||
upload_queue.projected_remote_consistent_lsn = Some(lsn);
|
||||
if self.generation.is_none() {
|
||||
// Legacy mode: skip validating generation
|
||||
upload_queue.visible_remote_consistent_lsn.store(lsn);
|
||||
@@ -1798,7 +1771,7 @@ impl RemoteTimelineClient {
|
||||
RemoteOpKind::Upload,
|
||||
RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size),
|
||||
),
|
||||
UploadOp::UploadMetadata { .. } => (
|
||||
UploadOp::UploadMetadata(_, _) => (
|
||||
RemoteOpFileKind::Index,
|
||||
RemoteOpKind::Upload,
|
||||
DontTrackSize {
|
||||
@@ -1874,9 +1847,11 @@ impl RemoteTimelineClient {
|
||||
// Deletion is not really perf sensitive so there shouldnt be any problems with cloning a fraction of it.
|
||||
let upload_queue_for_deletion = UploadQueueInitialized {
|
||||
task_counter: 0,
|
||||
dirty: initialized.dirty.clone(),
|
||||
clean: initialized.clean.clone(),
|
||||
latest_files: initialized.latest_files.clone(),
|
||||
latest_files_changes_since_metadata_upload_scheduled: 0,
|
||||
latest_metadata: initialized.latest_metadata.clone(),
|
||||
latest_lineage: initialized.latest_lineage.clone(),
|
||||
projected_remote_consistent_lsn: None,
|
||||
visible_remote_consistent_lsn: initialized
|
||||
.visible_remote_consistent_lsn
|
||||
.clone(),
|
||||
@@ -1889,6 +1864,7 @@ impl RemoteTimelineClient {
|
||||
dangling_files: HashMap::default(),
|
||||
shutting_down: false,
|
||||
shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
|
||||
last_aux_file_policy: initialized.last_aux_file_policy,
|
||||
};
|
||||
|
||||
let upload_queue = std::mem::replace(
|
||||
|
||||
@@ -28,7 +28,6 @@ use crate::TEMP_FILE_SUFFIX;
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath};
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::pausable_failpoint;
|
||||
|
||||
use super::index::{IndexPart, LayerFileMetadata};
|
||||
use super::{
|
||||
@@ -153,8 +152,6 @@ async fn download_object<'a>(
|
||||
|
||||
let download = storage.download(src_path, cancel).await?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
let mut buf_writer =
|
||||
tokio::io::BufWriter::with_capacity(super::BUFFER_SIZE, destination_file);
|
||||
|
||||
@@ -202,8 +199,6 @@ async fn download_object<'a>(
|
||||
|
||||
let mut download = storage.download(src_path, cancel).await?;
|
||||
|
||||
pausable_failpoint!("before-downloading-layer-stream-pausable");
|
||||
|
||||
// TODO: use vectored write (writev) once supported by tokio-epoll-uring.
|
||||
// There's chunks_vectored() on the stream.
|
||||
let (bytes_amount, destination_file) = async {
|
||||
|
||||
@@ -11,6 +11,7 @@ use utils::id::TimelineId;
|
||||
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::upload_queue::UploadQueueInitialized;
|
||||
use crate::tenant::Generation;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
|
||||
@@ -38,19 +39,12 @@ pub struct IndexPart {
|
||||
/// that latest version stores.
|
||||
pub layer_metadata: HashMap<LayerName, LayerFileMetadata>,
|
||||
|
||||
/// Because of the trouble of eyeballing the legacy "metadata" field, we copied the
|
||||
/// "disk_consistent_lsn" out. After version 7 this is no longer needed, but the name cannot be
|
||||
/// reused.
|
||||
pub(super) disk_consistent_lsn: Lsn,
|
||||
// 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
|
||||
// It's duplicated for convenience when reading the serialized structure, but is
|
||||
// private because internally we would read from metadata instead.
|
||||
disk_consistent_lsn: Lsn,
|
||||
|
||||
// TODO: rename as "metadata" next week, keep the alias = "metadata_bytes", bump version Adding
|
||||
// the "alias = metadata" was forgotten in #7693, so we have to use "rewrite = metadata_bytes"
|
||||
// for backwards compatibility.
|
||||
#[serde(
|
||||
rename = "metadata_bytes",
|
||||
alias = "metadata",
|
||||
with = "crate::tenant::metadata::modern_serde"
|
||||
)]
|
||||
#[serde(rename = "metadata_bytes")]
|
||||
pub metadata: TimelineMetadata,
|
||||
|
||||
#[serde(default)]
|
||||
@@ -79,33 +73,40 @@ impl IndexPart {
|
||||
/// - 4: timeline_layers is fully removed.
|
||||
/// - 5: lineage was added
|
||||
/// - 6: last_aux_file_policy is added.
|
||||
/// - 7: metadata_bytes is no longer written, but still read
|
||||
const LATEST_VERSION: usize = 7;
|
||||
const LATEST_VERSION: usize = 6;
|
||||
|
||||
// Versions we may see when reading from a bucket.
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7];
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6];
|
||||
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
|
||||
|
||||
pub(crate) fn empty(metadata: TimelineMetadata) -> Self {
|
||||
IndexPart {
|
||||
fn new(
|
||||
layers_and_metadata: &HashMap<LayerName, LayerFileMetadata>,
|
||||
disk_consistent_lsn: Lsn,
|
||||
metadata: TimelineMetadata,
|
||||
lineage: Lineage,
|
||||
last_aux_file_policy: Option<AuxFilePolicy>,
|
||||
) -> Self {
|
||||
let layer_metadata = layers_and_metadata.clone();
|
||||
|
||||
Self {
|
||||
version: Self::LATEST_VERSION,
|
||||
layer_metadata: Default::default(),
|
||||
disk_consistent_lsn: metadata.disk_consistent_lsn(),
|
||||
layer_metadata,
|
||||
disk_consistent_lsn,
|
||||
metadata,
|
||||
deleted_at: None,
|
||||
lineage: Default::default(),
|
||||
last_aux_file_policy: None,
|
||||
lineage,
|
||||
last_aux_file_policy,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn version(&self) -> usize {
|
||||
pub fn get_version(&self) -> usize {
|
||||
self.version
|
||||
}
|
||||
|
||||
/// If you want this under normal operations, read it from self.metadata:
|
||||
/// this method is just for the scrubber to use when validating an index.
|
||||
pub fn duplicated_disk_consistent_lsn(&self) -> Lsn {
|
||||
pub fn get_disk_consistent_lsn(&self) -> Lsn {
|
||||
self.disk_consistent_lsn
|
||||
}
|
||||
|
||||
@@ -119,7 +120,14 @@ impl IndexPart {
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn example() -> Self {
|
||||
Self::empty(TimelineMetadata::example())
|
||||
let example_metadata = TimelineMetadata::example();
|
||||
Self::new(
|
||||
&HashMap::new(),
|
||||
example_metadata.disk_consistent_lsn(),
|
||||
example_metadata,
|
||||
Default::default(),
|
||||
Some(AuxFilePolicy::V1),
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn last_aux_file_policy(&self) -> Option<AuxFilePolicy> {
|
||||
@@ -127,6 +135,22 @@ impl IndexPart {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&UploadQueueInitialized> for IndexPart {
|
||||
fn from(uq: &UploadQueueInitialized) -> Self {
|
||||
let disk_consistent_lsn = uq.latest_metadata.disk_consistent_lsn();
|
||||
let metadata = uq.latest_metadata.clone();
|
||||
let lineage = uq.latest_lineage.clone();
|
||||
|
||||
Self::new(
|
||||
&uq.latest_files,
|
||||
disk_consistent_lsn,
|
||||
metadata,
|
||||
lineage,
|
||||
uq.last_aux_file_policy,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata gathered for each of the layer files.
|
||||
///
|
||||
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
|
||||
@@ -212,18 +236,19 @@ impl Lineage {
|
||||
/// The queried lsn is most likely the basebackup lsn, and this answers question "is it allowed
|
||||
/// to start a read/write primary at this lsn".
|
||||
///
|
||||
/// Returns true if the Lsn was previously our branch point.
|
||||
/// Returns true if the Lsn was previously a branch point.
|
||||
pub(crate) fn is_previous_ancestor_lsn(&self, lsn: Lsn) -> bool {
|
||||
self.original_ancestor
|
||||
.is_some_and(|(_, ancestor_lsn, _)| ancestor_lsn == lsn)
|
||||
.as_ref()
|
||||
.is_some_and(|(_, ancestor_lsn, _)| lsn == *ancestor_lsn)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::str::FromStr;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn v1_indexpart_is_parsed() {
|
||||
@@ -342,7 +367,8 @@ mod tests {
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
|
||||
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
deleted_at: Some(chrono::NaiveDateTime::parse_from_str(
|
||||
"2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()),
|
||||
lineage: Lineage::default(),
|
||||
last_aux_file_policy: None,
|
||||
};
|
||||
@@ -518,7 +544,8 @@ mod tests {
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::from_bytes(&[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]).unwrap(),
|
||||
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
deleted_at: Some(chrono::NaiveDateTime::parse_from_str(
|
||||
"2023-07-31T09:00:00.123000000", "%Y-%m-%dT%H:%M:%S.%f").unwrap()),
|
||||
lineage: Lineage {
|
||||
reparenting_history_truncated: false,
|
||||
reparenting_history: vec![TimelineId::from_str("e1bfd8c633d713d279e6fcd2bcc15b6d").unwrap()],
|
||||
@@ -531,60 +558,6 @@ mod tests {
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v7_indexpart_is_parsed() {
|
||||
let example = r#"{
|
||||
"version": 7,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata": {
|
||||
"disk_consistent_lsn": "0/16960E8",
|
||||
"prev_record_lsn": "0/1696070",
|
||||
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/1696070",
|
||||
"initdb_lsn": "0/1696070",
|
||||
"pg_version": 14
|
||||
},
|
||||
"deleted_at": "2023-07-31T09:00:00.123"
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 7,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::new(
|
||||
Lsn::from_str("0/16960E8").unwrap(),
|
||||
Some(Lsn::from_str("0/1696070").unwrap()),
|
||||
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
|
||||
Lsn::INVALID,
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
14,
|
||||
).with_recalculated_checksum().unwrap(),
|
||||
deleted_at: Some(parse_naive_datetime("2023-07-31T09:00:00.123000000")),
|
||||
lineage: Default::default(),
|
||||
last_aux_file_policy: Default::default(),
|
||||
};
|
||||
|
||||
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
fn parse_naive_datetime(s: &str) -> NaiveDateTime {
|
||||
chrono::NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S.%f").unwrap()
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
//! Helper functions to upload files to remote storage with a RemoteStorage
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use bytes::Bytes;
|
||||
use camino::Utf8Path;
|
||||
use fail::fail_point;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
@@ -12,10 +11,10 @@ use tokio::io::AsyncSeekExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::{backoff, pausable_failpoint};
|
||||
|
||||
use super::index::IndexPart;
|
||||
use super::Generation;
|
||||
use crate::tenant::remote_timeline_client::{
|
||||
remote_index_path, remote_initdb_archive_path, remote_initdb_preserved_archive_path,
|
||||
index::IndexPart, remote_index_path, remote_initdb_archive_path,
|
||||
remote_initdb_preserved_archive_path,
|
||||
};
|
||||
use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
@@ -28,7 +27,7 @@ pub(crate) async fn upload_index_part<'a>(
|
||||
tenant_shard_id: &TenantShardId,
|
||||
timeline_id: &TimelineId,
|
||||
generation: Generation,
|
||||
index_part: &IndexPart,
|
||||
index_part: &'a IndexPart,
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
tracing::trace!("uploading new index part");
|
||||
@@ -38,16 +37,16 @@ pub(crate) async fn upload_index_part<'a>(
|
||||
});
|
||||
pausable_failpoint!("before-upload-index-pausable");
|
||||
|
||||
// FIXME: this error comes too late
|
||||
let serialized = index_part.to_s3_bytes()?;
|
||||
let serialized = Bytes::from(serialized);
|
||||
|
||||
let index_part_size = serialized.len();
|
||||
let index_part_bytes = index_part
|
||||
.to_s3_bytes()
|
||||
.context("serialize index part file into bytes")?;
|
||||
let index_part_size = index_part_bytes.len();
|
||||
let index_part_bytes = bytes::Bytes::from(index_part_bytes);
|
||||
|
||||
let remote_path = remote_index_path(tenant_shard_id, timeline_id, generation);
|
||||
storage
|
||||
.upload_storage_object(
|
||||
futures::stream::once(futures::future::ready(Ok(serialized))),
|
||||
futures::stream::once(futures::future::ready(Ok(index_part_bytes))),
|
||||
index_part_size,
|
||||
&remote_path,
|
||||
cancel,
|
||||
|
||||
@@ -187,7 +187,6 @@ impl SecondaryTenant {
|
||||
};
|
||||
|
||||
let now = SystemTime::now();
|
||||
tracing::info!("Evicting secondary layer");
|
||||
|
||||
let this = self.clone();
|
||||
|
||||
|
||||
@@ -513,7 +513,7 @@ impl<'a> TenantDownloader<'a> {
|
||||
// cover our access to local storage.
|
||||
let Ok(_guard) = self.secondary_state.gate.enter() else {
|
||||
// Shutting down
|
||||
return Err(UpdateError::Cancelled);
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
|
||||
@@ -846,7 +846,7 @@ impl<'a> TenantDownloader<'a> {
|
||||
for layer in timeline.layers {
|
||||
if self.secondary_state.cancel.is_cancelled() {
|
||||
tracing::debug!("Cancelled -- dropping out of layer loop");
|
||||
return Err(UpdateError::Cancelled);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Existing on-disk layers: just update their access time.
|
||||
@@ -909,7 +909,6 @@ impl<'a> TenantDownloader<'a> {
|
||||
strftime(&layer.access_time),
|
||||
strftime(evicted_at)
|
||||
);
|
||||
self.skip_layer(layer);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -964,15 +963,6 @@ impl<'a> TenantDownloader<'a> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Call this during timeline download if a layer will _not_ be downloaded, to update progress statistics
|
||||
fn skip_layer(&self, layer: HeatMapLayer) {
|
||||
let mut progress = self.secondary_state.progress.lock().unwrap();
|
||||
progress.layers_total = progress.layers_total.saturating_sub(1);
|
||||
progress.bytes_total = progress
|
||||
.bytes_total
|
||||
.saturating_sub(layer.metadata.file_size);
|
||||
}
|
||||
|
||||
async fn download_layer(
|
||||
&self,
|
||||
tenant_shard_id: &TenantShardId,
|
||||
@@ -1000,7 +990,7 @@ impl<'a> TenantDownloader<'a> {
|
||||
layer.name,
|
||||
layer.metadata.file_size
|
||||
);
|
||||
let downloaded_bytes = download_layer_file(
|
||||
let downloaded_bytes = match download_layer_file(
|
||||
self.conf,
|
||||
self.remote_storage,
|
||||
*tenant_shard_id,
|
||||
@@ -1011,9 +1001,8 @@ impl<'a> TenantDownloader<'a> {
|
||||
&self.secondary_state.cancel,
|
||||
ctx,
|
||||
)
|
||||
.await;
|
||||
|
||||
let downloaded_bytes = match downloaded_bytes {
|
||||
.await
|
||||
{
|
||||
Ok(bytes) => bytes,
|
||||
Err(DownloadError::NotFound) => {
|
||||
// A heatmap might be out of date and refer to a layer that doesn't exist any more.
|
||||
@@ -1023,7 +1012,13 @@ impl<'a> TenantDownloader<'a> {
|
||||
"Skipped downloading missing layer {}, raced with compaction/gc?",
|
||||
layer.name
|
||||
);
|
||||
self.skip_layer(layer);
|
||||
|
||||
// If the layer is 404, adjust the progress statistics to reflect that we will not download it.
|
||||
let mut progress = self.secondary_state.progress.lock().unwrap();
|
||||
progress.layers_total = progress.layers_total.saturating_sub(1);
|
||||
progress.bytes_total = progress
|
||||
.bytes_total
|
||||
.saturating_sub(layer.metadata.file_size);
|
||||
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
@@ -334,11 +334,8 @@ where
|
||||
|
||||
let tenant_shard_id = job.get_tenant_shard_id();
|
||||
let barrier = if let Some(barrier) = self.get_running(tenant_shard_id) {
|
||||
tracing::info!(
|
||||
tenant_id=%tenant_shard_id.tenant_id,
|
||||
shard_id=%tenant_shard_id.shard_slug(),
|
||||
"Command already running, waiting for it"
|
||||
);
|
||||
tracing::info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
|
||||
"Command already running, waiting for it");
|
||||
barrier
|
||||
} else {
|
||||
let running = self.spawn_now(job);
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use tokio::sync::oneshot::error::RecvError;
|
||||
use tokio::sync::Semaphore;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -10,7 +11,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use crate::context::RequestContext;
|
||||
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
|
||||
|
||||
use super::{GcError, LogicalSizeCalculationCause, Tenant};
|
||||
use super::{LogicalSizeCalculationCause, Tenant};
|
||||
use crate::tenant::Timeline;
|
||||
use utils::id::TimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
@@ -42,44 +43,6 @@ pub struct SegmentMeta {
|
||||
pub kind: LsnKind,
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub(crate) enum CalculateSyntheticSizeError {
|
||||
/// Something went wrong internally to the calculation of logical size at a particular branch point
|
||||
#[error("Failed to calculated logical size on timeline {timeline_id} at {lsn}: {error}")]
|
||||
LogicalSize {
|
||||
timeline_id: TimelineId,
|
||||
lsn: Lsn,
|
||||
error: CalculateLogicalSizeError,
|
||||
},
|
||||
|
||||
/// Something went wrong internally when calculating GC parameters at start of size calculation
|
||||
#[error(transparent)]
|
||||
GcInfo(GcError),
|
||||
|
||||
/// Totally unexpected errors, like panics joining a task
|
||||
#[error(transparent)]
|
||||
Fatal(anyhow::Error),
|
||||
|
||||
/// The LSN we are trying to calculate a size at no longer exists at the point we query it
|
||||
#[error("Could not find size at {lsn} in timeline {timeline_id}")]
|
||||
LsnNotFound { timeline_id: TimelineId, lsn: Lsn },
|
||||
|
||||
/// Tenant shut down while calculating size
|
||||
#[error("Cancelled")]
|
||||
Cancelled,
|
||||
}
|
||||
|
||||
impl From<GcError> for CalculateSyntheticSizeError {
|
||||
fn from(value: GcError) -> Self {
|
||||
match value {
|
||||
GcError::TenantCancelled | GcError::TimelineCancelled => {
|
||||
CalculateSyntheticSizeError::Cancelled
|
||||
}
|
||||
other => CalculateSyntheticSizeError::GcInfo(other),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SegmentMeta {
|
||||
fn size_needed(&self) -> bool {
|
||||
match self.kind {
|
||||
@@ -153,9 +116,12 @@ pub(super) async fn gather_inputs(
|
||||
cause: LogicalSizeCalculationCause,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<ModelInputs, CalculateSyntheticSizeError> {
|
||||
) -> anyhow::Result<ModelInputs> {
|
||||
// refresh is needed to update gc related pitr_cutoff and horizon_cutoff
|
||||
tenant.refresh_gc_info(cancel, ctx).await?;
|
||||
tenant
|
||||
.refresh_gc_info(cancel, ctx)
|
||||
.await
|
||||
.context("Failed to refresh gc_info before gathering inputs")?;
|
||||
|
||||
// Collect information about all the timelines
|
||||
let mut timelines = tenant.list_timelines();
|
||||
@@ -361,12 +327,6 @@ pub(super) async fn gather_inputs(
|
||||
)
|
||||
.await?;
|
||||
|
||||
if tenant.cancel.is_cancelled() {
|
||||
// If we're shutting down, return an error rather than a sparse result that might include some
|
||||
// timelines from before we started shutting down
|
||||
return Err(CalculateSyntheticSizeError::Cancelled);
|
||||
}
|
||||
|
||||
Ok(ModelInputs {
|
||||
segments,
|
||||
timeline_inputs,
|
||||
@@ -385,7 +345,7 @@ async fn fill_logical_sizes(
|
||||
logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
|
||||
cause: LogicalSizeCalculationCause,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), CalculateSyntheticSizeError> {
|
||||
) -> anyhow::Result<()> {
|
||||
let timeline_hash: HashMap<TimelineId, Arc<Timeline>> = HashMap::from_iter(
|
||||
timelines
|
||||
.iter()
|
||||
@@ -427,7 +387,7 @@ async fn fill_logical_sizes(
|
||||
}
|
||||
|
||||
// Perform the size lookups
|
||||
let mut have_any_error = None;
|
||||
let mut have_any_error = false;
|
||||
while let Some(res) = joinset.join_next().await {
|
||||
// each of these come with Result<anyhow::Result<_>, JoinError>
|
||||
// because of spawn + spawn_blocking
|
||||
@@ -438,36 +398,21 @@ async fn fill_logical_sizes(
|
||||
Err(join_error) => {
|
||||
// cannot really do anything, as this panic is likely a bug
|
||||
error!("task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}");
|
||||
|
||||
have_any_error = Some(CalculateSyntheticSizeError::Fatal(
|
||||
anyhow::anyhow!(join_error)
|
||||
.context("task that calls spawn_ondemand_logical_size_calculation"),
|
||||
));
|
||||
have_any_error = true;
|
||||
}
|
||||
Ok(Err(recv_result_error)) => {
|
||||
// cannot really do anything, as this panic is likely a bug
|
||||
error!("failed to receive logical size query result: {recv_result_error:#}");
|
||||
have_any_error = Some(CalculateSyntheticSizeError::Fatal(
|
||||
anyhow::anyhow!(recv_result_error)
|
||||
.context("Receiving logical size query result"),
|
||||
));
|
||||
have_any_error = true;
|
||||
}
|
||||
Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Err(error)))) => {
|
||||
if matches!(error, CalculateLogicalSizeError::Cancelled) {
|
||||
// Skip this: it's okay if one timeline among many is shutting down while we
|
||||
// calculate inputs for the overall tenant.
|
||||
continue;
|
||||
} else {
|
||||
if !matches!(error, CalculateLogicalSizeError::Cancelled) {
|
||||
warn!(
|
||||
timeline_id=%timeline.timeline_id,
|
||||
"failed to calculate logical size at {lsn}: {error:#}"
|
||||
);
|
||||
have_any_error = Some(CalculateSyntheticSizeError::LogicalSize {
|
||||
timeline_id: timeline.timeline_id,
|
||||
lsn,
|
||||
error,
|
||||
});
|
||||
}
|
||||
have_any_error = true;
|
||||
}
|
||||
Ok(Ok(TimelineAtLsnSizeResult(timeline, lsn, Ok(size)))) => {
|
||||
debug!(timeline_id=%timeline.timeline_id, %lsn, size, "size calculated");
|
||||
@@ -481,10 +426,10 @@ async fn fill_logical_sizes(
|
||||
// prune any keys not needed anymore; we record every used key and added key.
|
||||
logical_size_cache.retain(|key, _| sizes_needed.contains_key(key));
|
||||
|
||||
if let Some(error) = have_any_error {
|
||||
if have_any_error {
|
||||
// we cannot complete this round, because we are missing data.
|
||||
// we have however cached all we were able to request calculation on.
|
||||
return Err(error);
|
||||
anyhow::bail!("failed to calculate some logical_sizes");
|
||||
}
|
||||
|
||||
// Insert the looked up sizes to the Segments
|
||||
@@ -499,29 +444,32 @@ async fn fill_logical_sizes(
|
||||
if let Some(Some(size)) = sizes_needed.get(&(timeline_id, lsn)) {
|
||||
seg.segment.size = Some(*size);
|
||||
} else {
|
||||
return Err(CalculateSyntheticSizeError::LsnNotFound { timeline_id, lsn });
|
||||
bail!("could not find size at {} in timeline {}", lsn, timeline_id);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl ModelInputs {
|
||||
pub fn calculate_model(&self) -> tenant_size_model::StorageModel {
|
||||
pub fn calculate_model(&self) -> anyhow::Result<tenant_size_model::StorageModel> {
|
||||
// Convert SegmentMetas into plain Segments
|
||||
StorageModel {
|
||||
let storage = StorageModel {
|
||||
segments: self
|
||||
.segments
|
||||
.iter()
|
||||
.map(|seg| seg.segment.clone())
|
||||
.collect(),
|
||||
}
|
||||
};
|
||||
|
||||
Ok(storage)
|
||||
}
|
||||
|
||||
// calculate total project size
|
||||
pub fn calculate(&self) -> u64 {
|
||||
let storage = self.calculate_model();
|
||||
pub fn calculate(&self) -> anyhow::Result<u64> {
|
||||
let storage = self.calculate_model()?;
|
||||
let sizes = storage.calculate();
|
||||
sizes.total_size
|
||||
|
||||
Ok(sizes.total_size)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -708,7 +656,7 @@ fn verify_size_for_multiple_branches() {
|
||||
"#;
|
||||
let inputs: ModelInputs = serde_json::from_str(doc).unwrap();
|
||||
|
||||
assert_eq!(inputs.calculate(), 37_851_408);
|
||||
assert_eq!(inputs.calculate().unwrap(), 37_851_408);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -763,7 +711,7 @@ fn verify_size_for_one_branch() {
|
||||
|
||||
let model: ModelInputs = serde_json::from_str(doc).unwrap();
|
||||
|
||||
let res = model.calculate_model().calculate();
|
||||
let res = model.calculate_model().unwrap().calculate();
|
||||
|
||||
println!("calculated synthetic size: {}", res.total_size);
|
||||
println!("result: {:?}", serde_json::to_string(&res.segments));
|
||||
|
||||
@@ -318,7 +318,7 @@ pub(crate) struct LayerFringe {
|
||||
#[derive(Debug)]
|
||||
struct LayerKeyspace {
|
||||
layer: ReadableLayer,
|
||||
target_keyspace: KeySpaceRandomAccum,
|
||||
target_keyspace: KeySpace,
|
||||
}
|
||||
|
||||
impl LayerFringe {
|
||||
@@ -336,19 +336,14 @@ impl LayerFringe {
|
||||
};
|
||||
|
||||
let removed = self.layers.remove_entry(&read_desc.layer_id);
|
||||
|
||||
match removed {
|
||||
Some((
|
||||
_,
|
||||
LayerKeyspace {
|
||||
layer,
|
||||
mut target_keyspace,
|
||||
target_keyspace,
|
||||
},
|
||||
)) => Some((
|
||||
layer,
|
||||
target_keyspace.consume_keyspace(),
|
||||
read_desc.lsn_range,
|
||||
)),
|
||||
)) => Some((layer, target_keyspace, read_desc.lsn_range)),
|
||||
None => unreachable!("fringe internals are always consistent"),
|
||||
}
|
||||
}
|
||||
@@ -363,18 +358,16 @@ impl LayerFringe {
|
||||
let entry = self.layers.entry(layer_id.clone());
|
||||
match entry {
|
||||
Entry::Occupied(mut entry) => {
|
||||
entry.get_mut().target_keyspace.add_keyspace(keyspace);
|
||||
entry.get_mut().target_keyspace.merge(&keyspace);
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
self.planned_reads_by_lsn.push(ReadDesc {
|
||||
lsn_range,
|
||||
layer_id: layer_id.clone(),
|
||||
});
|
||||
let mut accum = KeySpaceRandomAccum::new();
|
||||
accum.add_keyspace(keyspace);
|
||||
entry.insert(LayerKeyspace {
|
||||
layer,
|
||||
target_keyspace: accum,
|
||||
target_keyspace: keyspace,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -219,6 +219,7 @@ pub struct DeltaLayerInner {
|
||||
// values copied from summary
|
||||
index_start_blk: u32,
|
||||
index_root_blk: u32,
|
||||
lsn_range: Range<Lsn>,
|
||||
|
||||
file: VirtualFile,
|
||||
file_id: FileId,
|
||||
@@ -477,23 +478,6 @@ impl DeltaLayerWriterInner {
|
||||
key_end: Key,
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<ResidentLayer> {
|
||||
let temp_path = self.path.clone();
|
||||
let result = self.finish0(key_end, timeline, ctx).await;
|
||||
if result.is_err() {
|
||||
tracing::info!(%temp_path, "cleaning up temporary file after error during writing");
|
||||
if let Err(e) = std::fs::remove_file(&temp_path) {
|
||||
tracing::warn!(error=%e, %temp_path, "error cleaning up temporary layer file after error during writing");
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
async fn finish0(
|
||||
self,
|
||||
key_end: Key,
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<ResidentLayer> {
|
||||
let index_start_blk =
|
||||
((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;
|
||||
@@ -667,11 +651,19 @@ impl DeltaLayerWriter {
|
||||
timeline: &Arc<Timeline>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<ResidentLayer> {
|
||||
self.inner
|
||||
.take()
|
||||
.unwrap()
|
||||
.finish(key_end, timeline, ctx)
|
||||
.await
|
||||
let inner = self.inner.take().unwrap();
|
||||
let temp_path = inner.path.clone();
|
||||
let result = inner.finish(key_end, timeline, ctx).await;
|
||||
// The delta layer files can sometimes be really large. Clean them up.
|
||||
if result.is_err() {
|
||||
tracing::warn!(
|
||||
"Cleaning up temporary delta file {temp_path} after error during writing"
|
||||
);
|
||||
if let Err(e) = std::fs::remove_file(&temp_path) {
|
||||
tracing::warn!("Error cleaning up temporary delta layer file {temp_path}: {e:?}")
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
@@ -784,6 +776,7 @@ impl DeltaLayerInner {
|
||||
file_id,
|
||||
index_start_blk: actual_summary.index_start_blk,
|
||||
index_root_blk: actual_summary.index_root_blk,
|
||||
lsn_range: actual_summary.lsn_range,
|
||||
max_vectored_read_bytes,
|
||||
}))
|
||||
}
|
||||
@@ -909,7 +902,7 @@ impl DeltaLayerInner {
|
||||
|
||||
let reads = Self::plan_reads(
|
||||
&keyspace,
|
||||
lsn_range.clone(),
|
||||
lsn_range,
|
||||
data_end_offset,
|
||||
index_reader,
|
||||
planner,
|
||||
@@ -922,50 +915,11 @@ impl DeltaLayerInner {
|
||||
self.do_reads_and_update_state(reads, reconstruct_state, ctx)
|
||||
.await;
|
||||
|
||||
reconstruct_state.on_lsn_advanced(&keyspace, lsn_range.start);
|
||||
reconstruct_state.on_lsn_advanced(&keyspace, self.lsn_range.start);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load all key-values in the delta layer, should be replaced by an iterator-based interface in the future.
|
||||
#[cfg(test)]
|
||||
pub(super) async fn load_key_values(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Vec<(Key, Lsn, Value)>> {
|
||||
let block_reader = FileBlockReader::new(&self.file, self.file_id);
|
||||
let index_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
|
||||
self.index_start_blk,
|
||||
self.index_root_blk,
|
||||
block_reader,
|
||||
);
|
||||
let mut result = Vec::new();
|
||||
let mut stream =
|
||||
Box::pin(self.stream_index_forwards(&index_reader, &[0; DELTA_KEY_SIZE], ctx));
|
||||
let block_reader = FileBlockReader::new(&self.file, self.file_id);
|
||||
let cursor = block_reader.block_cursor();
|
||||
let mut buf = Vec::new();
|
||||
while let Some(item) = stream.next().await {
|
||||
let (key, lsn, pos) = item?;
|
||||
// TODO: dedup code with get_reconstruct_value
|
||||
// TODO: ctx handling and sharding
|
||||
cursor
|
||||
.read_blob_into_buf(pos.pos(), &mut buf, ctx)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to read blob from virtual file {}", self.file.path)
|
||||
})?;
|
||||
let val = Value::des(&buf).with_context(|| {
|
||||
format!(
|
||||
"Failed to deserialize file blob from virtual file {}",
|
||||
self.file.path
|
||||
)
|
||||
})?;
|
||||
result.push((key, lsn, val));
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn plan_reads<Reader>(
|
||||
keyspace: &KeySpace,
|
||||
lsn_range: Range<Lsn>,
|
||||
|
||||
@@ -485,34 +485,6 @@ impl ImageLayerInner {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load all key-values in the delta layer, should be replaced by an iterator-based interface in the future.
|
||||
#[cfg(test)]
|
||||
pub(super) async fn load_key_values(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Vec<(Key, Lsn, Value)>> {
|
||||
let block_reader = FileBlockReader::new(&self.file, self.file_id);
|
||||
let tree_reader =
|
||||
DiskBtreeReader::new(self.index_start_blk, self.index_root_blk, &block_reader);
|
||||
let mut result = Vec::new();
|
||||
let mut stream = Box::pin(tree_reader.get_stream_from(&[0; KEY_SIZE], ctx));
|
||||
let block_reader = FileBlockReader::new(&self.file, self.file_id);
|
||||
let cursor = block_reader.block_cursor();
|
||||
while let Some(item) = stream.next().await {
|
||||
// TODO: dedup code with get_reconstruct_value
|
||||
let (raw_key, offset) = item?;
|
||||
let key = Key::from_slice(&raw_key[..KEY_SIZE]);
|
||||
// TODO: ctx handling and sharding
|
||||
let blob = cursor
|
||||
.read_blob(offset, ctx)
|
||||
.await
|
||||
.with_context(|| format!("failed to read value from offset {}", offset))?;
|
||||
let value = Bytes::from(blob);
|
||||
result.push((key, self.lsn, Value::Image(value)));
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Traverse the layer's index to build read operations on the overlap of the input keyspace
|
||||
/// and the keys in this layer.
|
||||
///
|
||||
@@ -945,57 +917,26 @@ impl Drop for ImageLayerWriter {
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::time::Duration;
|
||||
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::{
|
||||
key::Key,
|
||||
shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize},
|
||||
};
|
||||
use utils::{
|
||||
generation::Generation,
|
||||
id::{TenantId, TimelineId},
|
||||
lsn::Lsn,
|
||||
};
|
||||
use utils::{id::TimelineId, lsn::Lsn};
|
||||
|
||||
use crate::{
|
||||
tenant::{config::TenantConf, harness::TenantHarness},
|
||||
DEFAULT_PG_VERSION,
|
||||
};
|
||||
use crate::{tenant::harness::TenantHarness, DEFAULT_PG_VERSION};
|
||||
|
||||
use super::ImageLayerWriter;
|
||||
|
||||
#[tokio::test]
|
||||
async fn image_layer_rewrite() {
|
||||
let tenant_conf = TenantConf {
|
||||
gc_period: Duration::ZERO,
|
||||
compaction_period: Duration::ZERO,
|
||||
..TenantConf::default()
|
||||
};
|
||||
let tenant_id = TenantId::generate();
|
||||
let mut gen = Generation::new(0xdead0001);
|
||||
let mut get_next_gen = || {
|
||||
let ret = gen;
|
||||
gen = gen.next();
|
||||
ret
|
||||
};
|
||||
let harness = TenantHarness::create("test_image_layer_rewrite").unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
|
||||
// The LSN at which we will create an image layer to filter
|
||||
let lsn = Lsn(0xdeadbeef0000);
|
||||
|
||||
let timeline_id = TimelineId::generate();
|
||||
|
||||
//
|
||||
// Create an unsharded parent with a layer.
|
||||
//
|
||||
|
||||
let harness = TenantHarness::create_custom(
|
||||
"test_image_layer_rewrite--parent",
|
||||
tenant_conf.clone(),
|
||||
tenant_id,
|
||||
ShardIdentity::unsharded(),
|
||||
get_next_gen(),
|
||||
)
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let timeline = tenant
|
||||
.create_test_timeline(timeline_id, lsn, DEFAULT_PG_VERSION, &ctx)
|
||||
.await
|
||||
@@ -1030,47 +971,9 @@ mod test {
|
||||
};
|
||||
let original_size = resident.metadata().file_size;
|
||||
|
||||
//
|
||||
// Create child shards and do the rewrite, exercising filter().
|
||||
// TODO: abstraction in TenantHarness for splits.
|
||||
//
|
||||
|
||||
// Filter for various shards: this exercises cases like values at start of key range, end of key
|
||||
// range, middle of key range.
|
||||
let shard_count = ShardCount::new(4);
|
||||
for shard_number in 0..shard_count.count() {
|
||||
//
|
||||
// mimic the shard split
|
||||
//
|
||||
let shard_identity = ShardIdentity::new(
|
||||
ShardNumber(shard_number),
|
||||
shard_count,
|
||||
ShardStripeSize(0x8000),
|
||||
)
|
||||
.unwrap();
|
||||
let harness = TenantHarness::create_custom(
|
||||
Box::leak(Box::new(format!(
|
||||
"test_image_layer_rewrite--child{}",
|
||||
shard_identity.shard_slug()
|
||||
))),
|
||||
tenant_conf.clone(),
|
||||
tenant_id,
|
||||
shard_identity,
|
||||
// NB: in reality, the shards would each fork off their own gen number sequence from the parent.
|
||||
// But here, all we care about is that the gen number is unique.
|
||||
get_next_gen(),
|
||||
)
|
||||
.unwrap();
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let timeline = tenant
|
||||
.create_test_timeline(timeline_id, lsn, DEFAULT_PG_VERSION, &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
//
|
||||
// use filter() and make assertions
|
||||
//
|
||||
|
||||
for shard_number in 0..4 {
|
||||
let mut filtered_writer = ImageLayerWriter::new(
|
||||
harness.conf,
|
||||
timeline_id,
|
||||
@@ -1082,6 +985,15 @@ mod test {
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// TenantHarness gave us an unsharded tenant, but we'll use a sharded ShardIdentity
|
||||
// to exercise filter()
|
||||
let shard_identity = ShardIdentity::new(
|
||||
ShardNumber(shard_number),
|
||||
ShardCount::new(4),
|
||||
ShardStripeSize(0x8000),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let wrote_keys = resident
|
||||
.filter(&shard_identity, &mut filtered_writer, &ctx)
|
||||
.await
|
||||
|
||||
@@ -52,7 +52,7 @@ pub struct InMemoryLayer {
|
||||
|
||||
/// Frozen layers have an exclusive end LSN.
|
||||
/// Writes are only allowed when this is `None`.
|
||||
pub(crate) end_lsn: OnceLock<Lsn>,
|
||||
end_lsn: OnceLock<Lsn>,
|
||||
|
||||
/// Used for traversal path. Cached representation of the in-memory layer before frozen.
|
||||
local_path_str: Arc<str>,
|
||||
|
||||
@@ -277,10 +277,9 @@ impl Layer {
|
||||
|
||||
let downloaded = resident.expect("just initialized");
|
||||
|
||||
// We never want to overwrite an existing file, so we use `RENAME_NOREPLACE`.
|
||||
// TODO: this leaves the temp file in place if the rename fails, risking us running
|
||||
// out of space. Should we clean it up here or does the calling context deal with this?
|
||||
utils::fs_ext::rename_noreplace(temp_path.as_std_path(), owner.local_path().as_std_path())
|
||||
// if the rename works, the path is as expected
|
||||
// TODO: sync system call
|
||||
std::fs::rename(temp_path, owner.local_path())
|
||||
.with_context(|| format!("rename temporary file as correct path for {owner}"))?;
|
||||
|
||||
Ok(ResidentLayer { downloaded, owner })
|
||||
@@ -367,10 +366,7 @@ impl Layer {
|
||||
.0
|
||||
.get_or_maybe_download(true, Some(ctx))
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
DownloadError::DownloadCancelled => GetVectoredError::Cancelled,
|
||||
other => GetVectoredError::Other(anyhow::anyhow!(other)),
|
||||
})?;
|
||||
.map_err(|err| GetVectoredError::Other(anyhow::anyhow!(err)))?;
|
||||
|
||||
self.0
|
||||
.access_stats
|
||||
@@ -388,23 +384,6 @@ impl Layer {
|
||||
})
|
||||
}
|
||||
|
||||
/// Get all key/values in the layer. Should be replaced with an iterator-based API in the future.
|
||||
#[cfg(test)]
|
||||
pub(crate) async fn load_key_values(
|
||||
&self,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Vec<(Key, Lsn, crate::repository::Value)>> {
|
||||
let layer = self
|
||||
.0
|
||||
.get_or_maybe_download(true, Some(ctx))
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
DownloadError::DownloadCancelled => GetVectoredError::Cancelled,
|
||||
other => GetVectoredError::Other(anyhow::anyhow!(other)),
|
||||
})?;
|
||||
layer.load_key_values(&self.0, ctx).await
|
||||
}
|
||||
|
||||
/// Download the layer if evicted.
|
||||
///
|
||||
/// Will not error when the layer is already downloaded.
|
||||
@@ -1179,11 +1158,6 @@ impl LayerInner {
|
||||
let consecutive_failures =
|
||||
1 + self.consecutive_failures.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
if timeline.cancel.is_cancelled() {
|
||||
// If we're shutting down, drop out before logging the error
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
tracing::error!(consecutive_failures, "layer file download failed: {e:#}");
|
||||
|
||||
let backoff = utils::backoff::exponential_backoff_duration_seconds(
|
||||
@@ -1774,20 +1748,6 @@ impl DownloadedLayer {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
async fn load_key_values(
|
||||
&self,
|
||||
owner: &Arc<LayerInner>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<Vec<(Key, Lsn, crate::repository::Value)>> {
|
||||
use LayerKind::*;
|
||||
|
||||
match self.get(owner, ctx).await? {
|
||||
Delta(d) => d.load_key_values(ctx).await,
|
||||
Image(i) => i.load_key_values(ctx).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn dump(&self, owner: &Arc<LayerInner>, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
use LayerKind::*;
|
||||
match self.get(owner, ctx).await? {
|
||||
|
||||
@@ -815,7 +815,6 @@ async fn eviction_cancellation_on_drop() {
|
||||
/// A test case to remind you the cost of these structures. You can bump the size limit
|
||||
/// below if it is really necessary to add more fields to the structures.
|
||||
#[test]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
fn layer_size() {
|
||||
assert_eq!(std::mem::size_of::<LayerAccessStats>(), 2040);
|
||||
assert_eq!(std::mem::size_of::<PersistentLayerDesc>(), 104);
|
||||
|
||||
@@ -380,28 +380,21 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
|
||||
let res = tenant
|
||||
.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &cancel, &ctx)
|
||||
.await;
|
||||
match res {
|
||||
Ok(_) => {
|
||||
error_run_count = 0;
|
||||
period
|
||||
}
|
||||
Err(crate::tenant::GcError::TenantCancelled) => {
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
let wait_duration = backoff::exponential_backoff_duration_seconds(
|
||||
error_run_count + 1,
|
||||
1.0,
|
||||
MAX_BACKOFF_SECS,
|
||||
);
|
||||
error_run_count += 1;
|
||||
let wait_duration = Duration::from_secs_f64(wait_duration);
|
||||
|
||||
error!(
|
||||
if let Err(e) = res {
|
||||
let wait_duration = backoff::exponential_backoff_duration_seconds(
|
||||
error_run_count + 1,
|
||||
1.0,
|
||||
MAX_BACKOFF_SECS,
|
||||
);
|
||||
error_run_count += 1;
|
||||
let wait_duration = Duration::from_secs_f64(wait_duration);
|
||||
error!(
|
||||
"Gc failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}",
|
||||
);
|
||||
wait_duration
|
||||
}
|
||||
wait_duration
|
||||
} else {
|
||||
error_run_count = 0;
|
||||
period
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,90 +0,0 @@
|
||||
use std::{collections::BTreeSet, ops::Range};
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use super::Timeline;
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
pub(crate) struct RangeAnalysis {
|
||||
start: String,
|
||||
end: String,
|
||||
has_image: bool,
|
||||
num_of_deltas_above_image: usize,
|
||||
total_num_of_deltas: usize,
|
||||
}
|
||||
|
||||
impl Timeline {
|
||||
pub(crate) async fn perf_info(&self) -> Vec<RangeAnalysis> {
|
||||
// First, collect all split points of the layers.
|
||||
let mut split_points = BTreeSet::new();
|
||||
let mut delta_ranges = Vec::new();
|
||||
let mut image_ranges = Vec::new();
|
||||
|
||||
let all_layer_files = {
|
||||
let guard = self.layers.read().await;
|
||||
guard.all_persistent_layers()
|
||||
};
|
||||
let lsn = self.get_last_record_lsn();
|
||||
|
||||
for key in all_layer_files {
|
||||
split_points.insert(key.key_range.start);
|
||||
split_points.insert(key.key_range.end);
|
||||
if key.is_delta {
|
||||
delta_ranges.push((key.key_range.clone(), key.lsn_range.clone()));
|
||||
} else {
|
||||
image_ranges.push((key.key_range.clone(), key.lsn_range.start));
|
||||
}
|
||||
}
|
||||
|
||||
// For each split range, compute the estimated read amplification.
|
||||
let split_points = split_points.into_iter().collect::<Vec<_>>();
|
||||
|
||||
let mut result = Vec::new();
|
||||
|
||||
for i in 0..(split_points.len() - 1) {
|
||||
let start = split_points[i];
|
||||
let end = split_points[i + 1];
|
||||
// Find the latest image layer that contains the information.
|
||||
let mut maybe_image_layers = image_ranges
|
||||
.iter()
|
||||
// We insert split points for all image layers, and therefore a `contains` check for the start point should be enough.
|
||||
.filter(|(key_range, img_lsn)| key_range.contains(&start) && img_lsn <= &lsn)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
maybe_image_layers.sort_by(|a, b| a.1.cmp(&b.1));
|
||||
let image_layer = maybe_image_layers.last().cloned();
|
||||
let lsn_filter_start = image_layer
|
||||
.as_ref()
|
||||
.map(|(_, lsn)| *lsn)
|
||||
.unwrap_or(Lsn::INVALID);
|
||||
|
||||
fn overlaps_with(lsn_range_a: &Range<Lsn>, lsn_range_b: &Range<Lsn>) -> bool {
|
||||
!(lsn_range_a.end <= lsn_range_b.start || lsn_range_a.start >= lsn_range_b.end)
|
||||
}
|
||||
|
||||
let maybe_delta_layers = delta_ranges
|
||||
.iter()
|
||||
.filter(|(key_range, lsn_range)| {
|
||||
key_range.contains(&start) && overlaps_with(&(lsn_filter_start..lsn), lsn_range)
|
||||
})
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let pitr_delta_layers = delta_ranges
|
||||
.iter()
|
||||
.filter(|(key_range, _)| key_range.contains(&start))
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
result.push(RangeAnalysis {
|
||||
start: start.to_string(),
|
||||
end: end.to_string(),
|
||||
has_image: image_layer.is_some(),
|
||||
num_of_deltas_above_image: maybe_delta_layers.len(),
|
||||
total_num_of_deltas: pitr_delta_layers.len(),
|
||||
});
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
@@ -133,7 +133,8 @@ impl Timeline {
|
||||
},
|
||||
&image_ctx,
|
||||
)
|
||||
.await?;
|
||||
.await
|
||||
.map_err(anyhow::Error::from)?;
|
||||
|
||||
self.upload_new_image_layers(image_layers)?;
|
||||
partitioning.parts.len()
|
||||
@@ -421,6 +422,48 @@ impl Timeline {
|
||||
return Ok(CompactLevel0Phase1Result::default());
|
||||
}
|
||||
|
||||
// This failpoint is used together with `test_duplicate_layers` integration test.
|
||||
// It returns the compaction result exactly the same layers as input to compaction.
|
||||
// We want to ensure that this will not cause any problem when updating the layer map
|
||||
// after the compaction is finished.
|
||||
//
|
||||
// Currently, there are two rare edge cases that will cause duplicated layers being
|
||||
// inserted.
|
||||
// 1. The compaction job is inturrupted / did not finish successfully. Assume we have file 1, 2, 3, 4, which
|
||||
// is compacted to 5, but the page server is shut down, next time we start page server we will get a layer
|
||||
// map containing 1, 2, 3, 4, and 5, whereas 5 has the same content as 4. If we trigger L0 compation at this
|
||||
// point again, it is likely that we will get a file 6 which has the same content and the key range as 5,
|
||||
// and this causes an overwrite. This is acceptable because the content is the same, and we should do a
|
||||
// layer replace instead of the normal remove / upload process.
|
||||
// 2. The input workload pattern creates exactly n files that are sorted, non-overlapping and is of target file
|
||||
// size length. Compaction will likely create the same set of n files afterwards.
|
||||
//
|
||||
// This failpoint is a superset of both of the cases.
|
||||
if cfg!(feature = "testing") {
|
||||
let active = (|| {
|
||||
::fail::fail_point!("compact-level0-phase1-return-same", |_| true);
|
||||
false
|
||||
})();
|
||||
|
||||
if active {
|
||||
let mut new_layers = Vec::with_capacity(level0_deltas.len());
|
||||
for delta in &level0_deltas {
|
||||
// we are just faking these layers as being produced again for this failpoint
|
||||
new_layers.push(
|
||||
delta
|
||||
.download_and_keep_resident()
|
||||
.await
|
||||
.context("download layer for failpoint")?,
|
||||
);
|
||||
}
|
||||
tracing::info!("compact-level0-phase1-return-same"); // so that we can check if we hit the failpoint
|
||||
return Ok(CompactLevel0Phase1Result {
|
||||
new_layers,
|
||||
deltas_to_compact: level0_deltas,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Gather the files to compact in this iteration.
|
||||
//
|
||||
// Start with the oldest Level 0 delta file, and collect any other
|
||||
@@ -952,178 +995,6 @@ impl Timeline {
|
||||
adaptor.flush_updates().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// An experimental compaction building block that combines compaction with garbage collection.
|
||||
///
|
||||
/// The current implementation picks all delta + image layers that are below or intersecting with
|
||||
/// the GC horizon without considering retain_lsns. Then, it does a full compaction over all these delta
|
||||
/// layers and image layers, which generates image layers on the gc horizon, drop deltas below gc horizon,
|
||||
/// and create delta layers with all deltas >= gc horizon.
|
||||
#[cfg(test)]
|
||||
pub(crate) async fn compact_with_gc(
|
||||
self: &Arc<Self>,
|
||||
_cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<(), CompactionError> {
|
||||
use crate::tenant::storage_layer::ValueReconstructState;
|
||||
// Step 0: pick all delta layers + image layers below/intersect with the GC horizon.
|
||||
// The layer selection has the following properties:
|
||||
// 1. If a layer is in the selection, all layers below it are in the selection.
|
||||
// 2. Inferred from (1), for each key in the layer selection, the value can be reconstructed only with the layers in the layer selection.
|
||||
let (layer_selection, gc_cutoff) = {
|
||||
let guard = self.layers.read().await;
|
||||
let layers = guard.layer_map();
|
||||
let gc_info = self.gc_info.read().unwrap();
|
||||
let gc_cutoff = Lsn::min(gc_info.cutoffs.horizon, gc_info.cutoffs.pitr);
|
||||
let mut selected_layers = Vec::new();
|
||||
// TODO: consider retain_lsns
|
||||
drop(gc_info);
|
||||
for desc in layers.iter_historic_layers() {
|
||||
if desc.get_lsn_range().start <= gc_cutoff {
|
||||
selected_layers.push(guard.get_from_desc(&desc));
|
||||
}
|
||||
}
|
||||
(selected_layers, gc_cutoff)
|
||||
};
|
||||
// Step 1: (In the future) construct a k-merge iterator over all layers. For now, simply collect all keys + LSNs.
|
||||
let mut all_key_values = Vec::new();
|
||||
for layer in &layer_selection {
|
||||
all_key_values.extend(layer.load_key_values(ctx).await?);
|
||||
}
|
||||
// Key small to large, LSN low to high, if the same LSN has both image and delta due to the merge of delta layers and
|
||||
// image layers, make image appear later than delta.
|
||||
struct ValueWrapper<'a>(&'a crate::repository::Value);
|
||||
impl Ord for ValueWrapper<'_> {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
use crate::repository::Value;
|
||||
use std::cmp::Ordering;
|
||||
match (self.0, other.0) {
|
||||
(Value::Image(_), Value::WalRecord(_)) => Ordering::Greater,
|
||||
(Value::WalRecord(_), Value::Image(_)) => Ordering::Less,
|
||||
_ => Ordering::Equal,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl PartialOrd for ValueWrapper<'_> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
impl PartialEq for ValueWrapper<'_> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.cmp(other) == std::cmp::Ordering::Equal
|
||||
}
|
||||
}
|
||||
impl Eq for ValueWrapper<'_> {}
|
||||
all_key_values.sort_by(|(k1, l1, v1), (k2, l2, v2)| {
|
||||
(k1, l1, ValueWrapper(v1)).cmp(&(k2, l2, ValueWrapper(v2)))
|
||||
});
|
||||
let max_lsn = all_key_values
|
||||
.iter()
|
||||
.map(|(_, lsn, _)| lsn)
|
||||
.max()
|
||||
.copied()
|
||||
.unwrap()
|
||||
+ 1;
|
||||
// Step 2: Produce images+deltas. TODO: ensure newly-produced delta does not overlap with other deltas.
|
||||
// Data of the same key.
|
||||
let mut accumulated_values = Vec::new();
|
||||
let mut last_key = all_key_values.first().unwrap().0; // TODO: assert all_key_values not empty
|
||||
|
||||
/// Take a list of images and deltas, produce an image at the GC horizon, and a list of deltas above the GC horizon.
|
||||
async fn flush_accumulated_states(
|
||||
tline: &Arc<Timeline>,
|
||||
key: Key,
|
||||
accumulated_values: &[&(Key, Lsn, crate::repository::Value)],
|
||||
horizon: Lsn,
|
||||
) -> anyhow::Result<(Vec<(Key, Lsn, crate::repository::Value)>, bytes::Bytes)> {
|
||||
let mut base_image = None;
|
||||
let mut keys_above_horizon = Vec::new();
|
||||
let mut delta_above_base_image = Vec::new();
|
||||
// We have a list of deltas/images. We want to create image layers while collect garbages.
|
||||
for (key, lsn, val) in accumulated_values.iter().rev() {
|
||||
if *lsn > horizon {
|
||||
keys_above_horizon.push((*key, *lsn, val.clone())); // TODO: ensure one LSN corresponds to either delta or image instead of both
|
||||
} else if *lsn <= horizon {
|
||||
match val {
|
||||
crate::repository::Value::Image(image) => {
|
||||
if lsn <= &horizon {
|
||||
base_image = Some((*lsn, image.clone()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
crate::repository::Value::WalRecord(wal) => {
|
||||
delta_above_base_image.push((*lsn, wal.clone()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
delta_above_base_image.reverse();
|
||||
keys_above_horizon.reverse();
|
||||
let state = ValueReconstructState {
|
||||
img: base_image,
|
||||
records: delta_above_base_image,
|
||||
};
|
||||
let img = tline.reconstruct_value(key, horizon, state).await?;
|
||||
Ok((keys_above_horizon, img))
|
||||
}
|
||||
|
||||
let mut delta_layer_writer = DeltaLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
all_key_values.first().unwrap().0,
|
||||
gc_cutoff..max_lsn, // TODO: off by one?
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
let mut image_layer_writer = ImageLayerWriter::new(
|
||||
self.conf,
|
||||
self.timeline_id,
|
||||
self.tenant_shard_id,
|
||||
&(all_key_values.first().unwrap().0..all_key_values.last().unwrap().0.next()),
|
||||
gc_cutoff,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
for item @ (key, _, _) in &all_key_values {
|
||||
if &last_key == key {
|
||||
accumulated_values.push(item);
|
||||
} else {
|
||||
let (deltas, image) =
|
||||
flush_accumulated_states(self, last_key, &accumulated_values, gc_cutoff)
|
||||
.await?;
|
||||
image_layer_writer.put_image(last_key, image, ctx).await?;
|
||||
for (key, lsn, val) in deltas {
|
||||
delta_layer_writer.put_value(key, lsn, val, ctx).await?;
|
||||
}
|
||||
accumulated_values.clear();
|
||||
accumulated_values.push(item);
|
||||
last_key = *key;
|
||||
}
|
||||
}
|
||||
let (deltas, image) =
|
||||
flush_accumulated_states(self, last_key, &accumulated_values, gc_cutoff).await?;
|
||||
image_layer_writer.put_image(last_key, image, ctx).await?;
|
||||
for (key, lsn, val) in deltas {
|
||||
delta_layer_writer.put_value(key, lsn, val, ctx).await?;
|
||||
}
|
||||
accumulated_values.clear();
|
||||
// TODO: split layers
|
||||
let delta_layer = delta_layer_writer.finish(last_key, self, ctx).await?;
|
||||
let image_layer = image_layer_writer.finish(self, ctx).await?;
|
||||
// Step 3: Place back to the layer map.
|
||||
{
|
||||
let mut guard = self.layers.write().await;
|
||||
guard.finish_gc_compaction(
|
||||
&layer_selection,
|
||||
&[delta_layer.clone(), image_layer.clone()],
|
||||
&self.metrics,
|
||||
)
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct TimelineAdaptor {
|
||||
|
||||
@@ -11,6 +11,7 @@ use utils::{crashsafe, fs_ext, id::TimelineId, pausable_failpoint};
|
||||
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
deletion_queue::DeletionQueueClient,
|
||||
task_mgr::{self, TaskKind},
|
||||
tenant::{
|
||||
metadata::TimelineMetadata,
|
||||
@@ -262,6 +263,7 @@ impl DeleteTimelineFlow {
|
||||
timeline_id: TimelineId,
|
||||
local_metadata: &TimelineMetadata,
|
||||
remote_client: RemoteTimelineClient,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
) -> anyhow::Result<()> {
|
||||
// Note: here we even skip populating layer map. Timeline is essentially uninitialized.
|
||||
// RemoteTimelineClient is the only functioning part.
|
||||
@@ -272,6 +274,7 @@ impl DeleteTimelineFlow {
|
||||
None, // Ancestor is not needed for deletion.
|
||||
TimelineResources {
|
||||
remote_client,
|
||||
deletion_queue_client,
|
||||
timeline_get_throttle: tenant.timeline_get_throttle.clone(),
|
||||
},
|
||||
// Important. We dont pass ancestor above because it can be missing.
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user