Mirror of https://github.com/neondatabase/neon.git (synced 2026-05-17 13:10:38 +00:00)

Compare commits: release-72...erik/wal-w, 1 commit (e7e26834ef)
@@ -7,10 +7,6 @@ inputs:
type: boolean
required: false
default: false
aws_oicd_role_arn:
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
required: false
default: ''

outputs:
base-url:
@@ -83,14 +79,6 @@ runs:
ALLURE_VERSION: 2.27.0
ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777

- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

# Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
- name: Acquire lock
shell: bash -euxo pipefail {0}
.github/actions/allure-report-store/action.yml  (vendored, 12 changed lines)

@@ -8,10 +8,6 @@ inputs:
unique-key:
description: 'string to distinguish different results in the same run'
required: true
aws_oicd_role_arn:
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
required: false
default: ''

runs:
using: "composite"
@@ -35,14 +31,6 @@ runs:
env:
REPORT_DIR: ${{ inputs.report-dir }}

- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report

- name: Upload test results
shell: bash -euxo pipefail {0}
run: |
.github/actions/run-python-test-set/action.yml  (vendored, 11 changed lines)

@@ -48,10 +48,6 @@ inputs:
description: 'benchmark durations JSON'
required: false
default: '{}'
aws_oicd_role_arn:
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
required: false
default: ''

runs:
using: "composite"
@@ -226,13 +222,6 @@ runs:
# (for example if we didn't run the test for non build-and-test workflow)
skip-if-does-not-exist: true

- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
- name: Upload test results
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-store
@@ -19,8 +19,8 @@ on:
description: 'debug or release'
required: true
type: string
test-cfg:
description: 'a json object of postgres versions and lfc states to run regression tests on'
pg-versions:
description: 'a json array of postgres versions to run regression tests on'
required: true
type: string

@@ -276,14 +276,14 @@ jobs:
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
strategy:
fail-fast: false
matrix: ${{ fromJSON(format('{{"include":{0}}}', inputs.test-cfg)) }}
matrix:
pg_version: ${{ fromJson(inputs.pg-versions) }}
steps:
- uses: actions/checkout@v4
with:
submodules: true

- name: Pytest regression tests
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' }}
uses: ./.github/actions/run-python-test-set
timeout-minutes: 60
with:
@@ -300,7 +300,6 @@ jobs:
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}

# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540
.github/workflows/_create-release-pr.yml  (vendored, 79 changed lines)

@@ -1,79 +0,0 @@
name: Create Release PR

on:
workflow_call:
inputs:
component-name:
description: 'Component name'
required: true
type: string
release-branch:
description: 'Release branch'
required: true
type: string
secrets:
ci-access-token:
description: 'CI access token'
required: true

defaults:
run:
shell: bash -euo pipefail {0}

jobs:
create-storage-release-branch:
runs-on: ubuntu-22.04

permissions:
contents: write # for `git push`

steps:
- uses: actions/checkout@v4
with:
ref: main

- name: Set variables
id: vars
env:
COMPONENT_NAME: ${{ inputs.component-name }}
RELEASE_BRANCH: ${{ inputs.release-branch }}
run: |
today=$(date +'%Y-%m-%d')
echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT}
echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT}

- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"

- name: Create RC branch
env:
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
git checkout -b "${RC_BRANCH}"

# create an empty commit to distinguish workflow runs
# from other possible releases from the same commit
git commit --allow-empty -m "${TITLE}"

git push origin "${RC_BRANCH}"

- name: Create a PR into ${{ inputs.release-branch }}
env:
GH_TOKEN: ${{ secrets.ci-access-token }}
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
RELEASE_BRANCH: ${{ inputs.release-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
cat << EOF > body.md
## ${TITLE}

**Please merge this Pull Request using 'Create a merge commit' button**
EOF

gh pr create --title "${TITLE}" \
--body-file "body.md" \
--head "${RC_BRANCH}" \
--base "${RELEASE_BRANCH}"
.github/workflows/benchmarking.yml  (vendored, 52 changed lines)

@@ -122,7 +122,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
@@ -134,7 +133,6 @@ jobs:
--ignore test_runner/performance/test_perf_pgvector_queries.py
--ignore test_runner/performance/test_logical_replication.py
--ignore test_runner/performance/test_physical_replication.py
--ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -151,14 +149,12 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic perf testing: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -214,7 +210,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -231,7 +226,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -243,13 +237,11 @@ jobs:
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

# Post both success and failure to the Slack channel
- name: Post to a Slack channel
if: ${{ github.event.schedule }}
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream
@@ -452,7 +444,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -467,7 +458,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -482,7 +472,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -499,14 +488,12 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -558,12 +545,12 @@ jobs:
arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')

cd /home/nonroot
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.2-1.pgdg110+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.6-1.pgdg110+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.6-1.pgdg110+1_${arch}.deb"
dpkg -x libpq5_17.2-1.pgdg110+1_${arch}.deb pg
dpkg -x postgresql-16_16.6-1.pgdg110+1_${arch}.deb pg
dpkg -x postgresql-client-16_16.6-1.pgdg110+1_${arch}.deb pg
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.0-1.pgdg110+1_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb"
wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110+2_${arch}.deb"
dpkg -x libpq5_17.0-1.pgdg110+1_${arch}.deb pg
dpkg -x postgresql-16_16.4-1.pgdg110+2_${arch}.deb pg
dpkg -x postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb pg

mkdir -p /tmp/neon/pg_install/v16/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
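# Note: dpkg -x only unpacks each .deb into ./pg without installing it system-wide;
# the mkdir/ln -s above then expose the extracted pgbench binary at
# /tmp/neon/pg_install/v16/bin, which is presumably where the test harness looks for it.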
@@ -611,7 +598,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -626,7 +612,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -636,14 +621,12 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -739,7 +722,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -752,14 +734,12 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -856,7 +836,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -867,14 +846,12 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
@@ -957,7 +934,6 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -967,14 +943,12 @@ jobs:
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
.github/workflows/build-build-tools-image.yml  (vendored, 59 changed lines)

@@ -2,13 +2,18 @@ name: Build build-tools image

on:
workflow_call:
inputs:
image-tag:
description: "build-tools image tag"
required: true
type: string
outputs:
image-tag:
description: "build-tools tag"
value: ${{ jobs.check-image.outputs.tag }}
value: ${{ inputs.image-tag }}
image:
description: "build-tools image"
value: neondatabase/build-tools:${{ jobs.check-image.outputs.tag }}
value: neondatabase/build-tools:${{ inputs.image-tag }}

defaults:
run:
@@ -30,36 +35,7 @@ permissions: {}

jobs:
check-image:
runs-on: ubuntu-22.04
outputs:
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
found: ${{ steps.check-image.outputs.found }}

steps:
- uses: actions/checkout@v4

- name: Get build-tools image tag for the current commit
id: get-build-tools-tag
env:
IMAGE_TAG: |
${{ hashFiles('build-tools.Dockerfile',
'.github/workflows/build-build-tools-image.yml') }}
run: |
echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT

- name: Check if such tag found in the registry
id: check-image
env:
IMAGE_TAG: ${{ steps.get-build-tools-tag.outputs.image-tag }}
run: |
if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then
found=true
else
found=false
fi

echo "found=${found}" | tee -a $GITHUB_OUTPUT

uses: ./.github/workflows/check-build-tools-image.yml

build-image:
needs: [ check-image ]
@@ -72,7 +48,20 @@ jobs:

runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}

env:
IMAGE_TAG: ${{ inputs.image-tag }}

steps:
- name: Check `input.tag` is correct
env:
INPUTS_IMAGE_TAG: ${{ inputs.image-tag }}
CHECK_IMAGE_TAG : ${{ needs.check-image.outputs.image-tag }}
run: |
if [ "${INPUTS_IMAGE_TAG}" != "${CHECK_IMAGE_TAG}" ]; then
echo "'inputs.image-tag' (${INPUTS_IMAGE_TAG}) does not match the tag of the latest build-tools image 'inputs.image-tag' (${CHECK_IMAGE_TAG})"
exit 1
fi

- uses: actions/checkout@v4

- uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
@@ -103,10 +92,10 @@ jobs:
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.debian-version }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0}-{1},mode=max', matrix.debian-version, matrix.arch) || '' }}
tags: |
neondatabase/build-tools:${{ needs.check-image.outputs.tag }}-${{ matrix.debian-version }}-${{ matrix.arch }}
neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.debian-version }}-${{ matrix.arch }}

merge-images:
needs: [ check-image, build-image ]
needs: [ build-image ]
runs-on: ubuntu-22.04

steps:
@@ -118,7 +107,7 @@ jobs:
- name: Create multi-arch image
env:
DEFAULT_DEBIAN_VERSION: bullseye
IMAGE_TAG: ${{ needs.check-image.outputs.tag }}
IMAGE_TAG: ${{ inputs.image-tag }}
run: |
for debian_version in bullseye bookworm; do
tags=("-t" "neondatabase/build-tools:${IMAGE_TAG}-${debian_version}")
.github/workflows/build_and_test.yml  (vendored, 17 changed lines)

@@ -77,9 +77,15 @@ jobs:
shell: bash
id: build-tag

build-build-tools-image:
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml

build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit

check-codestyle-python:
@@ -253,14 +259,7 @@ jobs:
build-tag: ${{ needs.tag.outputs.build-tag }}
build-type: ${{ matrix.build-type }}
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds
# run without LFC on v17 release only
test-cfg: |
${{ matrix.build-type == 'release' && '[{"pg_version":"v14", "lfc_state": "without-lfc"},
{"pg_version":"v15", "lfc_state": "without-lfc"},
{"pg_version":"v16", "lfc_state": "without-lfc"},
{"pg_version":"v17", "lfc_state": "without-lfc"},
{"pg_version":"v17", "lfc_state": "with-lfc"}]'
|| '[{"pg_version":"v17", "lfc_state": "without-lfc"}]' }}
pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
secrets: inherit

# Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
.github/workflows/check-build-tools-image.yml  (vendored, new file, 51 lines)

@@ -0,0 +1,51 @@
name: Check build-tools image

on:
workflow_call:
outputs:
image-tag:
description: "build-tools image tag"
value: ${{ jobs.check-image.outputs.tag }}
found:
description: "Whether the image is found in the registry"
value: ${{ jobs.check-image.outputs.found }}

defaults:
run:
shell: bash -euo pipefail {0}

# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}

jobs:
check-image:
runs-on: ubuntu-22.04
outputs:
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
found: ${{ steps.check-image.outputs.found }}

steps:
- uses: actions/checkout@v4

- name: Get build-tools image tag for the current commit
id: get-build-tools-tag
env:
IMAGE_TAG: |
${{ hashFiles('build-tools.Dockerfile',
'.github/workflows/check-build-tools-image.yml',
'.github/workflows/build-build-tools-image.yml') }}
run: |
echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT

- name: Check if such tag found in the registry
id: check-image
env:
IMAGE_TAG: ${{ steps.get-build-tools-tag.outputs.image-tag }}
run: |
if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then
found=true
else
found=false
fi

echo "found=${found}" | tee -a $GITHUB_OUTPUT
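For reference, the same existence check can be reproduced outside of CI with any Docker CLI that can reach the registry (illustrative command only; the tag value is a placeholder, not one taken from this diff):

    docker manifest inspect neondatabase/build-tools:<image-tag> > /dev/null 2>&1 && echo found || echo missing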
.github/workflows/ingest_benchmark.yml  (vendored, 277 changed lines)

@@ -1,4 +1,4 @@
name: benchmarking ingest
name: Benchmarking

on:
# uncomment to run on push for debugging your PR
@@ -74,16 +74,18 @@ jobs:
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
api_key: ${{ secrets.NEON_STAGING_API_KEY }}

- name: Initialize Neon project
- name: Initialize Neon project and retrieve current backpressure seconds
if: ${{ matrix.target_project == 'new_empty_project' }}
env:
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }}
NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }}
run: |
echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV

- name: Create Neon Branch for large tenant
if: ${{ matrix.target_project == 'large_existing_project' }}
@@ -93,55 +95,266 @@ jobs:
project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}

- name: Initialize Neon project
- name: Initialize Neon project and retrieve current backpressure seconds
if: ${{ matrix.target_project == 'large_existing_project' }}
env:
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }}
run: |
echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}"
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
# Extract the part before the database name
base_connstr="${BENCHMARK_INGEST_TARGET_CONNSTR%/*}"
base_connstr="${NEW_PROJECT_CONNSTR%/*}"
# Extract the query parameters (if any) after the database name
query_params="${BENCHMARK_INGEST_TARGET_CONNSTR#*\?}"
query_params="${NEW_PROJECT_CONNSTR#*\?}"
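# Illustration with a made-up connection string (not the real secret):
#   NEW_PROJECT_CONNSTR = postgres://user@host/neondb?sslmode=require
#   ${NEW_PROJECT_CONNSTR%/*}   -> postgres://user@host   (everything before the last '/')
#   ${NEW_PROJECT_CONNSTR#*\?}  -> sslmode=require        (everything after the first '?')
# If the string contains no '?', the second expansion returns it unchanged,
# which is exactly what the checks below test for.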
# Reconstruct the new connection string
if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
new_connstr="${base_connstr}/neondb?${query_params}"
else
new_connstr="${base_connstr}/neondb"
fi
${PSQL} "${new_connstr}" -c "drop database ludicrous;"
${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;"
if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then
BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous?${query_params}"
if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}"
else
BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous"
NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous"
fi
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV
echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV

- name: Create pgcopydb filter file
run: |
cat << EOF > /tmp/pgcopydb_filter.txt
[include-only-table]
public.events
public.emails
public.email_transmissions
public.payments
public.editions
public.edition_modules
public.sp_content
public.email_broadcasts
public.user_collections
public.devices
public.user_accounts
public.lessons
public.lesson_users
public.payment_methods
public.orders
public.course_emails
public.modules
public.users
public.module_users
public.courses
public.payment_gateway_keys
public.accounts
public.roles
public.payment_gateways
public.management
public.event_names
EOF

- name: Invoke pgcopydb
uses: ./.github/actions/run-python-test-set
with:
build_type: remote
test_selection: performance/test_perf_ingest_using_pgcopydb.py
run_in_parallel: false
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
pg_version: v16
save_perf_report: true
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Invoke pgcopydb
env:
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
# we report PLATFORM in zenbenchmark NeonBenchmarker perf database and want to distinguish between new project and large tenant
PLATFORM: "${{ matrix.target_project }}-us-east-2-staging"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
run: |
export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH}
export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}"
export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}"
export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
${PG_CONFIG} --bindir
${PGCOPYDB} --version
${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \
--index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \
--use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log

- name: show tables sizes after ingest
# create dummy pgcopydb log to test parsing
# - name: create dummy log for parser test
# run: |
# cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log
# 2024-11-04 18:00:53.433 500861 INFO main.c:136 Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb"
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1225 [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
# 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1226 [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60"
# 2024-11-04 18:00:53.442 500861 INFO copydb.c:105 Using work dir "/tmp/pgcopydb"
# 2024-11-04 18:00:53.541 500861 INFO snapshot.c:107 Exported snapshot "00000008-00000033-1" from the source database
# 2024-11-04 18:00:53.556 500865 INFO cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:716 Splitting source candidate tables larger than 10 GB
# 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:829 Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID
# 2024-11-04 18:01:05.538 500865 INFO copydb_schema.c:905 Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid.
# 2024-11-04 18:01:05.564 500865 INFO copydb_schema.c:905 Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.584 500865 INFO copydb_schema.c:905 Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:905 Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id.
# 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:761 Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk
# 2024-11-04 18:01:05.687 500865 INFO copydb_schema.c:968 Fetched information for 57 indexes (supporting 25 constraints)
# 2024-11-04 18:01:05.753 500865 INFO sequences.c:78 Fetching information for 24 sequences
# 2024-11-04 18:01:05.903 500865 INFO copydb_schema.c:1122 Fetched information for 4 extensions
# 2024-11-04 18:01:06.178 500865 INFO copydb_schema.c:1538 Found 0 indexes (supporting 0 constraints) in the target database
# 2024-11-04 18:01:06.184 500865 INFO cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data)
# 2024-11-04 18:01:06.186 500865 INFO pgcmd.c:468 /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60'
# 2024-11-04 18:01:06.952 500865 INFO cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database
# 2024-11-04 18:01:07.004 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump
# 2024-11-04 18:01:07.438 500874 INFO table-data.c:656 STEP 4: starting 4 table-data COPY processes
# 2024-11-04 18:01:07.451 500877 INFO vacuum.c:139 STEP 8: skipping VACUUM jobs per --skip-vacuum
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:182 STEP 6: starting 4 CREATE INDEX processes
# 2024-11-04 18:01:07.457 500875 INFO indexes.c:183 STEP 7: constraints are built by the CREATE INDEX processes
# 2024-11-04 18:01:07.507 500865 INFO blobs.c:74 Skipping large objects: none found.
# 2024-11-04 18:01:07.509 500865 INFO sequences.c:194 STEP 9: reset sequences values
# 2024-11-04 18:01:07.510 500886 INFO sequences.c:290 Set sequences values on the target database
# 2024-11-04 20:49:00.587 500865 INFO cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database
# 2024-11-04 20:49:00.600 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump
# 2024-11-05 10:50:58.508 500865 INFO cli_clone_follow.c:639 All step are now done, 16h49m elapsed
# 2024-11-05 10:50:58.508 500865 INFO summary.c:3155 Printing summary for 26 tables and 57 indexes

# OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration
# ------+--------+----------------------+-------+---------------+-------------------+---------+----------------------
# 24654 | public | events | 10 | 1d11h | 878 GB | 1 | 1h41m
# 24623 | public | email_transmissions | 4 | 4h46m | 99 GB | 3 | 2h04m
# 24665 | public | lessons | 4 | 4h42m | 161 GB | 4 | 1m11s
# 24661 | public | lesson_users | 3 | 2h46m | 49 GB | 3 | 39m35s
# 24631 | public | emails | 1 | 34m07s | 10 GB | 2 | 17s
# 24739 | public | payments | 1 | 5m47s | 1848 MB | 4 | 4m40s
# 24681 | public | module_users | 1 | 4m57s | 1610 MB | 3 | 1m50s
# 24694 | public | orders | 1 | 2m50s | 835 MB | 3 | 1m05s
# 24597 | public | devices | 1 | 1m45s | 498 MB | 2 | 40s
# 24723 | public | payment_methods | 1 | 1m24s | 548 MB | 2 | 31s
# 24765 | public | user_collections | 1 | 2m17s | 1005 MB | 2 | 968ms
# 24774 | public | users | 1 | 52s | 291 MB | 4 | 27s
# 24760 | public | user_accounts | 1 | 16s | 172 MB | 3 | 16s
# 24606 | public | edition_modules | 1 | 8s983 | 46 MB | 3 | 4s749
# 24583 | public | course_emails | 1 | 8s526 | 26 MB | 2 | 996ms
# 24685 | public | modules | 1 | 1s592 | 21 MB | 3 | 1s696
# 24610 | public | editions | 1 | 2s199 | 7483 kB | 2 | 1s032
# 24755 | public | sp_content | 1 | 1s555 | 4177 kB | 0 | 0ms
# 24619 | public | email_broadcasts | 1 | 744ms | 2645 kB | 2 | 677ms
# 24590 | public | courses | 1 | 387ms | 1540 kB | 2 | 367ms
# 24704 | public | payment_gateway_keys | 1 | 1s972 | 164 kB | 2 | 27ms
# 24576 | public | accounts | 1 | 58ms | 24 kB | 1 | 14ms
# 24647 | public | event_names | 1 | 32ms | 397 B | 1 | 8ms
# 24716 | public | payment_gateways | 1 | 1s675 | 117 B | 1 | 11ms
# 24748 | public | roles | 1 | 71ms | 173 B | 1 | 8ms
# 24676 | public | management | 1 | 33ms | 40 B | 1 | 19ms

# Step Connection Duration Transfer Concurrency
# -------------------------------------------------- ---------- ---------- ---------- ------------
# Catalog Queries (table ordering, filtering, etc) source 12s 1
# Dump Schema source 765ms 1
# Prepare Schema target 466ms 1
# COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 2h47m 12
# COPY (cumulative) both 7h46m 1225 GB 4
# CREATE INDEX (cumulative) target 4h36m 4
# CONSTRAINTS (cumulative) target 8s493 4
# VACUUM (cumulative) target 0ms 4
# Reset Sequences both 60ms 1
# Large Objects (cumulative) (null) 0ms 0
# Finalize Schema both 14h01m 4
# -------------------------------------------------- ---------- ---------- ---------- ------------
# Total Wall Clock Duration both 16h49m 20

# EOF

- name: show tables sizes and retrieve current backpressure seconds
run: |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"
${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+"
BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;")
echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV

- name: Parse pgcopydb log and report performance metrics
env:
PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }}
run: |
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}

# Define the log file path
LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log"

# Get the current git commit hash
git config --global --add safe.directory /__w/neon/neon
COMMIT_HASH=$(git rev-parse --short HEAD)

# Define the platform and test suite
PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging"
SUIT="pgcopydb_ingest_bench"

# Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds
convert_to_seconds() {
local duration=$1
local total_seconds=0

# Check for hours (h)
if [[ "$duration" =~ ([0-9]+)h ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600))
fi

# Check for seconds (s)
if [[ "$duration" =~ ([0-9]+)s ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0}))
fi

# Check for milliseconds (ms) (if applicable)
if [[ "$duration" =~ ([0-9]+)ms ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000))
duration=${duration/${BASH_REMATCH[0]}/} # need to remove it to avoid double counting with m
fi

# Check for minutes (m) - must be checked after ms because m is contained in ms
if [[ "$duration" =~ ([0-9]+)m ]]; then
total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60))
fi

echo $total_seconds
}
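# Quick sanity check of convert_to_seconds with hypothetical inputs (not read from the log):
#   convert_to_seconds "2h47m"  -> 10020  (2*3600 + 47*60)
#   convert_to_seconds "8s493"  -> 8      (the trailing 493 matches none of the patterns and is ignored)
#   convert_to_seconds "118ms"  -> 0      (integer division truncates sub-second values)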

# Calculate the backpressure difference in seconds
BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}")

# Insert the backpressure time difference into the performance database
if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then
PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \"
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now());
\""
echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds"
eval $PSQL_CMD
fi

# Extract and process log lines
while IFS= read -r line; do
METRIC_NAME=""
# Match each desired line and extract the relevant information
if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then
METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)"
elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then
METRIC_NAME="COPY (cumulative)"
elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then
METRIC_NAME="CREATE INDEX (cumulative)"
elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then
METRIC_NAME="CONSTRAINTS (cumulative)"
elif [[ "$line" =~ Finalize\ Schema.* ]]; then
METRIC_NAME="Finalize Schema"
elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then
METRIC_NAME="Total Wall Clock Duration"
fi

# If a metric was matched, insert it into the performance database
if [ -n "$METRIC_NAME" ]; then
DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1)
METRIC_VALUE=$(convert_to_seconds "$DURATION")
|
||||
INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp)
|
||||
VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now());
|
||||
\""
|
||||
echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds"
|
||||
eval $PSQL_CMD
|
||||
fi
|
||||
done < "$LOG_FILE"
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ always() && matrix.target_project == 'new_empty_project' }}
|
||||
|
||||
.github/workflows/neon_extra_builds.yml  (vendored, 12 changed lines)

@@ -26,9 +26,15 @@ jobs:
with:
github-event-name: ${{ github.event_name}}

build-build-tools-image:
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml

build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit

check-macos-build:
@@ -38,7 +44,7 @@ jobs:
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
timeout-minutes: 90
runs-on: macos-15
runs-on: macos-14

env:
# Use release build only, to have less debug info around
@@ -52,7 +58,7 @@ jobs:
submodules: true

- name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf icu4c
run: brew install flex bison openssl protobuf icu4c pkg-config

- name: Set pg 14 revision for caching
id: pg_v14_rev
.github/workflows/periodic_pagebench.yml  (vendored, 8 changed lines)

@@ -72,7 +72,7 @@ jobs:
echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
fi

- name: Start Bench with run_id
- name: Start Bench with run_id
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
@@ -116,7 +116,7 @@ jobs:
-H 'accept: application/gzip' \
-H "Authorization: Bearer $API_KEY" \
--output "test_log_${GITHUB_RUN_ID}.gz"

- name: Unzip Test Log and Print it into this job's log
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
@@ -134,13 +134,13 @@ jobs:
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

- name: Cleanup Test Resources
if: always()
if: always()
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
.github/workflows/pg-clients.yml  (vendored, 8 changed lines)

@@ -39,9 +39,15 @@ jobs:
with:
github-event-name: ${{ github.event_name }}

build-build-tools-image:
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml

build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit

test-logical-replication:
.github/workflows/pre-merge-checks.yml  (vendored, 8 changed lines)

@@ -34,10 +34,16 @@ jobs:
run: |
echo "${PYTHON_CHANGED_FILES}"

build-build-tools-image:
check-build-tools-image:
if: needs.get-changed-files.outputs.python-changed == 'true'
needs: [ get-changed-files ]
uses: ./.github/workflows/check-build-tools-image.yml

build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit

check-codestyle-python:
.github/workflows/release.yml  (vendored, 84 changed lines)

@@ -26,26 +26,82 @@ defaults:
jobs:
create-storage-release-branch:
if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
runs-on: ubuntu-22.04

permissions:
contents: write
contents: write # for `git push`

uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Storage & Compute'
release-branch: 'release'
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
steps:
- name: Check out code
uses: actions/checkout@v4
with:
ref: main

- name: Set environment variables
run: |
echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
echo "RELEASE_BRANCH=rc/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV

- name: Create release branch
run: git checkout -b $RELEASE_BRANCH

- name: Push new branch
run: git push origin $RELEASE_BRANCH

- name: Create pull request into release
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TITLE="Storage & Compute release ${RELEASE_DATE}"

cat << EOF > body.md
## ${TITLE}

**Please merge this Pull Request using 'Create a merge commit' button**
EOF

gh pr create --title "${TITLE}" \
--body-file "body.md" \
--head "${RELEASE_BRANCH}" \
--base "release"

create-proxy-release-branch:
if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
runs-on: ubuntu-22.04

permissions:
contents: write
contents: write # for `git push`

uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Proxy'
release-branch: 'release-proxy'
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
steps:
- name: Check out code
uses: actions/checkout@v4
with:
ref: main

- name: Set environment variables
run: |
echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV
echo "RELEASE_BRANCH=rc/proxy/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV

- name: Create release branch
run: git checkout -b $RELEASE_BRANCH

- name: Push new branch
run: git push origin $RELEASE_BRANCH

- name: Create pull request into release
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TITLE="Proxy release ${RELEASE_DATE}"

cat << EOF > body.md
## ${TITLE}

**Please merge this Pull Request using 'Create a merge commit' button**
EOF

gh pr create --title "${TITLE}" \
--body-file "body.md" \
--head "${RELEASE_BRANCH}" \
--base "release-proxy"
@@ -4,12 +4,10 @@ on:
schedule:
- cron: '*/15 * * * *'
- cron: '25 0 * * *'
- cron: '25 1 * * 6'

jobs:
gh-workflow-stats-batch-2h:
name: GitHub Workflow Stats Batch 2 hours
if: github.event.schedule == '*/15 * * * *'
gh-workflow-stats-batch:
name: GitHub Workflow Stats Batch
runs-on: ubuntu-22.04
permissions:
actions: read
@@ -18,36 +16,14 @@ jobs:
uses: neondatabase/gh-workflow-stats-action@v0.2.1
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_neon"
db_table: "gh_workflow_stats_batch_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '2h'

gh-workflow-stats-batch-48h:
name: GitHub Workflow Stats Batch 48 hours
if: github.event.schedule == '25 0 * * *'
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export Workflow Run for the past 48 hours
- name: Export Workflow Run for the past 24 hours
if: github.event.schedule == '25 0 * * *'
uses: neondatabase/gh-workflow-stats-action@v0.2.1
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_neon"
db_table: "gh_workflow_stats_batch_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '48h'

gh-workflow-stats-batch-30d:
name: GitHub Workflow Stats Batch 30 days
if: github.event.schedule == '25 1 * * 6'
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export Workflow Run for the past 30 days
uses: neondatabase/gh-workflow-stats-action@v0.2.1
with:
db_uri: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
db_table: "gh_workflow_stats_neon"
gh_token: ${{ secrets.GITHUB_TOKEN }}
duration: '720h'
duration: '24h'
.github/workflows/report-workflow-stats.yml  (vendored, new file, 42 lines)

@@ -0,0 +1,42 @@
name: Report Workflow Stats

on:
workflow_run:
workflows:
- Add `external` label to issues and PRs created by external users
- Benchmarking
- Build and Test
- Build and Test Locally
- Build build-tools image
- Check Permissions
- Check build-tools image
- Check neon with extra platform builds
- Cloud Regression Test
- Create Release Branch
- Handle `approved-for-ci-run` label
- Lint GitHub Workflows
- Notify Slack channel about upcoming release
- Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
- Pin build-tools image
- Prepare benchmarking databases by restoring dumps
- Push images to ACR
- Test Postgres client libraries
- Trigger E2E Tests
- cleanup caches by a branch
- Pre-merge checks
types: [completed]

jobs:
gh-workflow-stats:
name: Github Workflow Stats
runs-on: ubuntu-22.04
permissions:
actions: read
steps:
- name: Export GH Workflow Stats
uses: neondatabase/gh-workflow-stats-action@v0.1.4
with:
DB_URI: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
DB_TABLE: "gh_workflow_stats_neon"
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GH_RUN_ID: ${{ github.event.workflow_run.id }}
@@ -1,5 +1,6 @@
/.github/ @neondatabase/developer-productivity
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
/storage_controller @neondatabase/storage
/storage_scrubber @neondatabase/storage
/libs/pageserver_api/ @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
/libs/remote_storage/ @neondatabase/storage
@@ -10,6 +11,4 @@
/pgxn/neon/ @neondatabase/compute @neondatabase/storage
/proxy/ @neondatabase/proxy
/safekeeper/ @neondatabase/storage
/storage_controller @neondatabase/storage
/storage_scrubber @neondatabase/storage
/vendor/ @neondatabase/compute
Cargo.lock (generated, 318 lines changed)
@@ -46,15 +46,6 @@ dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aligned-vec"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7e0966165eaf052580bd70eb1b32cb3d6245774c0104d1b2793e9650bf83b52a"
|
||||
dependencies = [
|
||||
"equator",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.16"
|
||||
@@ -155,12 +146,6 @@ dependencies = [
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
|
||||
|
||||
[[package]]
|
||||
name = "asn1-rs"
|
||||
version = "0.6.2"
|
||||
@@ -374,28 +359,6 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-kms"
|
||||
version = "1.47.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "564a597a3c71a957d60a2e4c62c93d78ee5a0d636531e15b760acad983a5c18e"
|
||||
dependencies = [
|
||||
"aws-credential-types",
|
||||
"aws-runtime",
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
"aws-smithy-json",
|
||||
"aws-smithy-runtime",
|
||||
"aws-smithy-runtime-api",
|
||||
"aws-smithy-types",
|
||||
"aws-types",
|
||||
"bytes",
|
||||
"http 0.2.9",
|
||||
"once_cell",
|
||||
"regex-lite",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aws-sdk-s3"
|
||||
version = "1.52.0"
|
||||
@@ -612,9 +575,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "aws-smithy-runtime"
|
||||
version = "1.7.2"
|
||||
version = "1.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db"
|
||||
checksum = "d1ce695746394772e7000b39fe073095db6d45a862d0767dd5ad0ac0d7f8eb87"
|
||||
dependencies = [
|
||||
"aws-smithy-async",
|
||||
"aws-smithy-http",
|
||||
@@ -779,7 +742,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"paste",
|
||||
"pin-project",
|
||||
"quick-xml 0.31.0",
|
||||
"quick-xml",
|
||||
"rand 0.8.5",
|
||||
"reqwest 0.11.19",
|
||||
"rustc_version",
|
||||
@@ -1257,10 +1220,6 @@ name = "compute_tools"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"aws-config",
|
||||
"aws-sdk-kms",
|
||||
"aws-sdk-s3",
|
||||
"base64 0.13.1",
|
||||
"bytes",
|
||||
"camino",
|
||||
"cfg-if",
|
||||
@@ -1278,16 +1237,13 @@ dependencies = [
|
||||
"opentelemetry",
|
||||
"opentelemetry_sdk",
|
||||
"postgres",
|
||||
"postgres_initdb",
|
||||
"prometheus",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"reqwest 0.12.4",
|
||||
"rlimit",
|
||||
"rust-ini",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
"signal-hook",
|
||||
"tar",
|
||||
"thiserror",
|
||||
@@ -1425,15 +1381,6 @@ version = "0.8.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
|
||||
|
||||
[[package]]
|
||||
name = "cpp_demangle"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96e58d342ad113c2b878f16d5d034c03be492ae460cdbc02b7f0f2284d310c7d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.9"
|
||||
@@ -1957,26 +1904,6 @@ dependencies = [
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equator"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea"
|
||||
dependencies = [
|
||||
"equator-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equator-macro"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.52",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.1"
|
||||
@@ -2084,18 +2011,6 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "findshlibs"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fixedbitset"
|
||||
version = "0.4.2"
|
||||
@@ -2174,9 +2089,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
|
||||
checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
@@ -2184,9 +2099,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
|
||||
checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
@@ -2201,9 +2116,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6"
|
||||
checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1"
|
||||
|
||||
[[package]]
|
||||
name = "futures-lite"
|
||||
@@ -2222,9 +2137,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
||||
checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -2233,15 +2148,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
|
||||
checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5"
|
||||
|
||||
[[package]]
|
||||
name = "futures-task"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
|
||||
checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004"
|
||||
|
||||
[[package]]
|
||||
name = "futures-timer"
|
||||
@@ -2251,9 +2166,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.31"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
|
||||
checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
@@ -2799,24 +2714,6 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac"
|
||||
|
||||
[[package]]
|
||||
name = "inferno"
|
||||
version = "0.11.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"indexmap 2.0.1",
|
||||
"is-terminal",
|
||||
"itoa",
|
||||
"log",
|
||||
"num-format",
|
||||
"once_cell",
|
||||
"quick-xml 0.26.0",
|
||||
"rgb",
|
||||
"str_stack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inotify"
|
||||
version = "0.9.6"
|
||||
@@ -2867,9 +2764,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.10.1"
|
||||
version = "2.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708"
|
||||
checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
@@ -3156,15 +3053,6 @@ version = "2.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.7.1"
|
||||
@@ -3390,16 +3278,6 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
|
||||
|
||||
[[package]]
|
||||
name = "num-format"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.45"
|
||||
@@ -3700,6 +3578,7 @@ dependencies = [
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"toml_edit",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -3741,7 +3620,6 @@ dependencies = [
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"pageserver_compaction",
|
||||
"pin-project-lite",
|
||||
"postgres",
|
||||
@@ -3750,7 +3628,6 @@ dependencies = [
|
||||
"postgres_backend",
|
||||
"postgres_connection",
|
||||
"postgres_ffi",
|
||||
"postgres_initdb",
|
||||
"pq_proto",
|
||||
"procfs",
|
||||
"rand 0.8.5",
|
||||
@@ -3765,7 +3642,6 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_path_to_error",
|
||||
"serde_with",
|
||||
"smallvec",
|
||||
"storage_broker",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
@@ -4182,7 +4058,7 @@ dependencies = [
|
||||
"bytes",
|
||||
"once_cell",
|
||||
"pq_proto",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"serde",
|
||||
"thiserror",
|
||||
@@ -4226,48 +4102,12 @@ dependencies = [
|
||||
"utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres_initdb"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"camino",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "powerfmt"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
|
||||
|
||||
[[package]]
|
||||
name = "pprof"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ebbe2f8898beba44815fdc9e5a4ae9c929e21c5dc29b0c774a15555f7f58d6d0"
|
||||
dependencies = [
|
||||
"aligned-vec",
|
||||
"backtrace",
|
||||
"cfg-if",
|
||||
"criterion",
|
||||
"findshlibs",
|
||||
"inferno",
|
||||
"libc",
|
||||
"log",
|
||||
"nix 0.26.4",
|
||||
"once_cell",
|
||||
"parking_lot 0.12.1",
|
||||
"protobuf",
|
||||
"protobuf-codegen-pure",
|
||||
"smallvec",
|
||||
"symbolic-demangle",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.17"
|
||||
@@ -4420,31 +4260,6 @@ dependencies = [
|
||||
"prost",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protobuf"
|
||||
version = "2.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
|
||||
|
||||
[[package]]
|
||||
name = "protobuf-codegen"
|
||||
version = "2.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6"
|
||||
dependencies = [
|
||||
"protobuf",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "protobuf-codegen-pure"
|
||||
version = "2.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865"
|
||||
dependencies = [
|
||||
"protobuf",
|
||||
"protobuf-codegen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proxy"
|
||||
version = "0.1.0"
|
||||
@@ -4518,7 +4333,7 @@ dependencies = [
|
||||
"rsa",
|
||||
"rstest",
|
||||
"rustc-hash",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"rustls-pemfile 2.1.1",
|
||||
"scopeguard",
|
||||
@@ -4556,15 +4371,6 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-xml"
|
||||
version = "0.31.0"
|
||||
@@ -5047,15 +4853,6 @@ dependencies = [
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rgb"
|
||||
version = "0.8.50"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.6"
|
||||
@@ -5231,9 +5028,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.23.18"
|
||||
version = "0.23.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f"
|
||||
checksum = "eee87ff5d9b36712a58574e12e9f0ea80f915a5b0ac518d322b24a465617925e"
|
||||
dependencies = [
|
||||
"log",
|
||||
"once_cell",
|
||||
@@ -5369,7 +5166,6 @@ dependencies = [
|
||||
"postgres-protocol",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
@@ -5867,9 +5663,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.13.2"
|
||||
version = "1.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
||||
checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
|
||||
|
||||
[[package]]
|
||||
name = "smol_str"
|
||||
@@ -5916,12 +5712,6 @@ dependencies = [
|
||||
"der 0.7.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "stable_deref_trait"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
@@ -5948,7 +5738,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"parking_lot 0.12.1",
|
||||
"prost",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"tokio",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
@@ -6031,7 +5821,7 @@ dependencies = [
|
||||
"postgres_ffi",
|
||||
"remote_storage",
|
||||
"reqwest 0.12.4",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -6068,12 +5858,6 @@ dependencies = [
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "str_stack"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
|
||||
|
||||
[[package]]
|
||||
name = "stringprep"
|
||||
version = "0.1.2"
|
||||
@@ -6121,29 +5905,6 @@ version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20e16a0f46cf5fd675563ef54f26e83e20f2366bcf027bcb3cc3ed2b98aaf2ca"
|
||||
|
||||
[[package]]
|
||||
name = "symbolic-common"
|
||||
version = "12.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "366f1b4c6baf6cfefc234bbd4899535fca0b06c74443039a73f6dfb2fad88d77"
|
||||
dependencies = [
|
||||
"debugid",
|
||||
"memmap2",
|
||||
"stable_deref_trait",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "symbolic-demangle"
|
||||
version = "12.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aba05ba5b9962ea5617baf556293720a8b2d0a282aa14ee4bf10e22efc7da8c8"
|
||||
dependencies = [
|
||||
"cpp_demangle",
|
||||
"rustc-demangle",
|
||||
"symbolic-common",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
@@ -6313,9 +6074,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemalloc-ctl"
|
||||
version = "0.6.0"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b"
|
||||
checksum = "619bfed27d807b54f7f776b9430d4f8060e66ee138a28632ca898584d462c31c"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"paste",
|
||||
@@ -6324,9 +6085,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemalloc-sys"
|
||||
version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7"
|
||||
version = "0.5.4+5.3.0-patched"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cd3c60906412afa9c2b5b5a48ca6a5abe5736aec9eb48ad05037a677e52e4e2d"
|
||||
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
@@ -6334,9 +6095,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "tikv-jemallocator"
|
||||
version = "0.6.0"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4cec5ff18518d81584f477e9bfdf957f5bb0979b0bac3af4ca30b5b3ae2d2865"
|
||||
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"tikv-jemalloc-sys",
|
||||
@@ -6493,7 +6254,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04fb792ccd6bbcd4bba408eb8a292f70fc4a3589e5d793626f45190e6454b6ab"
|
||||
dependencies = [
|
||||
"ring",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"tokio",
|
||||
"tokio-postgres",
|
||||
"tokio-rustls 0.26.0",
|
||||
@@ -6527,7 +6288,7 @@ version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
|
||||
dependencies = [
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-pki-types",
|
||||
"tokio",
|
||||
]
|
||||
@@ -6936,7 +6697,7 @@ dependencies = [
|
||||
"base64 0.22.1",
|
||||
"log",
|
||||
"once_cell",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"rustls-pki-types",
|
||||
"url",
|
||||
"webpki-roots 0.26.1",
|
||||
@@ -7011,7 +6772,6 @@ dependencies = [
|
||||
"once_cell",
|
||||
"pin-project-lite",
|
||||
"postgres_connection",
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
@@ -7546,7 +7306,6 @@ dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
"axum-core",
|
||||
"base64 0.13.1",
|
||||
"base64 0.21.1",
|
||||
"base64ct",
|
||||
"bytes",
|
||||
@@ -7581,7 +7340,6 @@ dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"memchr",
|
||||
"nix 0.26.4",
|
||||
"nom",
|
||||
"num-bigint",
|
||||
"num-integer",
|
||||
@@ -7598,7 +7356,7 @@ dependencies = [
|
||||
"regex-automata 0.4.3",
|
||||
"regex-syntax 0.8.2",
|
||||
"reqwest 0.12.4",
|
||||
"rustls 0.23.18",
|
||||
"rustls 0.23.16",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
||||
Cargo.toml (15 lines changed)
@@ -34,7 +34,6 @@ members = [
|
||||
"libs/vm_monitor",
|
||||
"libs/walproposer",
|
||||
"libs/wal_decoder",
|
||||
"libs/postgres_initdb",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
@@ -58,7 +57,6 @@ async-trait = "0.1"
|
||||
aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
|
||||
aws-sdk-s3 = "1.52"
|
||||
aws-sdk-iam = "1.46.0"
|
||||
aws-sdk-kms = "1.47.0"
|
||||
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
|
||||
aws-smithy-types = "1.2"
|
||||
aws-credential-types = "1.2.0"
|
||||
@@ -75,7 +73,7 @@ bytes = "1.0"
|
||||
camino = "1.1.6"
|
||||
cfg-if = "1.0.0"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
clap = { version = "4.0", features = ["derive", "env"] }
|
||||
clap = { version = "4.0", features = ["derive"] }
|
||||
comfy-table = "7.1"
|
||||
const_format = "0.2"
|
||||
crc32c = "0.6"
|
||||
@@ -108,7 +106,7 @@ hyper-util = "0.1"
|
||||
tokio-tungstenite = "0.21.0"
|
||||
indexmap = "2"
|
||||
indoc = "2"
|
||||
ipnet = "2.10.0"
|
||||
ipnet = "2.9.0"
|
||||
itertools = "0.10"
|
||||
itoa = "1.0.11"
|
||||
jsonwebtoken = "9"
|
||||
@@ -132,7 +130,6 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
|
||||
parquet_derive = "53"
|
||||
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
|
||||
pin-project-lite = "0.2"
|
||||
pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] }
|
||||
procfs = "0.16"
|
||||
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
|
||||
prost = "0.13"
|
||||
@@ -156,7 +153,7 @@ sentry = { version = "0.32", default-features = false, features = ["backtrace",
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_path_to_error = "0.1"
|
||||
serde_with = { version = "2.0", features = [ "base64" ] }
|
||||
serde_with = "2.0"
|
||||
serde_assert = "0.5.0"
|
||||
sha2 = "0.10.2"
|
||||
signal-hook = "0.3"
|
||||
@@ -171,8 +168,8 @@ sync_wrapper = "0.1.2"
|
||||
tar = "0.4"
|
||||
test-context = "0.3"
|
||||
thiserror = "1.0"
|
||||
tikv-jemallocator = { version = "0.6", features = ["stats"] }
|
||||
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
|
||||
tikv-jemallocator = "0.5"
|
||||
tikv-jemalloc-ctl = "0.5"
|
||||
tokio = { version = "1.17", features = ["macros"] }
|
||||
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
|
||||
tokio-io-timeout = "1.2.0"
|
||||
@@ -215,14 +212,12 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br
|
||||
compute_api = { version = "0.1", path = "./libs/compute_api/" }
|
||||
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
|
||||
metrics = { version = "0.1", path = "./libs/metrics/" }
|
||||
pageserver = { path = "./pageserver" }
|
||||
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
|
||||
pageserver_client = { path = "./pageserver/client" }
|
||||
pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
|
||||
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
|
||||
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
|
||||
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
|
||||
postgres_initdb = { path = "./libs/postgres_initdb" }
|
||||
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
|
||||
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
|
||||
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
|
||||
|
||||
@@ -132,7 +132,7 @@ make -j`sysctl -n hw.logicalcpu` -s
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.

To run the integration tests or Python scripts (not required to use the code), install
Python (3.11 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
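For reference, the Python test setup described in the README hunk above amounts to only a couple of commands. This is a minimal sketch, assuming pyenv provides the interpreter and that poetry is installed via pip; any other way of getting a suitable Python plus poetry>=1.8 onto PATH works just as well:

```bash
# Minimal sketch of the integration-test Python setup, per the README text above.
# Assumption: pyenv supplies the interpreter; the required version differs by
# branch (3.9 vs 3.11 in this diff), so pick the one your checkout asks for.
pyenv install 3.11
pyenv local 3.11

pip install "poetry>=1.8"   # assumption: poetry installed via pip

./scripts/pysync            # installs the python3 packages used by the tests
```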
#### Running neon database

@@ -234,7 +234,7 @@ USER nonroot:nonroot
WORKDIR /home/nonroot

# Python
ENV PYTHON_VERSION=3.11.10 \
ENV PYTHON_VERSION=3.9.19 \
PYENV_ROOT=/home/nonroot/.pyenv \
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
RUN set -e \

@@ -1243,7 +1243,7 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \

#########################################################################################
#
# Compile and run the Neon-specific `compute_ctl` and `fast_import` binaries
# Compile and run the Neon-specific `compute_ctl` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
@@ -1264,7 +1264,6 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de
FROM debian:$DEBIAN_FLAVOR AS compute-tools-image

COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import

#########################################################################################
#
@@ -1459,7 +1458,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \

COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/fast_import /usr/local/bin/fast_import

# pgbouncer and its config
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
@@ -1535,25 +1533,6 @@ RUN apt update && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2
# used by fast_import
ARG TARGETARCH
ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb
RUN set -ex; \
\
# Determine the expected checksum based on TARGETARCH
if [ "${TARGETARCH}" = "amd64" ]; then \
CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \
elif [ "${TARGETARCH}" = "arm64" ]; then \
CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \
else \
echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
fi; \
\
# Compute and validate the checksum
echo "${CHECKSUM} /tmp/s5cmd.deb" | sha256sum -c -
RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb

ENV LANG=en_US.utf8
USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"]
@@ -147,7 +147,7 @@ index 542c2e098c..0062d3024f 100644
|
||||
ALTER TABLE ptnowner1 OWNER TO regress_ptnowner;
|
||||
ALTER TABLE ptnowner OWNER TO regress_ptnowner;
|
||||
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
|
||||
index 3f9a8f539c..0a51b52940 100644
|
||||
index 97bbe53b64..eac3d42a79 100644
|
||||
--- a/src/test/regress/expected/collate.icu.utf8.out
|
||||
+++ b/src/test/regress/expected/collate.icu.utf8.out
|
||||
@@ -1016,7 +1016,7 @@ select * from collate_test1 where b ilike 'ABC';
|
||||
@@ -309,7 +309,7 @@ index b48365ec98..a6ef910055 100644
|
||||
-- the wrong partition. This test is *not* guaranteed to trigger that bug, but
|
||||
-- does so when shared_buffers is small enough. To test if we encountered the
|
||||
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
|
||||
index 9a74820ee8..22400a5551 100644
|
||||
index faf1a4d1b0..a44c97db52 100644
|
||||
--- a/src/test/regress/expected/copy2.out
|
||||
+++ b/src/test/regress/expected/copy2.out
|
||||
@@ -553,8 +553,8 @@ select * from check_con_tbl;
|
||||
@@ -573,7 +573,7 @@ index 93302a07ef..1a73f083ac 100644
|
||||
-- that does not match with what's expected.
|
||||
-- This checks all the object types that include schema qualifications.
|
||||
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out
|
||||
index f551624afb..57f1e432d4 100644
|
||||
index f3f8c7b5a2..3e3e54ff4c 100644
|
||||
--- a/src/test/regress/expected/create_view.out
|
||||
+++ b/src/test/regress/expected/create_view.out
|
||||
@@ -18,7 +18,8 @@ CREATE TABLE real_city (
|
||||
@@ -700,12 +700,12 @@ index 6ed50fdcfa..caa00a345d 100644
|
||||
COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';
|
||||
CREATE FOREIGN DATA WRAPPER postgresql VALIDATOR postgresql_fdw_validator;
|
||||
diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out
|
||||
index 6b8c2f2414..8e13b7fa46 100644
|
||||
index 12e523c737..8872e23935 100644
|
||||
--- a/src/test/regress/expected/foreign_key.out
|
||||
+++ b/src/test/regress/expected/foreign_key.out
|
||||
@@ -1985,7 +1985,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES
|
||||
ERROR: cannot ALTER TABLE "fk_partitioned_pk_61" because it is being used by active queries in this session
|
||||
DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6;
|
||||
@@ -1968,7 +1968,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2
|
||||
FOR VALUES IN (1600);
|
||||
-- leave these tables around intentionally
|
||||
-- test the case when the referenced table is owned by a different user
|
||||
-create role regress_other_partitioned_fk_owner;
|
||||
+create role regress_other_partitioned_fk_owner PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
@@ -713,7 +713,7 @@ index 6b8c2f2414..8e13b7fa46 100644
|
||||
set role regress_other_partitioned_fk_owner;
|
||||
create table other_partitioned_fk(a int, b int) partition by list (a);
|
||||
diff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out
|
||||
index 5881420388..4ae21aa43c 100644
|
||||
index 0f623f7119..b48588a54e 100644
|
||||
--- a/src/test/regress/expected/generated.out
|
||||
+++ b/src/test/regress/expected/generated.out
|
||||
@@ -534,7 +534,7 @@ CREATE TABLE gtest10a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) STOR
|
||||
@@ -762,7 +762,7 @@ index a2036a1597..805d73b9d2 100644
|
||||
-- fields, leading to long bucket chains and lots of table expansion.
|
||||
-- this is therefore a stress test of the bucket overflow code (unlike
|
||||
diff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out
|
||||
index 1b74958de9..078187b542 100644
|
||||
index cc7772349f..98a08eb48d 100644
|
||||
--- a/src/test/regress/expected/identity.out
|
||||
+++ b/src/test/regress/expected/identity.out
|
||||
@@ -520,7 +520,7 @@ ALTER TABLE itest7 ALTER COLUMN a SET GENERATED BY DEFAULT;
|
||||
@@ -775,10 +775,10 @@ index 1b74958de9..078187b542 100644
|
||||
GRANT SELECT, INSERT ON itest8 TO regress_identity_user1;
|
||||
SET ROLE regress_identity_user1;
|
||||
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
|
||||
index 8f831c95c3..ec681b52af 100644
|
||||
index 4943429e9b..0257f22b15 100644
|
||||
--- a/src/test/regress/expected/inherit.out
|
||||
+++ b/src/test/regress/expected/inherit.out
|
||||
@@ -2636,7 +2636,7 @@ create index on permtest_parent (left(c, 3));
|
||||
@@ -2606,7 +2606,7 @@ create index on permtest_parent (left(c, 3));
|
||||
insert into permtest_parent
|
||||
select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i;
|
||||
analyze permtest_parent;
|
||||
@@ -1133,7 +1133,7 @@ index 8475231735..1afae5395f 100644
|
||||
SELECT rolname, rolpassword
|
||||
FROM pg_authid
|
||||
diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
|
||||
index 5b9dba7b32..cc408dad42 100644
|
||||
index fbb0489a4f..2905194e2c 100644
|
||||
--- a/src/test/regress/expected/privileges.out
|
||||
+++ b/src/test/regress/expected/privileges.out
|
||||
@@ -20,19 +20,19 @@ SELECT lo_unlink(oid) FROM pg_largeobject_metadata WHERE oid >= 1000 AND oid < 3
|
||||
@@ -1185,7 +1185,7 @@ index 5b9dba7b32..cc408dad42 100644
|
||||
GRANT pg_read_all_data TO regress_priv_user6;
|
||||
GRANT pg_write_all_data TO regress_priv_user7;
|
||||
GRANT pg_read_all_settings TO regress_priv_user8 WITH ADMIN OPTION;
|
||||
@@ -212,8 +212,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8;
|
||||
@@ -145,8 +145,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8;
|
||||
DROP USER regress_priv_user10;
|
||||
DROP USER regress_priv_user9;
|
||||
DROP USER regress_priv_user8;
|
||||
@@ -1196,7 +1196,7 @@ index 5b9dba7b32..cc408dad42 100644
|
||||
ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4;
|
||||
GRANT regress_priv_group2 TO regress_priv_user2 GRANTED BY regress_priv_user1;
|
||||
SET SESSION AUTHORIZATION regress_priv_user1;
|
||||
@@ -239,12 +239,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre
|
||||
@@ -172,12 +172,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre
|
||||
ERROR: permission denied to grant privileges as role "regress_priv_role"
|
||||
DETAIL: The grantor must have the ADMIN option on role "regress_priv_role".
|
||||
GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY CURRENT_ROLE;
|
||||
@@ -1213,7 +1213,7 @@ index 5b9dba7b32..cc408dad42 100644
|
||||
DROP ROLE regress_priv_role;
|
||||
SET SESSION AUTHORIZATION regress_priv_user1;
|
||||
SELECT session_user, current_user;
|
||||
@@ -1776,7 +1780,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP
|
||||
@@ -1709,7 +1713,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP
|
||||
|
||||
-- security-restricted operations
|
||||
\c -
|
||||
@@ -1222,7 +1222,7 @@ index 5b9dba7b32..cc408dad42 100644
|
||||
-- Check that index expressions and predicates are run as the table's owner
|
||||
-- A dummy index function checking current_user
|
||||
CREATE FUNCTION sro_ifun(int) RETURNS int AS $$
|
||||
@@ -2668,8 +2672,8 @@ drop cascades to function testns.priv_testagg(integer)
|
||||
@@ -2601,8 +2605,8 @@ drop cascades to function testns.priv_testagg(integer)
|
||||
drop cascades to function testns.priv_testproc(integer)
|
||||
-- Change owner of the schema & and rename of new schema owner
|
||||
\c -
|
||||
@@ -1233,7 +1233,7 @@ index 5b9dba7b32..cc408dad42 100644
|
||||
SET SESSION ROLE regress_schemauser1;
|
||||
CREATE SCHEMA testns;
|
||||
SELECT nspname, rolname FROM pg_namespace, pg_roles WHERE pg_namespace.nspname = 'testns' AND pg_namespace.nspowner = pg_roles.oid;
|
||||
@@ -2792,7 +2796,7 @@ DROP USER regress_priv_user7;
|
||||
@@ -2725,7 +2729,7 @@ DROP USER regress_priv_user7;
|
||||
DROP USER regress_priv_user8; -- does not exist
|
||||
ERROR: role "regress_priv_user8" does not exist
|
||||
-- permissions with LOCK TABLE
|
||||
@@ -1242,7 +1242,7 @@ index 5b9dba7b32..cc408dad42 100644
|
||||
CREATE TABLE lock_table (a int);
|
||||
-- LOCK TABLE and SELECT permission
|
||||
GRANT SELECT ON lock_table TO regress_locktable_user;
|
||||
@@ -2874,7 +2878,7 @@ DROP USER regress_locktable_user;
|
||||
@@ -2807,7 +2811,7 @@ DROP USER regress_locktable_user;
|
||||
-- pg_backend_memory_contexts.
|
||||
-- switch to superuser
|
||||
\c -
|
||||
@@ -1251,7 +1251,7 @@ index 5b9dba7b32..cc408dad42 100644
|
||||
SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no
|
||||
has_table_privilege
|
||||
---------------------
|
||||
@@ -2918,10 +2922,10 @@ RESET ROLE;
|
||||
@@ -2851,10 +2855,10 @@ RESET ROLE;
|
||||
-- clean up
|
||||
DROP ROLE regress_readallstats;
|
||||
-- test role grantor machinery
|
||||
@@ -1266,7 +1266,7 @@ index 5b9dba7b32..cc408dad42 100644
|
||||
GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE;
|
||||
GRANT regress_group_direct_manager TO regress_group_indirect_manager;
|
||||
SET SESSION AUTHORIZATION regress_group_direct_manager;
|
||||
@@ -2950,9 +2954,9 @@ DROP ROLE regress_group_direct_manager;
|
||||
@@ -2883,9 +2887,9 @@ DROP ROLE regress_group_direct_manager;
|
||||
DROP ROLE regress_group_indirect_manager;
|
||||
DROP ROLE regress_group_member;
|
||||
-- test SET and INHERIT options with object ownership changes
|
||||
@@ -1813,7 +1813,7 @@ index 5e6969b173..2c4d52237f 100644
|
||||
|
||||
-- clean up roles
|
||||
diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out
|
||||
index 218c0c2863..f7af0cfb12 100644
|
||||
index 97ca9bf72c..b2a7a6f710 100644
|
||||
--- a/src/test/regress/expected/rowsecurity.out
|
||||
+++ b/src/test/regress/expected/rowsecurity.out
|
||||
@@ -14,13 +14,13 @@ DROP ROLE IF EXISTS regress_rls_group2;
|
||||
@@ -1917,19 +1917,6 @@ index b79fe9a1c0..e29fab88ab 100644
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user
|
||||
REVOKE INSERT ON TABLES FROM regress_selinto_user;
|
||||
GRANT ALL ON SCHEMA selinto_schema TO public;
|
||||
diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out
|
||||
index afc6ab08c2..dfcd891af3 100644
|
||||
--- a/src/test/regress/expected/select_parallel.out
|
||||
+++ b/src/test/regress/expected/select_parallel.out
|
||||
@@ -1220,7 +1220,7 @@ SELECT 1 FROM tenk1_vw_sec
|
||||
|
||||
rollback;
|
||||
-- test that function option SET ROLE works in parallel workers.
|
||||
-create role regress_parallel_worker;
|
||||
+create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
create function set_and_report_role() returns text as
|
||||
$$ select current_setting('role') $$ language sql parallel safe
|
||||
set role = regress_parallel_worker;
|
||||
diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out
|
||||
index 1aeed8452b..7d9427d070 100644
|
||||
--- a/src/test/regress/expected/select_views.out
|
||||
@@ -2382,7 +2369,7 @@ index 6cb9c926c0..5e689e4062 100644
|
||||
ALTER TABLE ptnowner1 OWNER TO regress_ptnowner;
|
||||
ALTER TABLE ptnowner OWNER TO regress_ptnowner;
|
||||
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
|
||||
index 8aa902d5ab..24bb823b86 100644
|
||||
index 3db9e25913..c66d5aa2c2 100644
|
||||
--- a/src/test/regress/sql/collate.icu.utf8.sql
|
||||
+++ b/src/test/regress/sql/collate.icu.utf8.sql
|
||||
@@ -353,7 +353,7 @@ reset enable_seqscan;
|
||||
@@ -2545,7 +2532,7 @@ index 43d2e906dd..6c993d70f0 100644
|
||||
-- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from
|
||||
-- the wrong partition. This test is *not* guaranteed to trigger that bug, but
|
||||
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
|
||||
index cf3828c16e..cf3ca38175 100644
|
||||
index d759635068..d58e50dcc5 100644
|
||||
--- a/src/test/regress/sql/copy2.sql
|
||||
+++ b/src/test/regress/sql/copy2.sql
|
||||
@@ -365,8 +365,8 @@ copy check_con_tbl from stdin;
|
||||
@@ -2787,7 +2774,7 @@ index 1b7064247a..be5b662ce1 100644
|
||||
-- Cases where schema creation fails as objects are qualified with a schema
|
||||
-- that does not match with what's expected.
|
||||
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql
|
||||
index ae6841308b..47bc792e30 100644
|
||||
index 3a78be1b0c..617d2dc8d6 100644
|
||||
--- a/src/test/regress/sql/create_view.sql
|
||||
+++ b/src/test/regress/sql/create_view.sql
|
||||
@@ -23,7 +23,8 @@ CREATE TABLE real_city (
|
||||
@@ -2914,11 +2901,11 @@ index aa147b14a9..370e0dd570 100644
|
||||
CREATE FOREIGN DATA WRAPPER dummy;
|
||||
COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless';
|
||||
diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql
|
||||
index 45c7a534cb..32dd26b8cd 100644
|
||||
index 22e177f89b..7138d5e1d4 100644
|
||||
--- a/src/test/regress/sql/foreign_key.sql
|
||||
+++ b/src/test/regress/sql/foreign_key.sql
|
||||
@@ -1435,7 +1435,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES
|
||||
DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6;
|
||||
@@ -1418,7 +1418,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2
|
||||
-- leave these tables around intentionally
|
||||
|
||||
-- test the case when the referenced table is owned by a different user
|
||||
-create role regress_other_partitioned_fk_owner;
|
||||
@@ -2976,7 +2963,7 @@ index 527024f710..de49c0b85f 100644
|
||||
-- the data in this file has a lot of duplicates in the index key
|
||||
-- fields, leading to long bucket chains and lots of table expansion.
|
||||
diff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql
|
||||
index 7537258a75..9041e35e34 100644
|
||||
index 91d2e443b4..241c93f373 100644
|
||||
--- a/src/test/regress/sql/identity.sql
|
||||
+++ b/src/test/regress/sql/identity.sql
|
||||
@@ -287,7 +287,7 @@ ALTER TABLE itest7 ALTER COLUMN a RESTART;
|
||||
@@ -2989,10 +2976,10 @@ index 7537258a75..9041e35e34 100644
|
||||
GRANT SELECT, INSERT ON itest8 TO regress_identity_user1;
|
||||
SET ROLE regress_identity_user1;
|
||||
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
|
||||
index b5b554a125..109889ad24 100644
|
||||
index fe699c54d5..bdd5993f45 100644
|
||||
--- a/src/test/regress/sql/inherit.sql
|
||||
+++ b/src/test/regress/sql/inherit.sql
|
||||
@@ -958,7 +958,7 @@ create index on permtest_parent (left(c, 3));
|
||||
@@ -950,7 +950,7 @@ create index on permtest_parent (left(c, 3));
|
||||
insert into permtest_parent
|
||||
select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i;
|
||||
analyze permtest_parent;
|
||||
@@ -3231,7 +3218,7 @@ index 53e86b0b6c..f07cf1ec54 100644
|
||||
CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
|
||||
|
||||
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
|
||||
index 249df17a58..b258e7f26a 100644
|
||||
index 3f68cafcd1..004b26831d 100644
|
||||
--- a/src/test/regress/sql/privileges.sql
|
||||
+++ b/src/test/regress/sql/privileges.sql
|
||||
@@ -24,18 +24,18 @@ RESET client_min_messages;
|
||||
@@ -3282,7 +3269,7 @@ index 249df17a58..b258e7f26a 100644
|
||||
|
||||
GRANT pg_read_all_data TO regress_priv_user6;
|
||||
GRANT pg_write_all_data TO regress_priv_user7;
|
||||
@@ -163,8 +163,8 @@ DROP USER regress_priv_user10;
|
||||
@@ -130,8 +130,8 @@ DROP USER regress_priv_user10;
|
||||
DROP USER regress_priv_user9;
|
||||
DROP USER regress_priv_user8;
|
||||
|
||||
@@ -3293,7 +3280,7 @@ index 249df17a58..b258e7f26a 100644
|
||||
|
||||
ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4;
|
||||
|
||||
@@ -1157,7 +1157,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP
|
||||
@@ -1124,7 +1124,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP
|
||||
|
||||
-- security-restricted operations
|
||||
\c -
|
||||
@@ -3302,7 +3289,7 @@ index 249df17a58..b258e7f26a 100644
|
||||
|
||||
-- Check that index expressions and predicates are run as the table's owner
|
||||
|
||||
@@ -1653,8 +1653,8 @@ DROP SCHEMA testns CASCADE;
|
||||
@@ -1620,8 +1620,8 @@ DROP SCHEMA testns CASCADE;
|
||||
-- Change owner of the schema & and rename of new schema owner
|
||||
\c -
|
||||
|
||||
@@ -3313,7 +3300,7 @@ index 249df17a58..b258e7f26a 100644
|
||||
|
||||
SET SESSION ROLE regress_schemauser1;
|
||||
CREATE SCHEMA testns;
|
||||
@@ -1748,7 +1748,7 @@ DROP USER regress_priv_user8; -- does not exist
|
||||
@@ -1715,7 +1715,7 @@ DROP USER regress_priv_user8; -- does not exist
|
||||
|
||||
|
||||
-- permissions with LOCK TABLE
|
||||
@@ -3322,7 +3309,7 @@ index 249df17a58..b258e7f26a 100644
|
||||
CREATE TABLE lock_table (a int);
|
||||
|
||||
-- LOCK TABLE and SELECT permission
|
||||
@@ -1836,7 +1836,7 @@ DROP USER regress_locktable_user;
|
||||
@@ -1803,7 +1803,7 @@ DROP USER regress_locktable_user;
|
||||
-- switch to superuser
|
||||
\c -
|
||||
|
||||
@@ -3331,7 +3318,7 @@ index 249df17a58..b258e7f26a 100644
|
||||
|
||||
SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no
|
||||
SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no
|
||||
@@ -1856,10 +1856,10 @@ RESET ROLE;
|
||||
@@ -1823,10 +1823,10 @@ RESET ROLE;
|
||||
DROP ROLE regress_readallstats;
|
||||
|
||||
-- test role grantor machinery
|
||||
@@ -3346,7 +3333,7 @@ index 249df17a58..b258e7f26a 100644
|
||||
|
||||
GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE;
|
||||
GRANT regress_group_direct_manager TO regress_group_indirect_manager;
|
||||
@@ -1881,9 +1881,9 @@ DROP ROLE regress_group_indirect_manager;
|
||||
@@ -1848,9 +1848,9 @@ DROP ROLE regress_group_indirect_manager;
|
||||
DROP ROLE regress_group_member;
|
||||
|
||||
-- test SET and INHERIT options with object ownership changes
|
||||
@@ -3638,7 +3625,7 @@ index c961b2d730..0859b89c4f 100644
|
||||
-- clean up roles
|
||||
DROP ROLE regress_test_def_superuser;
|
||||
diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql
|
||||
index d3bfd53e23..919ce1d0c6 100644
|
||||
index dec7340538..cdbc03a5cc 100644
|
||||
--- a/src/test/regress/sql/rowsecurity.sql
|
||||
+++ b/src/test/regress/sql/rowsecurity.sql
|
||||
@@ -20,13 +20,13 @@ DROP SCHEMA IF EXISTS regress_rls_schema CASCADE;
|
||||
@@ -3714,19 +3701,6 @@ index 689c448cc2..223ceb1d75 100644
|
||||
ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user
|
||||
REVOKE INSERT ON TABLES FROM regress_selinto_user;
|
||||
GRANT ALL ON SCHEMA selinto_schema TO public;
|
||||
diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql
|
||||
index 33d78e16dc..cb193c9b27 100644
|
||||
--- a/src/test/regress/sql/select_parallel.sql
|
||||
+++ b/src/test/regress/sql/select_parallel.sql
|
||||
@@ -464,7 +464,7 @@ SELECT 1 FROM tenk1_vw_sec
|
||||
rollback;
|
||||
|
||||
-- test that function option SET ROLE works in parallel workers.
|
||||
-create role regress_parallel_worker;
|
||||
+create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
|
||||
create function set_and_report_role() returns text as
|
||||
$$ select current_setting('role') $$ language sql parallel safe
|
||||
diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql
|
||||
index e742f13699..7bd0255df8 100644
|
||||
--- a/src/test/regress/sql/select_views.sql
|
||||
|
||||
@@ -10,10 +10,6 @@ default = []
testing = []

[dependencies]
base64.workspace = true
aws-config.workspace = true
aws-sdk-s3.workspace = true
aws-sdk-kms.workspace = true
anyhow.workspace = true
camino.workspace = true
chrono.workspace = true
@@ -31,8 +27,6 @@ opentelemetry.workspace = true
opentelemetry_sdk.workspace = true
postgres.workspace = true
regex.workspace = true
serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true
signal-hook.workspace = true
tar.workspace = true
@@ -49,7 +43,6 @@ thiserror.workspace = true
url.workspace = true
prometheus.workspace = true

postgres_initdb.workspace = true
compute_api.workspace = true
utils.workspace = true
workspace_hack.workspace = true
@@ -105,11 +105,6 @@ fn main() -> Result<()> {
fn init() -> Result<(String, clap::ArgMatches)> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

opentelemetry::global::set_error_handler(|err| {
tracing::info!("OpenTelemetry error: {err}");
})
.expect("global error handler lock poisoned");

let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
thread::spawn(move || {
for sig in signals.forever() {
@@ -1,338 +0,0 @@
|
||||
//! This program dumps a remote Postgres database into a local Postgres database
|
||||
//! and uploads the resulting PGDATA into object storage for import into a Timeline.
|
||||
//!
|
||||
//! # Context, Architecture, Design
|
||||
//!
|
||||
//! See cloud.git Fast Imports RFC (<https://github.com/neondatabase/cloud/pull/19799>)
|
||||
//! for the full picture.
|
||||
//! The RFC describing the storage pieces of importing the PGDATA dump into a Timeline
|
||||
//! is publicly accessible at <https://github.com/neondatabase/neon/pull/9538>.
|
||||
//!
|
||||
//! # This is a Prototype!
|
||||
//!
|
||||
//! This program is part of a prototype feature and not yet used in production.
|
||||
//!
|
||||
//! The cloud.git RFC contains lots of suggestions for improving e2e throughput
|
||||
//! of this step of the timeline import process.
|
||||
//!
|
||||
//! # Local Testing
|
||||
//!
|
||||
//! - Comment out most of the pgxns in The Dockerfile.compute-tools to speed up the build.
|
||||
//! - Build the image with the following command:
|
||||
//!
|
||||
//! ```bash
|
||||
//! docker buildx build --build-arg DEBIAN_FLAVOR=bullseye-slim --build-arg GIT_VERSION=local --build-arg PG_VERSION=v14 --build-arg BUILD_TAG="$(date --iso-8601=s -u)" -t localhost:3030/localregistry/compute-node-v14:latest -f compute/Dockerfile.com
|
||||
//! docker push localhost:3030/localregistry/compute-node-v14:latest
|
||||
//! ```
|
||||
|
||||
use anyhow::Context;
|
||||
use aws_config::BehaviorVersion;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use clap::Parser;
|
||||
use nix::unistd::Pid;
|
||||
use tracing::{info, info_span, warn, Instrument};
|
||||
use utils::fs_ext::is_directory_empty;
|
||||
|
||||
#[path = "fast_import/child_stdio_to_log.rs"]
|
||||
mod child_stdio_to_log;
|
||||
#[path = "fast_import/s3_uri.rs"]
|
||||
mod s3_uri;
|
||||
#[path = "fast_import/s5cmd.rs"]
|
||||
mod s5cmd;
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
#[clap(long)]
|
||||
working_directory: Utf8PathBuf,
|
||||
#[clap(long, env = "NEON_IMPORTER_S3_PREFIX")]
|
||||
s3_prefix: s3_uri::S3Uri,
|
||||
#[clap(long)]
|
||||
pg_bin_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(serde::Deserialize)]
|
||||
struct Spec {
|
||||
encryption_secret: EncryptionSecret,
|
||||
#[serde_as(as = "serde_with::base64::Base64")]
|
||||
source_connstring_ciphertext_base64: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
enum EncryptionSecret {
|
||||
#[allow(clippy::upper_case_acronyms)]
|
||||
KMS { key_id: String },
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
utils::logging::init(
|
||||
utils::logging::LogFormat::Plain,
|
||||
utils::logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
utils::logging::Output::Stdout,
)?;

info!("starting");

let Args {
working_directory,
s3_prefix,
pg_bin_dir,
pg_lib_dir,
} = Args::parse();

let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;

let spec: Spec = {
let spec_key = s3_prefix.append("/spec.json");
let s3_client = aws_sdk_s3::Client::new(&aws_config);
let object = s3_client
.get_object()
.bucket(&spec_key.bucket)
.key(spec_key.key)
.send()
.await
.context("get spec from s3")?
.body
.collect()
.await
.context("download spec body")?;
serde_json::from_slice(&object.into_bytes()).context("parse spec as json")?
};

match tokio::fs::create_dir(&working_directory).await {
Ok(()) => {}
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
if !is_directory_empty(&working_directory)
.await
.context("check if working directory is empty")?
{
anyhow::bail!("working directory is not empty");
} else {
// ok
}
}
Err(e) => return Err(anyhow::Error::new(e).context("create working directory")),
}

let pgdata_dir = working_directory.join("pgdata");
tokio::fs::create_dir(&pgdata_dir)
.await
.context("create pgdata directory")?;

//
// Setup clients
//
let aws_config = aws_config::load_defaults(BehaviorVersion::v2024_03_28()).await;
let kms_client = aws_sdk_kms::Client::new(&aws_config);

//
// Initialize pgdata
//
let superuser = "cloud_admin"; // XXX: this shouldn't be hard-coded
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
superuser,
locale: "en_US.UTF-8", // XXX: this shouldn't be hard-coded,
pg_version: 140000, // XXX: this shouldn't be hard-coded but derived from which compute image we're running in
initdb_bin: pg_bin_dir.join("initdb").as_ref(),
library_search_path: &pg_lib_dir, // TODO: is this right? Prob works in compute image, not sure about neon_local.
pgdata: &pgdata_dir,
})
.await
.context("initdb")?;

let nproc = num_cpus::get();

//
// Launch postgres process
//
let mut postgres_proc = tokio::process::Command::new(pg_bin_dir.join("postgres"))
.arg("-D")
.arg(&pgdata_dir)
.args(["-c", "wal_level=minimal"])
.args(["-c", "shared_buffers=10GB"])
.args(["-c", "max_wal_senders=0"])
.args(["-c", "fsync=off"])
.args(["-c", "full_page_writes=off"])
.args(["-c", "synchronous_commit=off"])
.args(["-c", "maintenance_work_mem=8388608"])
.args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
.args(["-c", &format!("max_parallel_workers={nproc}")])
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
.args(["-c", &format!("max_worker_processes={nproc}")])
.args(["-c", "effective_io_concurrency=100"])
.env_clear()
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("spawn postgres")?;

info!("spawned postgres, waiting for it to become ready");
tokio::spawn(
child_stdio_to_log::relay_process_output(
postgres_proc.stdout.take(),
postgres_proc.stderr.take(),
)
.instrument(info_span!("postgres")),
);
let restore_pg_connstring =
format!("host=localhost port=5432 user={superuser} dbname=postgres");
loop {
let res = tokio_postgres::connect(&restore_pg_connstring, tokio_postgres::NoTls).await;
if res.is_ok() {
info!("postgres is ready, could connect to it");
break;
}
}

//
// Decrypt connection string
//
let source_connection_string = {
match spec.encryption_secret {
EncryptionSecret::KMS { key_id } => {
let mut output = kms_client
.decrypt()
.key_id(key_id)
.ciphertext_blob(aws_sdk_s3::primitives::Blob::new(
spec.source_connstring_ciphertext_base64,
))
.send()
.await
.context("decrypt source connection string")?;
let plaintext = output
.plaintext
.take()
.context("get plaintext source connection string")?;
String::from_utf8(plaintext.into_inner())
.context("parse source connection string as utf8")?
}
}
};

//
// Start the work
//

let dumpdir = working_directory.join("dumpdir");

let common_args = [
// schema mapping (prob suffices to specify them on one side)
"--no-owner".to_string(),
"--no-privileges".to_string(),
"--no-publications".to_string(),
"--no-security-labels".to_string(),
"--no-subscriptions".to_string(),
"--no-tablespaces".to_string(),
// format
"--format".to_string(),
"directory".to_string(),
// concurrency
"--jobs".to_string(),
num_cpus::get().to_string(),
// progress updates
"--verbose".to_string(),
];

info!("dump into the working directory");
{
let mut pg_dump = tokio::process::Command::new(pg_bin_dir.join("pg_dump"))
.args(&common_args)
.arg("-f")
.arg(&dumpdir)
.arg("--no-sync")
// POSITIONAL args
// source db (db name included in connection string)
.arg(&source_connection_string)
// how we run it
.env_clear()
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("spawn pg_dump")?;

info!(pid=%pg_dump.id().unwrap(), "spawned pg_dump");

tokio::spawn(
child_stdio_to_log::relay_process_output(pg_dump.stdout.take(), pg_dump.stderr.take())
.instrument(info_span!("pg_dump")),
);

let st = pg_dump.wait().await.context("wait for pg_dump")?;
info!(status=?st, "pg_dump exited");
if !st.success() {
warn!(status=%st, "pg_dump failed, restore will likely fail as well");
}
}

// TODO: do it in a streaming way, plenty of internal research done on this already
// TODO: do the unlogged table trick

info!("restore from working directory into vanilla postgres");
{
let mut pg_restore = tokio::process::Command::new(pg_bin_dir.join("pg_restore"))
.args(&common_args)
.arg("-d")
.arg(&restore_pg_connstring)
// POSITIONAL args
.arg(&dumpdir)
// how we run it
.env_clear()
.kill_on_drop(true)
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.context("spawn pg_restore")?;

info!(pid=%pg_restore.id().unwrap(), "spawned pg_restore");
tokio::spawn(
child_stdio_to_log::relay_process_output(
pg_restore.stdout.take(),
pg_restore.stderr.take(),
)
.instrument(info_span!("pg_restore")),
);
let st = pg_restore.wait().await.context("wait for pg_restore")?;
info!(status=?st, "pg_restore exited");
if !st.success() {
warn!(status=%st, "pg_restore failed, restore will likely fail as well");
}
}

info!("shutdown postgres");
{
nix::sys::signal::kill(
Pid::from_raw(
i32::try_from(postgres_proc.id().unwrap()).expect("convert child pid to i32"),
),
nix::sys::signal::SIGTERM,
)
.context("signal postgres to shut down")?;
postgres_proc
.wait()
.await
.context("wait for postgres to shut down")?;
}

info!("upload pgdata");
s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
.await
.context("sync dump directory to destination")?;

info!("write status");
{
let status_dir = working_directory.join("status");
std::fs::create_dir(&status_dir).context("create status directory")?;
let status_file = status_dir.join("status");
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
.context("write status file")?;
s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
.await
.context("sync status directory to destination")?;
}

Ok(())
}
@@ -1,35 +0,0 @@
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::{ChildStderr, ChildStdout};
use tracing::info;

/// Asynchronously relays the output from a child process's `stdout` and `stderr` to the tracing log.
/// Each line is read and logged individually, with lossy UTF-8 conversion.
///
/// # Arguments
///
/// * `stdout`: An `Option<ChildStdout>` from the child process.
/// * `stderr`: An `Option<ChildStderr>` from the child process.
///
pub(crate) async fn relay_process_output(stdout: Option<ChildStdout>, stderr: Option<ChildStderr>) {
let stdout_fut = async {
if let Some(stdout) = stdout {
let reader = BufReader::new(stdout);
let mut lines = reader.lines();
while let Ok(Some(line)) = lines.next_line().await {
info!(fd = "stdout", "{}", line);
}
}
};

let stderr_fut = async {
if let Some(stderr) = stderr {
let reader = BufReader::new(stderr);
let mut lines = reader.lines();
while let Ok(Some(line)) = lines.next_line().await {
info!(fd = "stderr", "{}", line);
}
}
};

tokio::join!(stdout_fut, stderr_fut);
}
@@ -1,75 +0,0 @@
use anyhow::Result;
use std::str::FromStr;

/// Struct to hold parsed S3 components
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct S3Uri {
pub bucket: String,
pub key: String,
}

impl FromStr for S3Uri {
type Err = anyhow::Error;

/// Parse an S3 URI into a bucket and key
fn from_str(uri: &str) -> Result<Self> {
// Ensure the URI starts with "s3://"
if !uri.starts_with("s3://") {
return Err(anyhow::anyhow!("Invalid S3 URI scheme"));
}

// Remove the "s3://" prefix
let stripped_uri = &uri[5..];

// Split the remaining string into bucket and key parts
if let Some((bucket, key)) = stripped_uri.split_once('/') {
Ok(S3Uri {
bucket: bucket.to_string(),
key: key.to_string(),
})
} else {
Err(anyhow::anyhow!(
"Invalid S3 URI format, missing bucket or key"
))
}
}
}

impl S3Uri {
pub fn append(&self, suffix: &str) -> Self {
Self {
bucket: self.bucket.clone(),
key: format!("{}{}", self.key, suffix),
}
}
}

impl std::fmt::Display for S3Uri {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "s3://{}/{}", self.bucket, self.key)
}
}

impl clap::builder::TypedValueParser for S3Uri {
type Value = Self;

fn parse_ref(
&self,
_cmd: &clap::Command,
_arg: Option<&clap::Arg>,
value: &std::ffi::OsStr,
) -> Result<Self::Value, clap::Error> {
let value_str = value.to_str().ok_or_else(|| {
clap::Error::raw(
clap::error::ErrorKind::InvalidUtf8,
"Invalid UTF-8 sequence",
)
})?;
S3Uri::from_str(value_str).map_err(|e| {
clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("Failed to parse S3 URI: {}", e),
)
})
}
}
@@ -1,27 +0,0 @@
use anyhow::Context;
use camino::Utf8Path;

use super::s3_uri::S3Uri;

pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
let mut builder = tokio::process::Command::new("s5cmd");
// s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
builder.arg("--endpoint-url").arg(val);
}
builder
.arg("sync")
.arg(local.as_str())
.arg(remote.to_string());
let st = builder
.spawn()
.context("spawn s5cmd")?
.wait()
.await
.context("wait for s5cmd")?;
if st.success() {
Ok(())
} else {
Err(anyhow::anyhow!("s5cmd failed"))
}
}
@@ -1,40 +1,38 @@
use compute_api::responses::CatalogObjects;
use compute_api::{
responses::CatalogObjects,
spec::{Database, Role},
};
use futures::Stream;
use postgres::NoTls;
use postgres::{Client, NoTls};
use std::{path::Path, process::Stdio, result::Result, sync::Arc};
use tokio::{
io::{AsyncBufReadExt, BufReader},
process::Command,
spawn,
task,
};
use tokio_postgres::connect;
use tokio_stream::{self as stream, StreamExt};
use tokio_util::codec::{BytesCodec, FramedRead};
use tracing::warn;

use crate::compute::ComputeNode;
use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async};
use crate::{
compute::ComputeNode,
pg_helpers::{get_existing_dbs, get_existing_roles},
};

pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
let connstr = compute.connstr.clone();

let (client, connection): (tokio_postgres::Client, _) =
connect(connstr.as_str(), NoTls).await?;

spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
task::spawn_blocking(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let roles: Vec<Role>;
{
let mut xact = client.transaction()?;
roles = get_existing_roles(&mut xact)?;
}
});
let databases: Vec<Database> = get_existing_dbs(&mut client)?.values().cloned().collect();

let roles = get_existing_roles_async(&client).await?;

let databases = get_existing_dbs_async(&client)
.await?
.into_values()
.collect();

Ok(CatalogObjects { roles, databases })
Ok(CatalogObjects { roles, databases })
})
.await?
}

#[derive(Debug, thiserror::Error)]

@@ -1,9 +1,37 @@
use anyhow::{anyhow, Ok, Result};
use postgres::Client;
use tokio_postgres::NoTls;
use tracing::{error, instrument, warn};

use crate::compute::ComputeNode;

/// Create a special service table for availability checks
/// only if it does not exist already.
pub fn create_availability_check_data(client: &mut Client) -> Result<()> {
let query = "
DO $$
BEGIN
IF NOT EXISTS(
SELECT 1
FROM pg_catalog.pg_tables
WHERE tablename = 'health_check'
)
THEN
CREATE TABLE health_check (
id serial primary key,
updated_at timestamptz default now()
);
INSERT INTO health_check VALUES (1, now())
ON CONFLICT (id) DO UPDATE
SET updated_at = now();
END IF;
END
$$;";
client.execute(query, &[])?;

Ok(())
}

/// Update timestamp in a row in a special service table to check
/// that we can actually write some data in this particular timeline.
#[instrument(skip_all)]

@@ -1,21 +1,20 @@
use std::collections::{HashMap, HashSet};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::iter::once;
use std::os::unix::fs::{symlink, PermissionsExt};
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::atomic::AtomicU32;
use std::sync::atomic::Ordering;
use std::sync::{Arc, Condvar, Mutex, RwLock};
use std::sync::{Condvar, Mutex, RwLock};
use std::thread;
use std::time::Duration;
use std::time::Instant;

use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use compute_api::spec::{PgIdent, Role};
use compute_api::spec::PgIdent;
use futures::future::join_all;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
@@ -32,23 +31,15 @@ use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion};
use utils::measured_stream::MeasuredReader;

use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath};
use tokio::spawn;
use url::Url;

use remote_storage::{DownloadError, RemotePath};

use crate::checker::create_availability_check_data;
use crate::installed_extensions::get_installed_extensions_sync;
use crate::local_proxy;
use crate::logger::inlinify;
use crate::pg_helpers::*;
use crate::spec::*;
use crate::spec_apply::ApplySpecPhase::{
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser,
DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions,
RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase,
};
use crate::spec_apply::PerDatabasePhase::{
ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension,
};
use crate::spec_apply::{apply_operations, MutableApplyContext, DB};
use crate::sync_sk::{check_if_synced, ping_safekeeper};
use crate::{config, extension_server};

@@ -233,7 +224,10 @@ fn maybe_cgexec(cmd: &str) -> Command {
}
}

pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String {
/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser
/// that we give to customers
#[instrument(skip_all)]
fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let roles = spec
.cluster
.roles
@@ -302,8 +296,11 @@ pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String {
$$;"#,
roles_decl, database_decl,
);

query
info!("Neon superuser created: {}", inlinify(&query));
client
.simple_query(&query)
.map_err(|e| anyhow::anyhow!(e).context(query))?;
Ok(())
}

impl ComputeNode {
@@ -816,14 +813,21 @@ impl ComputeNode {
Ok(())
}

async fn get_maintenance_client(url: &Url) -> Result<tokio_postgres::Client> {
let mut connstr = url.clone();

/// Do initial configuration of the already started Postgres.
#[instrument(skip_all)]
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
// If connection fails,
// it may be the old node with `zenith_admin` superuser.
//
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut connstr = self.connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "apply_config");

let (client, conn) = match tokio_postgres::connect(connstr.as_str(), NoTls).await {
let mut client = match Client::connect(connstr.as_str(), NoTls) {
Err(e) => match e.code() {
Some(&SqlState::INVALID_PASSWORD)
| Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => {
@@ -841,8 +845,8 @@ impl ComputeNode {
let mut client =
Client::connect(zenith_admin_connstr.as_str(), NoTls)
.context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;

// Disable forwarding so that users don't get a cloud_admin role

let mut func = || {
client.simple_query("SET neon.forward_ddl = false")?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
@@ -854,309 +858,49 @@ impl ComputeNode {
drop(client);

// reconnect with connstring with expected name
tokio_postgres::connect(connstr.as_str(), NoTls).await?
Client::connect(connstr.as_str(), NoTls)?
}
_ => return Err(e.into()),
},
Ok((client, conn)) => (client, conn),
Ok(client) => client,
};

spawn(async move {
if let Err(e) = conn.await {
error!("maintenance client connection error: {}", e);
}
});

// Disable DDL forwarding because control plane already knows about the roles/databases
// we're about to modify.
// Disable DDL forwarding because control plane already knows about these roles/databases.
client
.simple_query("SET neon.forward_ddl = false")
.await
.context("apply_config SET neon.forward_ddl = false")?;

Ok(client)
}
// Proceed with post-startup configuration. Note, that order of operations is important.
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?;
cleanup_instance(&mut client).context("apply_config cleanup_instance")?;
handle_roles(spec, &mut client).context("apply_config handle_roles")?;
handle_databases(spec, &mut client).context("apply_config handle_databases")?;
handle_role_deletions(spec, connstr.as_str(), &mut client)
.context("apply_config handle_role_deletions")?;
handle_grants(
spec,
&mut client,
connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)
.context("apply_config handle_grants")?;
handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
create_availability_check_data(&mut client)
.context("apply_config create_availability_check_data")?;

/// Apply the spec to the running PostgreSQL instance.
/// The caller can decide to run with multiple clients in parallel, or
/// single mode. Either way, the commands executed will be the same, and
/// only commands run in different databases are parallelized.
#[instrument(skip_all)]
pub fn apply_spec_sql(
&self,
spec: Arc<ComputeSpec>,
url: Arc<Url>,
concurrency: usize,
) -> Result<()> {
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()?;
// 'Close' connection
drop(client);

info!("Applying config with max {} concurrency", concurrency);
debug!("Config: {:?}", spec);

rt.block_on(async {
// Proceed with post-startup configuration. Note, that order of operations is important.
let client = Self::get_maintenance_client(&url).await?;
let spec = spec.clone();

let databases = get_existing_dbs_async(&client).await?;
let roles = get_existing_roles_async(&client)
.await?
.into_iter()
.map(|role| (role.name.clone(), role))
.collect::<HashMap<String, Role>>();

let jwks_roles = Arc::new(
spec.as_ref()
.local_proxy_config
.iter()
.flat_map(|it| &it.jwks)
.flatten()
.flat_map(|setting| &setting.role_names)
.cloned()
.collect::<HashSet<_>>(),
);

let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext {
roles,
dbs: databases,
}));

for phase in [
CreateSuperUser,
DropInvalidDatabases,
RenameRoles,
CreateAndAlterRoles,
RenameAndDeleteDatabases,
CreateAndAlterDatabases,
] {
debug!("Applying phase {:?}", &phase);
apply_operations(
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
phase,
|| async { Ok(&client) },
)
.await?;
}

let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));

let db_processes = spec
.cluster
.databases
.iter()
.map(|db| DB::new(db.clone()))
// include
.chain(once(DB::SystemDB))
.map(|db| {
let spec = spec.clone();
let ctx = ctx.clone();
let jwks_roles = jwks_roles.clone();
let mut url = url.as_ref().clone();
let concurrency_token = concurrency_token.clone();
let db = db.clone();

debug!("Applying per-database phases for Database {:?}", &db);

match &db {
DB::SystemDB => {}
DB::UserDB(db) => {
url.set_path(db.name.as_str());
}
}

let url = Arc::new(url);
let fut = Self::apply_spec_sql_db(
spec.clone(),
url,
ctx.clone(),
jwks_roles.clone(),
concurrency_token.clone(),
db,
);

Ok(spawn(fut))
})
.collect::<Vec<Result<_, anyhow::Error>>>();

for process in db_processes.into_iter() {
let handle = process?;
handle.await??;
}

for phase in vec![
HandleOtherExtensions,
HandleNeonExtension,
CreateAvailabilityCheck,
DropRoles,
] {
debug!("Applying phase {:?}", &phase);
apply_operations(
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
phase,
|| async { Ok(&client) },
)
.await?;
}

Ok::<(), anyhow::Error>(())
})?;

Ok(())
}

/// Apply SQL migrations of the RunInEachDatabase phase.
///
/// May opt to not connect to databases that don't have any scheduled
/// operations. The function is concurrency-controlled with the provided
/// semaphore. The caller has to make sure the semaphore isn't exhausted.
async fn apply_spec_sql_db(
spec: Arc<ComputeSpec>,
url: Arc<Url>,
ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
jwks_roles: Arc<HashSet<String>>,
concurrency_token: Arc<tokio::sync::Semaphore>,
db: DB,
) -> Result<()> {
let _permit = concurrency_token.acquire().await?;

let mut client_conn = None;

for subphase in [
DeleteDBRoleReferences,
ChangeSchemaPerms,
HandleAnonExtension,
] {
apply_operations(
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
RunInEachDatabase {
db: db.clone(),
subphase,
},
// Only connect if apply_operation actually wants a connection.
// It's quite possible this database doesn't need any queries,
// so by not connecting we save time and effort connecting to
// that database.
|| async {
if client_conn.is_none() {
let db_client = Self::get_maintenance_client(&url).await?;
client_conn.replace(db_client);
}
let client = client_conn.as_ref().unwrap();
Ok(client)
},
)
.await?;
}

drop(client_conn);

Ok::<(), anyhow::Error>(())
}

/// Do initial configuration of the already started Postgres.
#[instrument(skip_all)]
pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
// If connection fails,
// it may be the old node with `zenith_admin` superuser.
//
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut url = self.connstr.clone();
url.query_pairs_mut()
.append_pair("application_name", "apply_config");

let url = Arc::new(url);
let spec = Arc::new(
compute_state
.pspec
.as_ref()
.expect("spec must be set")
.spec
.clone(),
);

// Choose how many concurrent connections to use for applying the spec changes.
// If the cluster is not currently Running we don't have to deal with user connections,
// and can thus use all `max_connections` connection slots. However, that's generally not
// very efficient, so we generally still limit it to a smaller number.
let max_concurrent_connections = if compute_state.status != ComputeStatus::Running {
// If the settings contain 'max_connections', use that as template
if let Some(config) = spec.cluster.settings.find("max_connections") {
config.parse::<usize>().ok()
} else {
// Otherwise, try to find the setting in the postgresql_conf string
spec.cluster
.postgresql_conf
.iter()
.flat_map(|conf| conf.split("\n"))
.filter_map(|line| {
if !line.contains("max_connections") {
return None;
}

let (key, value) = line.split_once("=")?;
let key = key
.trim_start_matches(char::is_whitespace)
.trim_end_matches(char::is_whitespace);

let value = value
.trim_start_matches(char::is_whitespace)
.trim_end_matches(char::is_whitespace);

if key != "max_connections" {
return None;
}

value.parse::<usize>().ok()
})
.next()
}
// If max_connections is present, use at most 1/3rd of that.
// When max_connections is lower than 30, try to use at least 10 connections, but
// never more than max_connections.
.map(|limit| match limit {
0..10 => limit,
10..30 => 10,
30.. => limit / 3,
})
// If we didn't find max_connections, default to 10 concurrent connections.
.unwrap_or(10)
} else {
// state == Running
// Because the cluster is already in the Running state, we should assume users are
// already connected to the cluster, and high concurrency could negatively
// impact user connectivity. Therefore, we can limit concurrency to the number of
// reserved superuser connections, which users wouldn't be able to use anyway.
spec.cluster
.settings
.find("superuser_reserved_connections")
.iter()
.filter_map(|val| val.parse::<usize>().ok())
.map(|val| if val > 1 { val - 1 } else { 1 })
.last()
.unwrap_or(3)
};

// Merge-apply spec & changes to PostgreSQL state.
self.apply_spec_sql(spec.clone(), url.clone(), max_concurrent_connections)?;

if let Some(ref local_proxy) = &spec.clone().local_proxy_config {
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}

// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = url.as_ref().clone();
let mut connstr = connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "migrations");

@@ -1164,8 +908,7 @@ impl ComputeNode {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
handle_migrations(&mut client).context("apply_config handle_migrations")
});

Ok::<(), anyhow::Error>(())
Ok(())
}

// Wrapped this around `pg_ctl reload`, but right now we don't use

@@ -1228,17 +971,33 @@ impl ComputeNode {
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
self.pg_reload_conf()?;

let mut client = Client::connect(self.connstr.as_str(), NoTls)?;

// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
let mut url = self.connstr.clone();
url.query_pairs_mut()
.append_pair("application_name", "apply_config");
let url = Arc::new(url);

let spec = Arc::new(spec.clone());

self.apply_spec_sql(spec, url, 1)?;
client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(
&spec,
&mut client,
self.connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}

// 'Close' connection
drop(client);

Ok(())
})?;

@@ -116,7 +116,7 @@ pub fn write_postgres_conf(
|
||||
vartype: "enum".to_owned(),
|
||||
};
|
||||
|
||||
writeln!(file, "{}", opt.to_pg_setting())?;
|
||||
write!(file, "{}", opt.to_pg_setting())?;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@ use anyhow::Result;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use metrics::proto::MetricFamily;
|
||||
use metrics::Encoder;
|
||||
use metrics::TextEncoder;
|
||||
use tokio::task;
|
||||
@@ -73,22 +72,10 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
|
||||
(&Method::GET, "/metrics") => {
|
||||
debug!("serving /metrics GET request");
|
||||
|
||||
// When we call TextEncoder::encode() below, it will immediately
|
||||
// return an error if a metric family has no metrics, so we need to
|
||||
// preemptively filter out metric families with no metrics.
|
||||
let metrics = installed_extensions::collect()
|
||||
.into_iter()
|
||||
.filter(|m| !m.get_metric().is_empty())
|
||||
.collect::<Vec<MetricFamily>>();
|
||||
|
||||
let encoder = TextEncoder::new();
|
||||
let mut buffer = vec![];
|
||||
|
||||
if let Err(err) = encoder.encode(&metrics, &mut buffer) {
|
||||
let msg = format!("error handling /metrics request: {err}");
|
||||
error!(msg);
|
||||
return render_json_error(&msg, StatusCode::INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
let metrics = installed_extensions::collect();
|
||||
let encoder = TextEncoder::new();
|
||||
encoder.encode(&metrics, &mut buffer).unwrap();
|
||||
|
||||
match Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
|
||||
@@ -115,7 +115,7 @@ pub fn get_installed_extensions_sync(connstr: Url) -> Result<()> {
|
||||
|
||||
static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"compute_installed_extensions",
|
||||
"installed_extensions",
|
||||
"Number of databases where the version of extension is installed",
|
||||
&["extension_name", "version"]
|
||||
)
|
||||
|
||||
@@ -23,6 +23,5 @@ pub mod monitor;
|
||||
pub mod params;
|
||||
pub mod pg_helpers;
|
||||
pub mod spec;
|
||||
mod spec_apply;
|
||||
pub mod swap;
|
||||
pub mod sync_sk;
|
||||
|
||||
@@ -10,9 +10,9 @@ use std::thread::JoinHandle;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use futures::StreamExt;
|
||||
use ini::Ini;
|
||||
use notify::{RecursiveMode, Watcher};
|
||||
use postgres::{Client, Transaction};
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
use tokio::time::timeout;
|
||||
use tokio_postgres::NoTls;
|
||||
@@ -197,34 +197,27 @@ impl Escaping for PgIdent {
|
||||
}
|
||||
|
||||
/// Build a list of existing Postgres roles
|
||||
pub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result<Vec<Role>> {
|
||||
let postgres_roles = client
|
||||
.query_raw::<str, &String, &[String; 0]>(
|
||||
"SELECT rolname, rolpassword FROM pg_catalog.pg_authid",
|
||||
&[],
|
||||
)
|
||||
.await?
|
||||
.filter_map(|row| async { row.ok() })
|
||||
pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result<Vec<Role>> {
|
||||
let postgres_roles = xact
|
||||
.query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])?
|
||||
.iter()
|
||||
.map(|row| Role {
|
||||
name: row.get("rolname"),
|
||||
encrypted_password: row.get("rolpassword"),
|
||||
options: None,
|
||||
})
|
||||
.collect()
|
||||
.await;
|
||||
.collect();
|
||||
|
||||
Ok(postgres_roles)
|
||||
}
|
||||
|
||||
/// Build a list of existing Postgres databases
|
||||
pub async fn get_existing_dbs_async(
|
||||
client: &tokio_postgres::Client,
|
||||
) -> Result<HashMap<String, Database>> {
|
||||
pub fn get_existing_dbs(client: &mut Client) -> Result<HashMap<String, Database>> {
|
||||
// `pg_database.datconnlimit = -2` means that the database is in the
|
||||
// invalid state. See:
|
||||
// https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
|
||||
let rowstream = client
|
||||
.query_raw::<str, &String, &[String; 0]>(
|
||||
let postgres_dbs: Vec<Database> = client
|
||||
.query(
|
||||
"SELECT
|
||||
datname AS name,
|
||||
datdba::regrole::text AS owner,
|
||||
@@ -233,11 +226,8 @@ pub async fn get_existing_dbs_async(
|
||||
FROM
|
||||
pg_catalog.pg_database;",
|
||||
&[],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let dbs_map = rowstream
|
||||
.filter_map(|r| async { r.ok() })
|
||||
)?
|
||||
.iter()
|
||||
.map(|row| Database {
|
||||
name: row.get("name"),
|
||||
owner: row.get("owner"),
|
||||
@@ -245,9 +235,12 @@ pub async fn get_existing_dbs_async(
|
||||
invalid: row.get("invalid"),
|
||||
options: None,
|
||||
})
|
||||
.collect();
|
||||
|
||||
let dbs_map = postgres_dbs
|
||||
.iter()
|
||||
.map(|db| (db.name.clone(), db.clone()))
|
||||
.collect::<HashMap<_, _>>()
|
||||
.await;
|
||||
.collect::<HashMap<_, _>>();
|
||||
|
||||
Ok(dbs_map)
|
||||
}
|
||||
|
||||
@@ -1,17 +1,22 @@
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use postgres::Client;
|
||||
use reqwest::StatusCode;
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use tracing::{error, info, instrument, warn};
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use postgres::config::Config;
|
||||
use postgres::{Client, NoTls};
|
||||
use reqwest::StatusCode;
|
||||
use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};
|
||||
|
||||
use crate::config;
|
||||
use crate::logger::inlinify;
|
||||
use crate::migration::MigrationRunner;
|
||||
use crate::params::PG_HBA_ALL_MD5;
|
||||
use crate::pg_helpers::*;
|
||||
|
||||
use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse};
|
||||
use compute_api::spec::ComputeSpec;
|
||||
use compute_api::spec::{ComputeSpec, PgIdent, Role};
|
||||
|
||||
// Do control plane request and return response if any. In case of error it
|
||||
// returns a bool flag indicating whether it makes sense to retry the request
|
||||
@@ -146,6 +151,625 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute could be unexpectedly shut down, for example, during the
|
||||
/// database dropping. This leaves the database in the invalid state,
|
||||
/// which prevents new db creation with the same name. This function
|
||||
/// will clean it up before proceeding with catalog updates. All
|
||||
/// possible future cleanup operations may go here too.
|
||||
#[instrument(skip_all)]
|
||||
pub fn cleanup_instance(client: &mut Client) -> Result<()> {
|
||||
let existing_dbs = get_existing_dbs(client)?;
|
||||
|
||||
for (_, db) in existing_dbs {
|
||||
if db.invalid {
|
||||
// After recent commit in Postgres, interrupted DROP DATABASE
|
||||
// leaves the database in the invalid state. According to the
|
||||
// commit message, the only option for user is to drop it again.
|
||||
// See:
|
||||
// https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
|
||||
//
|
||||
// Postgres Neon extension is done the way, that db is de-registered
|
||||
// in the control plane metadata only after it is dropped. So there is
|
||||
// a chance that it still thinks that db should exist. This means
|
||||
// that it will be re-created by `handle_databases()`. Yet, it's fine
|
||||
// as user can just repeat drop (in vanilla Postgres they would need
|
||||
// to do the same, btw).
|
||||
let query = format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote());
|
||||
info!("dropping invalid database {}", db.name);
|
||||
client.execute(query.as_str(), &[])?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Given a cluster spec json and open transaction it handles roles creation,
|
||||
/// deletion and update.
|
||||
#[instrument(skip_all)]
|
||||
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
let mut xact = client.transaction()?;
|
||||
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
|
||||
|
||||
let mut jwks_roles = HashSet::new();
|
||||
if let Some(local_proxy) = &spec.local_proxy_config {
|
||||
for jwks_setting in local_proxy.jwks.iter().flatten() {
|
||||
for role_name in &jwks_setting.role_names {
|
||||
jwks_roles.insert(role_name.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print a list of existing Postgres roles (only in debug mode)
|
||||
if span_enabled!(Level::INFO) {
|
||||
let mut vec = Vec::new();
|
||||
for r in &existing_roles {
|
||||
vec.push(format!(
|
||||
"{}:{}",
|
||||
r.name,
|
||||
if r.encrypted_password.is_some() {
|
||||
"[FILTERED]"
|
||||
} else {
|
||||
"(null)"
|
||||
}
|
||||
));
|
||||
}
|
||||
|
||||
info!("postgres roles (total {}): {:?}", vec.len(), vec);
|
||||
}
|
||||
|
||||
// Process delta operations first
|
||||
if let Some(ops) = &spec.delta_operations {
|
||||
info!("processing role renames");
|
||||
for op in ops {
|
||||
match op.action.as_ref() {
|
||||
"delete_role" => {
|
||||
// no-op now, roles will be deleted at the end of configuration
|
||||
}
|
||||
// Renaming role drops its password, since role name is
|
||||
// used as a salt there. It is important that this role
|
||||
// is recorded with a new `name` in the `roles` list.
|
||||
// Follow up roles update will set the new password.
|
||||
"rename_role" => {
|
||||
let new_name = op.new_name.as_ref().unwrap();
|
||||
|
||||
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
|
||||
if existing_roles.iter().any(|r| r.name == op.name) {
|
||||
let query: String = format!(
|
||||
"ALTER ROLE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
new_name.pg_quote()
|
||||
);
|
||||
|
||||
warn!("renaming role '{}' to '{}'", op.name, new_name);
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Refresh Postgres roles info to handle possible roles renaming
|
||||
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
|
||||
|
||||
info!(
|
||||
"handling cluster spec roles (total {})",
|
||||
spec.cluster.roles.len()
|
||||
);
|
||||
for role in &spec.cluster.roles {
|
||||
let name = &role.name;
|
||||
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
|
||||
let pg_role = existing_roles.iter().find(|r| r.name == *name);
|
||||
|
||||
enum RoleAction {
|
||||
None,
|
||||
Update,
|
||||
Create,
|
||||
}
|
||||
let action = if let Some(r) = pg_role {
|
||||
if (r.encrypted_password.is_none() && role.encrypted_password.is_some())
|
||||
|| (r.encrypted_password.is_some() && role.encrypted_password.is_none())
|
||||
{
|
||||
RoleAction::Update
|
||||
} else if let Some(pg_pwd) = &r.encrypted_password {
|
||||
// Check whether password changed or not (trim 'md5' prefix first if any)
|
||||
//
|
||||
// This is a backward compatibility hack, which comes from the times when we were using
|
||||
// md5 for everyone and hashes were stored in the console db without md5 prefix. So when
|
||||
// role comes from the control-plane (json spec) `Role.encrypted_password` doesn't have md5 prefix,
|
||||
// but when role comes from Postgres (`get_existing_roles` / `existing_roles`) it has this prefix.
|
||||
// Here is the only place so far where we compare hashes, so it seems to be the best candidate
|
||||
// to place this compatibility layer.
|
||||
let pg_pwd = if let Some(stripped) = pg_pwd.strip_prefix("md5") {
|
||||
stripped
|
||||
} else {
|
||||
pg_pwd
|
||||
};
|
||||
if pg_pwd != *role.encrypted_password.as_ref().unwrap() {
|
||||
RoleAction::Update
|
||||
} else {
|
||||
RoleAction::None
|
||||
}
|
||||
} else {
|
||||
RoleAction::None
|
||||
}
|
||||
} else {
|
||||
RoleAction::Create
|
||||
};
|
||||
|
||||
match action {
|
||||
RoleAction::None => {}
|
||||
RoleAction::Update => {
|
||||
// This can be run on /every/ role! Not just ones created through the console.
|
||||
// This means that if you add some funny ALTER here that adds a permission,
|
||||
// this will get run even on user-created roles! This will result in different
|
||||
// behavior before and after a spec gets reapplied. The below ALTER as it stands
|
||||
// now only grants LOGIN and changes the password. Please do not allow this branch
|
||||
// to do anything silly.
|
||||
let mut query: String = format!("ALTER ROLE {} ", name.pg_quote());
|
||||
query.push_str(&role.to_pg_options());
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
}
|
||||
RoleAction::Create => {
|
||||
// This branch only runs when roles are created through the console, so it is
|
||||
// safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
|
||||
// from neon_superuser.
|
||||
let mut query: String = format!(
|
||||
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
|
||||
name.pg_quote()
|
||||
);
|
||||
if jwks_roles.contains(name.as_str()) {
|
||||
query = format!("CREATE ROLE {}", name.pg_quote());
|
||||
}
|
||||
info!("running role create query: '{}'", &query);
|
||||
query.push_str(&role.to_pg_options());
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
}
|
||||
}
|
||||
|
||||
if span_enabled!(Level::INFO) {
|
||||
let pwd = if role.encrypted_password.is_some() {
|
||||
"[FILTERED]"
|
||||
} else {
|
||||
"(null)"
|
||||
};
|
||||
let action_str = match action {
|
||||
RoleAction::None => "",
|
||||
RoleAction::Create => " -> create",
|
||||
RoleAction::Update => " -> update",
|
||||
};
|
||||
info!(" - {}:{}{}", name, pwd, action_str);
|
||||
}
|
||||
}
|
||||
|
||||
xact.commit()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reassign all dependent objects and delete requested roles.
|
||||
#[instrument(skip_all)]
|
||||
pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
|
||||
if let Some(ops) = &spec.delta_operations {
|
||||
// First, reassign all dependent objects to db owners.
|
||||
info!("reassigning dependent objects of to-be-deleted roles");
|
||||
|
||||
// Fetch existing roles. We could've exported and used `existing_roles` from
|
||||
// `handle_roles()`, but we only make this list there before creating new roles.
|
||||
// Which is probably fine as we never create to-be-deleted roles, but that'd
|
||||
// just look a bit untidy. Anyway, the entire `pg_roles` should be in shared
|
||||
// buffers already, so this shouldn't be a big deal.
|
||||
let mut xact = client.transaction()?;
|
||||
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
|
||||
xact.commit()?;
|
||||
|
||||
for op in ops {
|
||||
// Check that role is still present in Postgres, as this could be a
|
||||
// restart with the same spec after role deletion.
|
||||
if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) {
|
||||
reassign_owned_objects(spec, connstr, &op.name)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Second, proceed with role deletions.
|
||||
info!("processing role deletions");
|
||||
let mut xact = client.transaction()?;
|
||||
for op in ops {
|
||||
// We do not check either role exists or not,
|
||||
// Postgres will take care of it for us
|
||||
if op.action == "delete_role" {
|
||||
let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote());
|
||||
|
||||
warn!("deleting role '{}'", &op.name);
|
||||
xact.execute(query.as_str(), &[])?;
|
||||
}
|
||||
}
|
||||
xact.commit()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn reassign_owned_objects_in_one_db(
|
||||
conf: Config,
|
||||
role_name: &PgIdent,
|
||||
db_owner: &PgIdent,
|
||||
) -> Result<()> {
|
||||
let mut client = conf.connect(NoTls)?;
|
||||
|
||||
// This will reassign all dependent objects to the db owner
|
||||
let reassign_query = format!(
|
||||
"REASSIGN OWNED BY {} TO {}",
|
||||
role_name.pg_quote(),
|
||||
db_owner.pg_quote()
|
||||
);
|
||||
info!(
|
||||
"reassigning objects owned by '{}' in db '{}' to '{}'",
|
||||
role_name,
|
||||
conf.get_dbname().unwrap_or(""),
|
||||
db_owner
|
||||
);
|
||||
client.simple_query(&reassign_query)?;
|
||||
|
||||
// This now will only drop privileges of the role
|
||||
let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
|
||||
client.simple_query(&drop_query)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Reassign all owned objects in all databases to the owner of the database.
|
||||
fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> {
|
||||
for db in &spec.cluster.databases {
|
||||
if db.owner != *role_name {
|
||||
let mut conf = Config::from_str(connstr)?;
|
||||
conf.dbname(&db.name);
|
||||
reassign_owned_objects_in_one_db(conf, role_name, &db.owner)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Also handle case when there are no databases in the spec.
|
||||
// In this case we need to reassign objects in the default database.
|
||||
let conf = Config::from_str(connstr)?;
|
||||
let db_owner = PgIdent::from_str("cloud_admin")?;
|
||||
reassign_owned_objects_in_one_db(conf, role_name, &db_owner)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// It follows mostly the same logic as `handle_roles()` excepting that we
|
||||
/// does not use an explicit transactions block, since major database operations
|
||||
/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level
|
||||
/// atomicity should be enough here due to the order of operations and various checks,
|
||||
/// which together provide us idempotency.
|
||||
#[instrument(skip_all)]
|
||||
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
let existing_dbs = get_existing_dbs(client)?;
|
||||
|
||||
// Print a list of existing Postgres databases (only in debug mode)
|
||||
if span_enabled!(Level::INFO) {
|
||||
let mut vec = Vec::new();
|
||||
for (dbname, db) in &existing_dbs {
|
||||
vec.push(format!("{}:{}", dbname, db.owner));
|
||||
}
|
||||
info!("postgres databases (total {}): {:?}", vec.len(), vec);
|
||||
}
|
||||
|
||||
// Process delta operations first
|
||||
if let Some(ops) = &spec.delta_operations {
|
||||
info!("processing delta operations on databases");
|
||||
for op in ops {
|
||||
match op.action.as_ref() {
|
||||
// We do not check either DB exists or not,
|
||||
// Postgres will take care of it for us
|
||||
"delete_db" => {
|
||||
// In Postgres we can't drop a database if it is a template.
|
||||
// So we need to unset the template flag first, but it could
|
||||
// be a retry, so we could've already dropped the database.
|
||||
// Check that database exists first to make it idempotent.
|
||||
let unset_template_query: String = format!(
|
||||
"
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS(
|
||||
SELECT 1
|
||||
FROM pg_catalog.pg_database
|
||||
WHERE datname = {}
|
||||
)
|
||||
THEN
|
||||
ALTER DATABASE {} is_template false;
|
||||
END IF;
|
||||
END
|
||||
$$;",
|
||||
escape_literal(&op.name),
|
||||
&op.name.pg_quote()
|
||||
);
|
||||
// Use FORCE to drop database even if there are active connections.
|
||||
// We run this from `cloud_admin`, so it should have enough privileges.
|
||||
// NB: there could be other db states, which prevent us from dropping
|
||||
// the database. For example, if db is used by any active subscription
|
||||
// or replication slot.
|
||||
// TODO: deal with it once we allow logical replication. Proper fix should
|
||||
// involve returning an error code to the control plane, so it could
|
||||
// figure out that this is a non-retryable error, return it to the user
|
||||
// and fail operation permanently.
|
||||
let drop_db_query: String = format!(
|
||||
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
|
||||
&op.name.pg_quote()
|
||||
);
|
||||
|
||||
warn!("deleting database '{}'", &op.name);
|
||||
client.execute(unset_template_query.as_str(), &[])?;
|
||||
client.execute(drop_db_query.as_str(), &[])?;
|
||||
}
|
||||
"rename_db" => {
|
||||
let new_name = op.new_name.as_ref().unwrap();
|
||||
|
||||
if existing_dbs.contains_key(&op.name) {
|
||||
let query: String = format!(
|
||||
"ALTER DATABASE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
new_name.pg_quote()
|
||||
);
|
||||
|
||||
warn!("renaming database '{}' to '{}'", op.name, new_name);
|
||||
client.execute(query.as_str(), &[])?;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Refresh Postgres databases info to handle possible renames
|
||||
let existing_dbs = get_existing_dbs(client)?;
|
||||
|
||||
info!(
|
||||
"handling cluster spec databases (total {})",
|
||||
spec.cluster.databases.len()
|
||||
);
|
||||
for db in &spec.cluster.databases {
|
||||
let name = &db.name;
|
||||
let pg_db = existing_dbs.get(name);
|
||||
|
||||
enum DatabaseAction {
|
||||
None,
|
||||
Update,
|
||||
Create,
|
||||
}
|
||||
let action = if let Some(r) = pg_db {
|
||||
// XXX: db owner name is returned as quoted string from Postgres,
|
||||
// when quoting is needed.
|
||||
let new_owner = if r.owner.starts_with('"') {
|
||||
db.owner.pg_quote()
|
||||
} else {
|
||||
db.owner.clone()
|
||||
};
|
||||
|
||||
if new_owner != r.owner {
|
||||
// Update the owner
|
||||
DatabaseAction::Update
|
||||
} else {
|
||||
DatabaseAction::None
|
||||
}
|
||||
} else {
|
||||
DatabaseAction::Create
|
||||
};
|
||||
|
||||
match action {
|
||||
DatabaseAction::None => {}
|
||||
DatabaseAction::Update => {
|
||||
let query: String = format!(
|
||||
"ALTER DATABASE {} OWNER TO {}",
|
||||
name.pg_quote(),
|
||||
db.owner.pg_quote()
|
||||
);
|
||||
let _guard = info_span!("executing", query).entered();
|
||||
client.execute(query.as_str(), &[])?;
|
||||
}
|
||||
DatabaseAction::Create => {
|
||||
let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
|
||||
query.push_str(&db.to_pg_options());
|
||||
let _guard = info_span!("executing", query).entered();
|
||||
client.execute(query.as_str(), &[])?;
|
||||
let grant_query: String = format!(
|
||||
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
|
||||
name.pg_quote()
|
||||
);
|
||||
client.execute(grant_query.as_str(), &[])?;
|
||||
}
|
||||
};
|
||||
|
||||
if span_enabled!(Level::INFO) {
|
||||
let action_str = match action {
|
||||
DatabaseAction::None => "",
|
||||
DatabaseAction::Create => " -> create",
|
||||
DatabaseAction::Update => " -> update",
|
||||
};
|
||||
info!(" - {}:{}{}", db.name, db.owner, action_str);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
|
||||
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
|
||||
#[instrument(skip_all)]
|
||||
pub fn handle_grants(
|
||||
spec: &ComputeSpec,
|
||||
client: &mut Client,
|
||||
connstr: &str,
|
||||
enable_anon_extension: bool,
|
||||
) -> Result<()> {
|
||||
info!("modifying database permissions");
|
||||
let existing_dbs = get_existing_dbs(client)?;
|
||||
|
||||
// Do some per-database access adjustments. We'd better do this at db creation time,
|
||||
// but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
|
||||
// atomically.
|
||||
for db in &spec.cluster.databases {
|
||||
match existing_dbs.get(&db.name) {
|
||||
Some(pg_db) => {
|
||||
if pg_db.restrict_conn || pg_db.invalid {
|
||||
info!(
|
||||
"skipping grants for db {} (invalid: {}, connections not allowed: {})",
|
||||
db.name, pg_db.invalid, pg_db.restrict_conn
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
None => {
|
||||
bail!(
|
||||
"database {} doesn't exist in Postgres after handle_databases()",
|
||||
db.name
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let mut conf = Config::from_str(connstr)?;
|
||||
conf.dbname(&db.name);
|
||||
|
||||
let mut db_client = conf.connect(NoTls)?;
|
||||
|
||||
// This will only change ownership on the schema itself, not the objects
|
||||
// inside it. Without it owner of the `public` schema will be `cloud_admin`
|
||||
// and database owner cannot do anything with it. SQL procedure ensures
|
||||
// that it won't error out if schema `public` doesn't exist.
|
||||
let alter_query = format!(
|
||||
"DO $$\n\
|
||||
DECLARE\n\
|
||||
schema_owner TEXT;\n\
|
||||
BEGIN\n\
|
||||
IF EXISTS(\n\
|
||||
SELECT nspname\n\
|
||||
FROM pg_catalog.pg_namespace\n\
|
||||
WHERE nspname = 'public'\n\
|
||||
)\n\
|
||||
THEN\n\
|
||||
SELECT nspowner::regrole::text\n\
|
||||
FROM pg_catalog.pg_namespace\n\
|
||||
WHERE nspname = 'public'\n\
|
||||
INTO schema_owner;\n\
|
||||
\n\
|
||||
IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\
|
||||
THEN\n\
|
||||
ALTER SCHEMA public OWNER TO {};\n\
|
||||
END IF;\n\
|
||||
END IF;\n\
|
||||
END\n\
|
||||
$$;",
|
||||
db.owner.pg_quote()
|
||||
);
|
||||
db_client.simple_query(&alter_query)?;
|
||||
|
||||
// Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user.
|
||||
// This is needed because since postgres 15 this privilege is removed by default.
|
||||
// TODO: web_access isn't created for almost 1 year. It could be that we have
|
||||
// active users of 1 year old projects, but hopefully not, so check it and
|
||||
// remove this code if possible. The worst thing that could happen is that
|
||||
// user won't be able to use public schema in NEW databases created in the
|
||||
// very OLD project.
|
||||
//
|
||||
// Also, alter default permissions so that relations created by extensions can be
|
||||
// used by neon_superuser without permission issues.
|
||||
let grant_query = "DO $$\n\
|
||||
BEGIN\n\
|
||||
IF EXISTS(\n\
|
||||
SELECT nspname\n\
|
||||
FROM pg_catalog.pg_namespace\n\
|
||||
WHERE nspname = 'public'\n\
|
||||
) AND\n\
|
||||
current_setting('server_version_num')::int/10000 >= 15\n\
|
||||
THEN\n\
|
||||
IF EXISTS(\n\
|
||||
SELECT rolname\n\
|
||||
FROM pg_catalog.pg_roles\n\
|
||||
WHERE rolname = 'web_access'\n\
|
||||
)\n\
|
||||
THEN\n\
|
||||
GRANT CREATE ON SCHEMA public TO web_access;\n\
|
||||
END IF;\n\
|
||||
END IF;\n\
|
||||
IF EXISTS(\n\
|
||||
SELECT nspname\n\
|
||||
FROM pg_catalog.pg_namespace\n\
|
||||
WHERE nspname = 'public'\n\
|
||||
)\n\
|
||||
THEN\n\
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;\n\
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;\n\
|
||||
END IF;\n\
|
||||
END\n\
|
||||
$$;"
|
||||
.to_string();
|
||||
|
||||
info!(
|
||||
"grant query for db {} : {}",
|
||||
&db.name,
|
||||
inlinify(&grant_query)
|
||||
);
|
||||
db_client.simple_query(&grant_query)?;
|
||||
|
||||
// it is important to run this after all grants
|
||||
if enable_anon_extension {
|
||||
handle_extension_anon(spec, &db.owner, &mut db_client, false)
|
||||
.context("handle_grants handle_extension_anon")?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
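
A small illustrative follow-up (not part of this diff) for checking the effect of the ALTER SCHEMA above: read back the owner of the `public` schema with the same blocking `postgres` client, mirroring the per-database connection made in `handle_grants`. The helper name and connection string handling are assumptions.

// Illustrative sketch only; not part of the Neon source.
fn public_schema_owner(connstr: &str, dbname: &str) -> Result<Option<String>> {
    let mut conf = Config::from_str(connstr)?;
    conf.dbname(dbname);
    let mut client = conf.connect(NoTls)?;

    // Returns None if the `public` schema doesn't exist in this database.
    let row = client.query_opt(
        "SELECT nspowner::regrole::text FROM pg_catalog.pg_namespace WHERE nspname = 'public'",
        &[],
    )?;
    Ok(row.map(|r| r.get(0)))
}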
/// Create required system extensions
|
||||
#[instrument(skip_all)]
|
||||
pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
|
||||
if libs.contains("pg_stat_statements") {
|
||||
// Create extension only if this compute really needs it
|
||||
let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements";
|
||||
info!("creating system extensions with query: {}", query);
|
||||
client.simple_query(query)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run CREATE and ALTER EXTENSION neon UPDATE for postgres database
|
||||
#[instrument(skip_all)]
|
||||
pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
|
||||
info!("handle extension neon");
|
||||
|
||||
let mut query = "CREATE SCHEMA IF NOT EXISTS neon";
|
||||
client.simple_query(query)?;
|
||||
|
||||
query = "CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon";
|
||||
info!("create neon extension with query: {}", query);
|
||||
client.simple_query(query)?;
|
||||
|
||||
query = "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'";
|
||||
client.simple_query(query)?;
|
||||
|
||||
query = "ALTER EXTENSION neon SET SCHEMA neon";
|
||||
info!("alter neon extension schema with query: {}", query);
|
||||
client.simple_query(query)?;
|
||||
|
||||
// This will be a no-op if the extension is already up to date,
// which may happen in two cases:
// - the extension was just installed
// - the extension was already installed and is up to date
|
||||
let query = "ALTER EXTENSION neon UPDATE";
|
||||
info!("update neon extension version with query: {}", query);
|
||||
if let Err(e) = client.simple_query(query) {
|
||||
error!(
|
||||
"failed to upgrade neon extension during `handle_extension_neon`: {}",
|
||||
e
|
||||
);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
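
A hypothetical sanity check (not in this diff) that could run after the steps above, reading back the installed version and schema of the `neon` extension with the same blocking client:

// Illustrative sketch only; the helper name is an assumption.
fn check_neon_extension(client: &mut Client) -> Result<()> {
    let row = client.query_one(
        "SELECT extversion, extnamespace::regnamespace::text FROM pg_extension WHERE extname = 'neon'",
        &[],
    )?;
    let version: String = row.get(0);
    let schema: String = row.get(1);
    info!("neon extension {} is installed in schema {}", version, schema);
    Ok(())
}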
#[instrument(skip_all)]
|
||||
pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
|
||||
info!("handle neon extension upgrade");
|
||||
|
||||
@@ -1,680 +0,0 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::future::Future;
|
||||
use std::iter::empty;
|
||||
use std::iter::once;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::compute::construct_superuser_query;
|
||||
use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt};
|
||||
use anyhow::{bail, Result};
|
||||
use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role};
|
||||
use futures::future::join_all;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio_postgres::Client;
|
||||
use tracing::{debug, info_span, Instrument};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum DB {
|
||||
SystemDB,
|
||||
UserDB(Database),
|
||||
}
|
||||
|
||||
impl DB {
|
||||
pub fn new(db: Database) -> DB {
|
||||
Self::UserDB(db)
|
||||
}
|
||||
|
||||
pub fn is_owned_by(&self, role: &PgIdent) -> bool {
|
||||
match self {
|
||||
DB::SystemDB => false,
|
||||
DB::UserDB(db) => &db.owner == role,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for DB {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
DB::SystemDB => f.debug_tuple("SystemDB").finish(),
|
||||
DB::UserDB(db) => f.debug_tuple("UserDB").field(&db.name).finish(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum PerDatabasePhase {
|
||||
DeleteDBRoleReferences,
|
||||
ChangeSchemaPerms,
|
||||
HandleAnonExtension,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ApplySpecPhase {
|
||||
CreateSuperUser,
|
||||
DropInvalidDatabases,
|
||||
RenameRoles,
|
||||
CreateAndAlterRoles,
|
||||
RenameAndDeleteDatabases,
|
||||
CreateAndAlterDatabases,
|
||||
RunInEachDatabase { db: DB, subphase: PerDatabasePhase },
|
||||
HandleOtherExtensions,
|
||||
HandleNeonExtension,
|
||||
CreateAvailabilityCheck,
|
||||
DropRoles,
|
||||
}
|
||||
|
||||
pub struct Operation {
|
||||
pub query: String,
|
||||
pub comment: Option<String>,
|
||||
}
|
||||
|
||||
pub struct MutableApplyContext {
|
||||
pub roles: HashMap<String, Role>,
|
||||
pub dbs: HashMap<String, Database>,
|
||||
}
|
||||
|
||||
/// Apply the operations that belong to the given spec apply phase.
|
||||
///
|
||||
/// Commands within a single phase are executed in order of Iterator yield.
|
||||
/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database
|
||||
/// indicated by its `db` field, and can share a single client for all changes
|
||||
/// to that database.
|
||||
///
|
||||
/// Notes:
|
||||
/// - Commands are pipelined, and thus may cause incomplete apply if one
|
||||
/// command of many fails.
|
||||
/// - Failing commands will fail the phase's apply step once the return value
|
||||
/// is processed.
|
||||
/// - No timeouts have (yet) been implemented.
|
||||
/// - The caller is responsible for limiting and/or applying concurrency.
|
||||
pub async fn apply_operations<'a, Fut, F>(
|
||||
spec: Arc<ComputeSpec>,
|
||||
ctx: Arc<RwLock<MutableApplyContext>>,
|
||||
jwks_roles: Arc<HashSet<String>>,
|
||||
apply_spec_phase: ApplySpecPhase,
|
||||
client: F,
|
||||
) -> Result<()>
|
||||
where
|
||||
F: FnOnce() -> Fut,
|
||||
Fut: Future<Output = Result<&'a Client>>,
|
||||
{
|
||||
debug!("Starting phase {:?}", &apply_spec_phase);
|
||||
let span = info_span!("db_apply_changes", phase=?apply_spec_phase);
|
||||
let span2 = span.clone();
|
||||
async move {
|
||||
debug!("Processing phase {:?}", &apply_spec_phase);
|
||||
let ctx = ctx;
|
||||
|
||||
let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase)
|
||||
.await?
|
||||
.peekable();
|
||||
|
||||
// Return (and by doing so, skip requesting the PostgreSQL client) if
|
||||
// we don't have any operations scheduled.
|
||||
if ops.peek().is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let client = client().await?;
|
||||
|
||||
debug!("Applying phase {:?}", &apply_spec_phase);
|
||||
|
||||
let active_queries = ops
|
||||
.map(|op| {
|
||||
let Operation { comment, query } = op;
|
||||
let inspan = match comment {
|
||||
None => span.clone(),
|
||||
Some(comment) => info_span!("phase {}: {}", comment),
|
||||
};
|
||||
|
||||
async {
|
||||
let query = query;
|
||||
let res = client.simple_query(&query).await;
|
||||
debug!(
|
||||
"{} {}",
|
||||
if res.is_ok() {
|
||||
"successfully executed"
|
||||
} else {
|
||||
"failed to execute"
|
||||
},
|
||||
query
|
||||
);
|
||||
res
|
||||
}
|
||||
.instrument(inspan)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
drop(ctx);
|
||||
|
||||
for it in join_all(active_queries).await {
|
||||
drop(it?);
|
||||
}
|
||||
|
||||
debug!("Completed phase {:?}", &apply_spec_phase);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
.instrument(span2)
|
||||
.await
|
||||
}
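
A minimal sketch of how a caller might drive apply_operations, under the assumptions that the listed phases run sequentially and that a single already-connected tokio_postgres Client is reused for all of them; the driver function and the phase subset below are illustrative, not taken from this diff.

// Hypothetical driver, not part of this diff.
async fn apply_global_phases(
    spec: Arc<ComputeSpec>,
    ctx: Arc<RwLock<MutableApplyContext>>,
    jwks_roles: Arc<HashSet<String>>,
    client: &Client,
) -> Result<()> {
    for phase in [
        ApplySpecPhase::CreateSuperUser,
        ApplySpecPhase::DropInvalidDatabases,
        ApplySpecPhase::RenameRoles,
        ApplySpecPhase::CreateAndAlterRoles,
    ] {
        // The closure lets apply_operations skip acquiring a client when the
        // phase produces no operations.
        apply_operations(
            spec.clone(),
            ctx.clone(),
            jwks_roles.clone(),
            phase,
            || async { Ok(client) },
        )
        .await?;
    }
    Ok(())
}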
/// Create a stream of operations to be executed for that phase of applying
|
||||
/// changes.
|
||||
///
|
||||
/// In the future we may generate a single stream of changes and then
|
||||
/// sort/merge/batch execution, but for now this is a nice way to improve
|
||||
/// batching behaviour of the commands.
|
||||
async fn get_operations<'a>(
|
||||
spec: &'a ComputeSpec,
|
||||
ctx: &'a RwLock<MutableApplyContext>,
|
||||
jwks_roles: &'a HashSet<String>,
|
||||
apply_spec_phase: &'a ApplySpecPhase,
|
||||
) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
|
||||
match apply_spec_phase {
|
||||
ApplySpecPhase::CreateSuperUser => {
|
||||
let query = construct_superuser_query(spec);
|
||||
|
||||
Ok(Box::new(once(Operation {
|
||||
query,
|
||||
comment: None,
|
||||
})))
|
||||
}
|
||||
ApplySpecPhase::DropInvalidDatabases => {
|
||||
let mut ctx = ctx.write().await;
|
||||
let databases = &mut ctx.dbs;
|
||||
|
||||
let keys: Vec<_> = databases
|
||||
.iter()
|
||||
.filter(|(_, db)| db.invalid)
|
||||
.map(|(dbname, _)| dbname.clone())
|
||||
.collect();
|
||||
|
||||
// After a recent commit in Postgres, an interrupted DROP DATABASE
// leaves the database in an invalid state. According to the
// commit message, the only option for the user is to drop it again.
// See:
// https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9
//
// The Neon Postgres extension is built such that a db is de-registered
// in the control plane metadata only after it is dropped. So there is
// a chance the control plane still thinks the db should exist. This means
// that it will be re-created by the `CreateAndAlterDatabases` phase. This
// is fine, as the user can just drop the database again (in vanilla
// Postgres they would need to do the same).
|
||||
let operations = keys
|
||||
.into_iter()
|
||||
.filter_map(move |dbname| ctx.dbs.remove(&dbname))
|
||||
.map(|db| Operation {
|
||||
query: format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote()),
|
||||
comment: Some(format!("Dropping invalid database {}", db.name)),
|
||||
});
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::RenameRoles => {
|
||||
let mut ctx = ctx.write().await;
|
||||
|
||||
let operations = spec
|
||||
.delta_operations
|
||||
.iter()
|
||||
.flatten()
|
||||
.filter(|op| op.action == "rename_role")
|
||||
.filter_map(move |op| {
|
||||
let roles = &mut ctx.roles;
|
||||
|
||||
if !roles.contains_key(op.name.as_str()) {
|
||||
None
|
||||
} else {
|
||||
let new_name = op.new_name.as_ref().unwrap();
|
||||
let mut role = roles.remove(op.name.as_str()).unwrap();
|
||||
|
||||
role.name = new_name.clone();
|
||||
role.encrypted_password = None;
|
||||
roles.insert(role.name.clone(), role);
|
||||
|
||||
Some(Operation {
|
||||
query: format!(
|
||||
"ALTER ROLE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
new_name.pg_quote()
|
||||
),
|
||||
comment: Some(format!("renaming role '{}' to '{}'", op.name, new_name)),
|
||||
})
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::CreateAndAlterRoles => {
|
||||
let mut ctx = ctx.write().await;
|
||||
|
||||
let operations = spec.cluster.roles
|
||||
.iter()
|
||||
.filter_map(move |role| {
|
||||
let roles = &mut ctx.roles;
|
||||
let db_role = roles.get(&role.name);
|
||||
|
||||
match db_role {
|
||||
Some(db_role) => {
|
||||
if db_role.encrypted_password != role.encrypted_password {
|
||||
// This can be run on /every/ role! Not just ones created through the console.
|
||||
// This means that if you add some funny ALTER here that adds a permission,
|
||||
// this will get run even on user-created roles! This will result in different
|
||||
// behavior before and after a spec gets reapplied. The below ALTER as it stands
|
||||
// now only grants LOGIN and changes the password. Please do not allow this branch
|
||||
// to do anything silly.
|
||||
Some(Operation {
|
||||
query: format!(
|
||||
"ALTER ROLE {} {}",
|
||||
role.name.pg_quote(),
|
||||
role.to_pg_options(),
|
||||
),
|
||||
comment: None,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let query = if !jwks_roles.contains(role.name.as_str()) {
|
||||
format!(
|
||||
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}",
|
||||
role.name.pg_quote(),
|
||||
role.to_pg_options(),
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"CREATE ROLE {} {}",
|
||||
role.name.pg_quote(),
|
||||
role.to_pg_options(),
|
||||
)
|
||||
};
|
||||
Some(Operation {
|
||||
query,
|
||||
comment: Some(format!("creating role {}", role.name)),
|
||||
})
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::RenameAndDeleteDatabases => {
|
||||
let mut ctx = ctx.write().await;
|
||||
|
||||
let operations = spec
|
||||
.delta_operations
|
||||
.iter()
|
||||
.flatten()
|
||||
.filter_map(move |op| {
|
||||
let databases = &mut ctx.dbs;
|
||||
match op.action.as_str() {
|
||||
// We do not check whether the DB exists or not,
|
||||
// Postgres will take care of it for us
|
||||
"delete_db" => {
|
||||
// In Postgres we can't drop a database if it is a template.
|
||||
// So we need to unset the template flag first, but it could
|
||||
// be a retry, so we could've already dropped the database.
|
||||
// Check that database exists first to make it idempotent.
|
||||
let unset_template_query: String = format!(
|
||||
include_str!("sql/unset_template_for_drop_dbs.sql"),
|
||||
datname_str = escape_literal(&op.name),
|
||||
datname = &op.name.pg_quote()
|
||||
);
|
||||
|
||||
// Use FORCE to drop database even if there are active connections.
|
||||
// We run this from `cloud_admin`, so it should have enough privileges.
|
||||
// NB: there could be other db states, which prevent us from dropping
|
||||
// the database. For example, if db is used by any active subscription
|
||||
// or replication slot.
|
||||
// TODO: deal with it once we allow logical replication. Proper fix should
|
||||
// involve returning an error code to the control plane, so it could
|
||||
// figure out that this is a non-retryable error, return it to the user
|
||||
// and fail operation permanently.
|
||||
let drop_db_query: String = format!(
|
||||
"DROP DATABASE IF EXISTS {} WITH (FORCE)",
|
||||
&op.name.pg_quote()
|
||||
);
|
||||
|
||||
databases.remove(&op.name);
|
||||
|
||||
Some(vec![
|
||||
Operation {
|
||||
query: unset_template_query,
|
||||
comment: Some(format!(
|
||||
"optionally clearing template flags for DB {}",
|
||||
op.name,
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: drop_db_query,
|
||||
comment: Some(format!("deleting database {}", op.name,)),
|
||||
},
|
||||
])
|
||||
}
|
||||
"rename_db" => {
|
||||
if let Some(mut db) = databases.remove(&op.name) {
|
||||
// update state of known databases
|
||||
let new_name = op.new_name.as_ref().unwrap();
|
||||
db.name = new_name.clone();
|
||||
databases.insert(db.name.clone(), db);
|
||||
|
||||
Some(vec![Operation {
|
||||
query: format!(
|
||||
"ALTER DATABASE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
new_name.pg_quote(),
|
||||
),
|
||||
comment: Some(format!(
|
||||
"renaming database '{}' to '{}'",
|
||||
op.name, new_name
|
||||
)),
|
||||
}])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
.flatten();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::CreateAndAlterDatabases => {
|
||||
let mut ctx = ctx.write().await;
|
||||
|
||||
let operations = spec
|
||||
.cluster
|
||||
.databases
|
||||
.iter()
|
||||
.filter_map(move |db| {
|
||||
let databases = &mut ctx.dbs;
|
||||
if let Some(edb) = databases.get_mut(&db.name) {
|
||||
let change_owner = if edb.owner.starts_with('"') {
|
||||
db.owner.pg_quote() != edb.owner
|
||||
} else {
|
||||
db.owner != edb.owner
|
||||
};
|
||||
|
||||
edb.owner = db.owner.clone();
|
||||
|
||||
if change_owner {
|
||||
Some(vec![Operation {
|
||||
query: format!(
|
||||
"ALTER DATABASE {} OWNER TO {}",
|
||||
db.name.pg_quote(),
|
||||
db.owner.pg_quote()
|
||||
),
|
||||
comment: Some(format!(
|
||||
"changing database owner of database {} to {}",
|
||||
db.name, db.owner
|
||||
)),
|
||||
}])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
databases.insert(db.name.clone(), db.clone());
|
||||
|
||||
Some(vec![
|
||||
Operation {
|
||||
query: format!(
|
||||
"CREATE DATABASE {} {}",
|
||||
db.name.pg_quote(),
|
||||
db.to_pg_options(),
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
|
||||
db.name.pg_quote()
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
])
|
||||
}
|
||||
})
|
||||
.flatten();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::RunInEachDatabase { db, subphase } => {
|
||||
match subphase {
|
||||
PerDatabasePhase::DeleteDBRoleReferences => {
|
||||
let ctx = ctx.read().await;
|
||||
|
||||
let operations =
|
||||
spec.delta_operations
|
||||
.iter()
|
||||
.flatten()
|
||||
.filter(|op| op.action == "delete_role")
|
||||
.filter_map(move |op| {
|
||||
if db.is_owned_by(&op.name) {
|
||||
return None;
|
||||
}
|
||||
if !ctx.roles.contains_key(&op.name) {
|
||||
return None;
|
||||
}
|
||||
let quoted = op.name.pg_quote();
|
||||
let new_owner = match &db {
|
||||
DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(),
|
||||
DB::UserDB(db) => db.owner.pg_quote(),
|
||||
};
|
||||
|
||||
Some(vec![
|
||||
// This will reassign all dependent objects to the db owner
|
||||
Operation {
|
||||
query: format!(
|
||||
"REASSIGN OWNED BY {} TO {}",
|
||||
quoted, new_owner,
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
// This now will only drop privileges of the role
|
||||
Operation {
|
||||
query: format!("DROP OWNED BY {}", quoted),
|
||||
comment: None,
|
||||
},
|
||||
])
|
||||
})
|
||||
.flatten();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
PerDatabasePhase::ChangeSchemaPerms => {
|
||||
let ctx = ctx.read().await;
|
||||
let databases = &ctx.dbs;
|
||||
|
||||
let db = match &db {
|
||||
// ignore schema permissions on the system database
|
||||
DB::SystemDB => return Ok(Box::new(empty())),
|
||||
DB::UserDB(db) => db,
|
||||
};
|
||||
|
||||
if databases.get(&db.name).is_none() {
|
||||
bail!("database {} doesn't exist in PostgreSQL", db.name);
|
||||
}
|
||||
|
||||
let edb = databases.get(&db.name).unwrap();
|
||||
|
||||
if edb.restrict_conn || edb.invalid {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
let operations = vec![
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/set_public_schema_owner.sql"),
|
||||
db_owner = db.owner.pg_quote()
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
Operation {
|
||||
query: String::from(include_str!("sql/default_grants.sql")),
|
||||
comment: None,
|
||||
},
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
PerDatabasePhase::HandleAnonExtension => {
|
||||
// Only install Anon into user databases
|
||||
let db = match &db {
|
||||
DB::SystemDB => return Ok(Box::new(empty())),
|
||||
DB::UserDB(db) => db,
|
||||
};
|
||||
// Never install Anon when it's not enabled as feature
|
||||
if !spec.features.contains(&ComputeFeature::AnonExtension) {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
// Only install Anon when it's added in preload libraries
|
||||
let opt_libs = spec.cluster.settings.find("shared_preload_libraries");
|
||||
|
||||
let libs = match opt_libs {
|
||||
Some(libs) => libs,
|
||||
None => return Ok(Box::new(empty())),
|
||||
};
|
||||
|
||||
if !libs.contains("anon") {
|
||||
return Ok(Box::new(empty()));
|
||||
}
|
||||
|
||||
let db_owner = db.owner.pg_quote();
|
||||
|
||||
let operations = vec![
|
||||
// Create anon extension if this compute needs it
|
||||
// Users cannot create it themselves, because superuser is required.
|
||||
Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS anon CASCADE"),
|
||||
comment: Some(String::from("creating anon extension")),
|
||||
},
|
||||
// Initialize anon extension
|
||||
// This also requires superuser privileges, so users cannot do it themselves.
|
||||
Operation {
|
||||
query: String::from("SELECT anon.init()"),
|
||||
comment: Some(String::from("initializing anon extension data")),
|
||||
},
|
||||
Operation {
|
||||
query: format!("GRANT ALL ON SCHEMA anon TO {}", db_owner),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension schema permissions",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}",
|
||||
db_owner
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension schema functions permissions",
|
||||
)),
|
||||
},
|
||||
// We need this because some functions are defined as SECURITY DEFINER.
// In Postgres, SECURITY DEFINER functions are executed with the privileges
// of the owner.
// In the anon extension this is needed to access some GUCs, which are only accessible to
// the superuser. But we've patched Postgres to allow db_owner to access them as well.
// So we need to change the owner of these functions to db_owner.
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/anon_ext_fn_reassign.sql"),
|
||||
db_owner = db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"change anon extension functions owner to database_owner",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}",
|
||||
db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension tables permissions",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: format!(
|
||||
"GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}",
|
||||
db_owner,
|
||||
),
|
||||
comment: Some(String::from(
|
||||
"granting anon extension sequences permissions",
|
||||
)),
|
||||
},
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
}
|
||||
}
|
||||
// Interestingly, we only install p_s_s in the main database, even when
|
||||
// it's preloaded.
|
||||
ApplySpecPhase::HandleOtherExtensions => {
|
||||
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
|
||||
if libs.contains("pg_stat_statements") {
|
||||
return Ok(Box::new(once(Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"),
|
||||
comment: Some(String::from("create system extensions")),
|
||||
})));
|
||||
}
|
||||
}
|
||||
Ok(Box::new(empty()))
|
||||
}
|
||||
ApplySpecPhase::HandleNeonExtension => {
|
||||
let operations = vec![
|
||||
Operation {
|
||||
query: String::from("CREATE SCHEMA IF NOT EXISTS neon"),
|
||||
comment: Some(String::from("init: add schema for extension")),
|
||||
},
|
||||
Operation {
|
||||
query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"),
|
||||
comment: Some(String::from(
|
||||
"init: install the extension if not already installed",
|
||||
)),
|
||||
},
|
||||
Operation {
|
||||
query: String::from(
|
||||
"UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'",
|
||||
),
|
||||
comment: Some(String::from("compat/fix: make neon relocatable")),
|
||||
},
|
||||
Operation {
|
||||
query: String::from("ALTER EXTENSION neon SET SCHEMA neon"),
|
||||
comment: Some(String::from("compat/fix: alter neon extension schema")),
|
||||
},
|
||||
Operation {
|
||||
query: String::from("ALTER EXTENSION neon UPDATE"),
|
||||
comment: Some(String::from("compat/update: update neon extension version")),
|
||||
},
|
||||
]
|
||||
.into_iter();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
ApplySpecPhase::CreateAvailabilityCheck => Ok(Box::new(once(Operation {
|
||||
query: String::from(include_str!("sql/add_availabilitycheck_tables.sql")),
|
||||
comment: None,
|
||||
}))),
|
||||
ApplySpecPhase::DropRoles => {
|
||||
let operations = spec
|
||||
.delta_operations
|
||||
.iter()
|
||||
.flatten()
|
||||
.filter(|op| op.action == "delete_role")
|
||||
.map(|op| Operation {
|
||||
query: format!("DROP ROLE IF EXISTS {}", op.name.pg_quote()),
|
||||
comment: None,
|
||||
});
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS(
|
||||
SELECT 1
|
||||
FROM pg_catalog.pg_tables
|
||||
WHERE tablename = 'health_check'
|
||||
)
|
||||
THEN
|
||||
CREATE TABLE health_check (
|
||||
id serial primary key,
|
||||
updated_at timestamptz default now()
|
||||
);
|
||||
INSERT INTO health_check VALUES (1, now())
|
||||
ON CONFLICT (id) DO UPDATE
|
||||
SET updated_at = now();
|
||||
END IF;
|
||||
END
|
||||
$$
|
||||
@@ -1,12 +0,0 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
query varchar;
|
||||
BEGIN
|
||||
FOR query IN SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {db_owner};'
|
||||
FROM pg_proc p
|
||||
JOIN pg_namespace nsp ON p.pronamespace = nsp.oid
|
||||
WHERE nsp.nspname = 'anon' LOOP
|
||||
EXECUTE query;
|
||||
END LOOP;
|
||||
END
|
||||
$$;
|
||||
@@ -1,30 +0,0 @@
|
||||
DO
|
||||
$$
|
||||
BEGIN
|
||||
IF EXISTS(
|
||||
SELECT nspname
|
||||
FROM pg_catalog.pg_namespace
|
||||
WHERE nspname = 'public'
|
||||
) AND
|
||||
current_setting('server_version_num')::int / 10000 >= 15
|
||||
THEN
|
||||
IF EXISTS(
|
||||
SELECT rolname
|
||||
FROM pg_catalog.pg_roles
|
||||
WHERE rolname = 'web_access'
|
||||
)
|
||||
THEN
|
||||
GRANT CREATE ON SCHEMA public TO web_access;
|
||||
END IF;
|
||||
END IF;
|
||||
IF EXISTS(
|
||||
SELECT nspname
|
||||
FROM pg_catalog.pg_namespace
|
||||
WHERE nspname = 'public'
|
||||
)
|
||||
THEN
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
@@ -1,23 +0,0 @@
|
||||
DO
|
||||
$$
|
||||
DECLARE
|
||||
schema_owner TEXT;
|
||||
BEGIN
|
||||
IF EXISTS(
|
||||
SELECT nspname
|
||||
FROM pg_catalog.pg_namespace
|
||||
WHERE nspname = 'public'
|
||||
)
|
||||
THEN
|
||||
SELECT nspowner::regrole::text
|
||||
FROM pg_catalog.pg_namespace
|
||||
WHERE nspname = 'public'
|
||||
INTO schema_owner;
|
||||
|
||||
IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'
|
||||
THEN
|
||||
ALTER SCHEMA public OWNER TO {db_owner};
|
||||
END IF;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
@@ -1,12 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF EXISTS(
|
||||
SELECT 1
|
||||
FROM pg_catalog.pg_database
|
||||
WHERE datname = {datname_str}
|
||||
)
|
||||
THEN
|
||||
ALTER DATABASE {datname} is_template false;
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
@@ -1153,7 +1153,6 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
|
||||
timeline_info.timeline_id
|
||||
);
|
||||
}
|
||||
// TODO: rename to import-basebackup-plus-wal
|
||||
TimelineCmd::Import(args) => {
|
||||
let tenant_id = get_tenant_id(args.tenant_id, env)?;
|
||||
let timeline_id = args.timeline_id;
|
||||
|
||||
@@ -33,6 +33,7 @@ reason = "the marvin attack only affects private key decryption, not public key
|
||||
[licenses]
|
||||
allow = [
|
||||
"Apache-2.0",
|
||||
"Artistic-2.0",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
"CC0-1.0",
|
||||
@@ -66,7 +67,7 @@ registries = []
|
||||
# More documentation about the 'bans' section can be found here:
|
||||
# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
|
||||
[bans]
|
||||
multiple-versions = "allow"
|
||||
multiple-versions = "warn"
|
||||
wildcards = "allow"
|
||||
highlight = "all"
|
||||
workspace-default-features = "allow"
|
||||
|
||||
@@ -113,21 +113,21 @@ so manual installation of dependencies is not recommended.
|
||||
A single virtual environment with all dependencies is described in the single `Pipfile`.
|
||||
|
||||
### Prerequisites
|
||||
- Install Python 3.11 (the minimal supported version) or greater.
|
||||
- Install Python 3.9 (the minimal supported version) or greater.
|
||||
- Our setup with poetry should work with newer python versions too. So feel free to open an issue with a `c/test-runner` label if something doesn't work as expected.
|
||||
- If you have trouble with another version, you can resolve it by installing Python 3.11 separately, via [pyenv](https://github.com/pyenv/pyenv) or via a system package manager, e.g.:
- If you have trouble with another version, you can resolve it by installing Python 3.9 separately, via [pyenv](https://github.com/pyenv/pyenv) or via a system package manager, e.g.:
|
||||
```bash
|
||||
# In Ubuntu
|
||||
sudo add-apt-repository ppa:deadsnakes/ppa
|
||||
sudo apt update
|
||||
sudo apt install python3.11
|
||||
sudo apt install python3.9
|
||||
```
|
||||
- Install `poetry`
|
||||
- Exact version of `poetry` is not important, see installation instructions available at poetry's [website](https://python-poetry.org/docs/#installation).
|
||||
- Install dependencies via `./scripts/pysync`.
|
||||
- Note that CI uses a specific Python version (look for `PYTHON_VERSION` [here](https://github.com/neondatabase/docker-images/blob/main/rust/Dockerfile)),
so if you have a different version, some linting tools can yield different results locally vs in CI.
|
||||
- You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.11`.
|
||||
- You can explicitly specify which Python to use by running `poetry env use /path/to/python`, e.g. `poetry env use python3.9`.
|
||||
This may also disable the `The currently activated Python version X.Y.Z is not supported by the project` warning.
|
||||
|
||||
Run `poetry shell` to activate the virtual environment.
|
||||
|
||||
@@ -33,7 +33,6 @@ remote_storage.workspace = true
|
||||
postgres_backend.workspace = true
|
||||
nix = {workspace = true, optional = true}
|
||||
reqwest.workspace = true
|
||||
rand.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
bincode.workspace = true
|
||||
|
||||
@@ -97,15 +97,6 @@ pub struct ConfigToml {
|
||||
pub control_plane_api: Option<reqwest::Url>,
|
||||
pub control_plane_api_token: Option<String>,
|
||||
pub control_plane_emergency_mode: bool,
|
||||
/// Unstable feature: subject to change or removal without notice.
|
||||
/// See <https://github.com/neondatabase/neon/pull/9218>.
|
||||
pub import_pgdata_upcall_api: Option<reqwest::Url>,
|
||||
/// Unstable feature: subject to change or removal without notice.
|
||||
/// See <https://github.com/neondatabase/neon/pull/9218>.
|
||||
pub import_pgdata_upcall_api_token: Option<String>,
|
||||
/// Unstable feature: subject to change or removal without notice.
|
||||
/// See <https://github.com/neondatabase/neon/pull/9218>.
|
||||
pub import_pgdata_aws_endpoint_url: Option<reqwest::Url>,
|
||||
pub heatmap_upload_concurrency: usize,
|
||||
pub secondary_download_concurrency: usize,
|
||||
pub virtual_file_io_engine: Option<crate::models::virtual_file::IoEngineKind>,
|
||||
@@ -118,8 +109,6 @@ pub struct ConfigToml {
|
||||
pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub no_sync: Option<bool>,
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub server_side_batch_timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
@@ -328,8 +317,6 @@ pub mod defaults {
|
||||
pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
|
||||
|
||||
pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
|
||||
|
||||
pub const DEFAULT_SERVER_SIDE_BATCH_TIMEOUT: Option<&str> = None;
|
||||
}
|
||||
|
||||
impl Default for ConfigToml {
|
||||
@@ -395,10 +382,6 @@ impl Default for ConfigToml {
|
||||
control_plane_api_token: (None),
|
||||
control_plane_emergency_mode: (false),
|
||||
|
||||
import_pgdata_upcall_api: (None),
|
||||
import_pgdata_upcall_api_token: (None),
|
||||
import_pgdata_aws_endpoint_url: (None),
|
||||
|
||||
heatmap_upload_concurrency: (DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
|
||||
secondary_download_concurrency: (DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
|
||||
|
||||
@@ -414,8 +397,6 @@ impl Default for ConfigToml {
|
||||
ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
|
||||
l0_flush: None,
|
||||
virtual_file_io_mode: None,
|
||||
server_side_batch_timeout: DEFAULT_SERVER_SIDE_BATCH_TIMEOUT
|
||||
.map(|duration| humantime::parse_duration(duration).unwrap()),
|
||||
tenant_config: TenantConfigToml::default(),
|
||||
no_sync: None,
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ pub struct ShardedRange<'a> {
|
||||
|
||||
// Calculate the size of a range within the blocks of the same relation, or spanning only the
|
||||
// top page in the previous relation's space.
|
||||
pub fn contiguous_range_len(range: &Range<Key>) -> u32 {
|
||||
fn contiguous_range_len(range: &Range<Key>) -> u32 {
|
||||
debug_assert!(is_contiguous_range(range));
|
||||
if range.start.field6 == 0xffffffff {
|
||||
range.end.field6 + 1
|
||||
@@ -67,7 +67,7 @@ pub fn contiguous_range_len(range: &Range<Key>) -> u32 {
|
||||
/// This matters, because:
|
||||
/// - Within such ranges, keys are used contiguously. Outside such ranges it is sparse.
|
||||
/// - Within such ranges, we may calculate distances using simple subtraction of field6.
|
||||
pub fn is_contiguous_range(range: &Range<Key>) -> bool {
|
||||
fn is_contiguous_range(range: &Range<Key>) -> bool {
|
||||
range.start.field1 == range.end.field1
|
||||
&& range.start.field2 == range.end.field2
|
||||
&& range.start.field3 == range.end.field3
|
||||
|
||||
@@ -2,8 +2,6 @@ pub mod detach_ancestor;
|
||||
pub mod partitioning;
|
||||
pub mod utilization;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
use camino::Utf8PathBuf;
|
||||
pub use utilization::PageserverUtilization;
|
||||
|
||||
use std::{
|
||||
@@ -229,9 +227,6 @@ pub enum TimelineCreateRequestMode {
|
||||
// we continue to accept it by having it here.
|
||||
pg_version: Option<u32>,
|
||||
},
|
||||
ImportPgdata {
|
||||
import_pgdata: TimelineCreateRequestModeImportPgdata,
|
||||
},
|
||||
// NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.
|
||||
// (serde picks the first matching enum variant, in declaration order).
|
||||
Bootstrap {
|
||||
@@ -241,42 +236,6 @@ pub enum TimelineCreateRequestMode {
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineCreateRequestModeImportPgdata {
|
||||
pub location: ImportPgdataLocation,
|
||||
pub idempotency_key: ImportPgdataIdempotencyKey,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||
pub enum ImportPgdataLocation {
|
||||
#[cfg(feature = "testing")]
|
||||
LocalFs { path: Utf8PathBuf },
|
||||
AwsS3 {
|
||||
region: String,
|
||||
bucket: String,
|
||||
/// A better name for this would be `prefix`; changing requires coordination with cplane.
|
||||
/// See <https://github.com/neondatabase/cloud/issues/20646>.
|
||||
key: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[serde(transparent)]
|
||||
pub struct ImportPgdataIdempotencyKey(pub String);
|
||||
|
||||
impl ImportPgdataIdempotencyKey {
|
||||
pub fn random() -> Self {
|
||||
use rand::{distributions::Alphanumeric, Rng};
|
||||
Self(
|
||||
rand::thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(20)
|
||||
.map(char::from)
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct LsnLeaseRequest {
|
||||
pub lsn: Lsn,
|
||||
|
||||
@@ -716,9 +716,6 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Proto looks like this:
|
||||
// FeMessage::Query("pagestream_v2{FeMessage::CopyData(PagestreamFeMessage::GetPage(..))}")
|
||||
|
||||
async fn process_message(
|
||||
&mut self,
|
||||
handler: &mut impl Handler<IO>,
|
||||
@@ -834,7 +831,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
use CopyStreamHandlerEnd::*;
|
||||
|
||||
let expected_end = match &end {
|
||||
ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true,
|
||||
ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF => true,
|
||||
CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error))
|
||||
if is_expected_io_error(io_error) =>
|
||||
{
|
||||
@@ -874,9 +871,6 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
// message from server' when it receives ErrorResponse (anything but
|
||||
// CopyData/CopyDone) back.
|
||||
CopyFail => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)),
|
||||
|
||||
// When cancelled, send no response: we must not risk blocking on sending that response
|
||||
Cancelled => None,
|
||||
_ => None,
|
||||
};
|
||||
if let Some((err, errcode)) = err_to_send_and_errcode {
|
||||
@@ -1054,8 +1048,6 @@ pub enum CopyStreamHandlerEnd {
|
||||
/// The connection was lost
|
||||
#[error("connection error: {0}")]
|
||||
Disconnected(#[from] ConnectionError),
|
||||
#[error("Shutdown")]
|
||||
Cancelled,
|
||||
/// Some other error
|
||||
#[error(transparent)]
|
||||
Other(#[from] anyhow::Error),
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
[package]
|
||||
name = "postgres_initdb"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
tokio.workspace = true
|
||||
camino.workspace = true
|
||||
thiserror.workspace = true
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
@@ -1,103 +0,0 @@
|
||||
//! The canonical way we run `initdb` in Neon.
|
||||
//!
|
||||
//! initdb has implicit defaults that are dependent on the environment, e.g., locales & collations.
|
||||
//!
|
||||
//! This module's job is to eliminate the environment-dependence as much as possible.
|
||||
|
||||
use std::fmt;
|
||||
|
||||
use camino::Utf8Path;
|
||||
|
||||
pub struct RunInitdbArgs<'a> {
|
||||
pub superuser: &'a str,
|
||||
pub locale: &'a str,
|
||||
pub initdb_bin: &'a Utf8Path,
|
||||
pub pg_version: u32,
|
||||
pub library_search_path: &'a Utf8Path,
|
||||
pub pgdata: &'a Utf8Path,
|
||||
}
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub enum Error {
|
||||
Spawn(std::io::Error),
|
||||
Failed {
|
||||
status: std::process::ExitStatus,
|
||||
stderr: Vec<u8>,
|
||||
},
|
||||
WaitOutput(std::io::Error),
|
||||
Other(anyhow::Error),
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Error::Spawn(e) => write!(f, "Error spawning command: {:?}", e),
|
||||
Error::Failed { status, stderr } => write!(
|
||||
f,
|
||||
"Command failed with status {:?}: {}",
|
||||
status,
|
||||
String::from_utf8_lossy(stderr)
|
||||
),
|
||||
Error::WaitOutput(e) => write!(f, "Error waiting for command output: {:?}", e),
|
||||
Error::Other(e) => write!(f, "Error: {:?}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn do_run_initdb(args: RunInitdbArgs<'_>) -> Result<(), Error> {
|
||||
let RunInitdbArgs {
|
||||
superuser,
|
||||
locale,
|
||||
initdb_bin: initdb_bin_path,
|
||||
pg_version,
|
||||
library_search_path,
|
||||
pgdata,
|
||||
} = args;
|
||||
let mut initdb_command = tokio::process::Command::new(initdb_bin_path);
|
||||
initdb_command
|
||||
.args(["--pgdata", pgdata.as_ref()])
|
||||
.args(["--username", superuser])
|
||||
.args(["--encoding", "utf8"])
|
||||
.args(["--locale", locale])
|
||||
.arg("--no-instructions")
|
||||
.arg("--no-sync")
|
||||
.env_clear()
|
||||
.env("LD_LIBRARY_PATH", library_search_path)
|
||||
.env("DYLD_LIBRARY_PATH", library_search_path)
|
||||
.stdin(std::process::Stdio::null())
|
||||
// The stdout output is the same for every invocation; we don't need it.
|
||||
.stdout(std::process::Stdio::null())
|
||||
// we would be interested in the stderr output, if there was any
|
||||
.stderr(std::process::Stdio::piped());
|
||||
|
||||
// Up to and including version 14, only the libc provider was available.
|
||||
if pg_version > 14 {
|
||||
// Version 17 brought with it a builtin locale provider which only provides
|
||||
// C and C.UTF-8. While being safer for collation purposes since it is
|
||||
// guaranteed to be consistent throughout a major release, it is also more
|
||||
// performant.
|
||||
let locale_provider = if pg_version >= 17 { "builtin" } else { "libc" };
|
||||
|
||||
initdb_command.args(["--locale-provider", locale_provider]);
|
||||
}
|
||||
|
||||
let initdb_proc = initdb_command.spawn().map_err(Error::Spawn)?;
|
||||
|
||||
// Ideally we'd select here with the cancellation token, but the problem is that
|
||||
// we can't safely terminate initdb: it launches processes of its own, and killing
|
||||
// initdb doesn't kill them. After we return from this function, we want the target
|
||||
// directory to be able to be cleaned up.
|
||||
// See https://github.com/neondatabase/neon/issues/6385
|
||||
let initdb_output = initdb_proc
|
||||
.wait_with_output()
|
||||
.await
|
||||
.map_err(Error::WaitOutput)?;
|
||||
if !initdb_output.status.success() {
|
||||
return Err(Error::Failed {
|
||||
status: initdb_output.status,
|
||||
stderr: initdb_output.stderr,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
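
A hypothetical call site for do_run_initdb; every value below (paths, superuser name, locale, version) is an illustrative assumption, not taken from this diff.

// Illustrative sketch only.
async fn init_cluster() -> Result<(), Error> {
    do_run_initdb(RunInitdbArgs {
        superuser: "cloud_admin",
        locale: "C.UTF-8",
        initdb_bin: Utf8Path::new("/usr/local/pgsql/bin/initdb"),
        pg_version: 16,
        library_search_path: Utf8Path::new("/usr/local/pgsql/lib"),
        pgdata: Utf8Path::new("/var/db/pgdata"),
    })
    .await
}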
@@ -185,7 +185,7 @@ pub struct CancelKeyData {
|
||||
impl fmt::Display for CancelKeyData {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let hi = (self.backend_pid as u64) << 32;
|
||||
let lo = (self.cancel_key as u64) & 0xffffffff;
|
||||
let lo = self.cancel_key as u64;
|
||||
let id = hi | lo;
|
||||
|
||||
// This format is more compact and might work better for logs.
|
||||
@@ -1046,13 +1046,4 @@ mod tests {
|
||||
let data = [0, 0, 0, 7, 0, 0, 0, 0];
|
||||
FeStartupPacket::parse(&mut BytesMut::from_iter(data)).unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancel_key_data() {
|
||||
let key = CancelKeyData {
|
||||
backend_pid: -1817212860,
|
||||
cancel_key: -1183897012,
|
||||
};
|
||||
assert_eq!(format!("{key}"), "CancelKeyData(93af8844b96f2a4c)");
|
||||
}
|
||||
}
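
For the values in this test, whether cancel_key is masked to its low 32 bits decides the outcome: casting the negative i32 straight to u64 sign-extends it, and the ones in the high word swallow the backend_pid part under the bitwise OR. A standalone illustration (the literal values are copied from the test above; the function itself is not part of this diff):

// Illustrative sketch only.
fn cancel_key_mask_demo() {
    let backend_pid: i32 = -1817212860; // 0x93af8844 as u32
    let cancel_key: i32 = -1183897012;  // 0xb96f2a4c as u32

    let hi = (backend_pid as u64) << 32;                   // 0x93af8844_00000000
    let masked = hi | ((cancel_key as u64) & 0xffffffff);  // keeps the backend_pid word
    let unmasked = hi | (cancel_key as u64);                // sign extension overwrites it

    assert_eq!(format!("{masked:x}"), "93af8844b96f2a4c");
    assert_eq!(format!("{unmasked:x}"), "ffffffffb96f2a4c");
}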
@@ -24,7 +24,6 @@ use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerCl
|
||||
use bytes::Bytes;
|
||||
use futures::future::Either;
|
||||
use futures::stream::Stream;
|
||||
use futures::FutureExt;
|
||||
use futures_util::StreamExt;
|
||||
use futures_util::TryStreamExt;
|
||||
use http_types::{StatusCode, Url};
|
||||
@@ -32,7 +31,6 @@ use scopeguard::ScopeGuard;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::debug;
|
||||
use utils::backoff;
|
||||
use utils::backoff::exponential_backoff_duration_seconds;
|
||||
|
||||
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
|
||||
use crate::{
|
||||
@@ -99,7 +97,10 @@ impl AzureBlobStorage {
|
||||
|
||||
pub fn relative_path_to_name(&self, path: &RemotePath) -> String {
|
||||
assert_eq!(std::path::MAIN_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
let path_string = path.get_path().as_str();
|
||||
let path_string = path
|
||||
.get_path()
|
||||
.as_str()
|
||||
.trim_end_matches(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
match &self.prefix_in_container {
|
||||
Some(prefix) => {
|
||||
if prefix.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
|
||||
@@ -276,14 +277,19 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
cancel: &CancellationToken,
|
||||
) -> impl Stream<Item = Result<Listing, DownloadError>> {
|
||||
// Use the passed prefix, or fall back to the prefix_in_container value if it is not set.
|
||||
let list_prefix = prefix.map(|p| self.relative_path_to_name(p)).or_else(|| {
|
||||
self.prefix_in_container.clone().map(|mut s| {
|
||||
if !s.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
|
||||
s.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
let list_prefix = prefix
|
||||
.map(|p| self.relative_path_to_name(p))
|
||||
.or_else(|| self.prefix_in_container.clone())
|
||||
.map(|mut p| {
|
||||
// The prefix is required to end with a separator;
// otherwise the request will return only the prefix entry itself.
|
||||
if matches!(mode, ListingMode::WithDelimiter)
|
||||
&& !p.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR)
|
||||
{
|
||||
p.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
}
|
||||
s
|
||||
})
|
||||
});
|
||||
p
|
||||
});
|
||||
|
||||
async_stream::stream! {
|
||||
let _permit = self.permit(RequestKind::List, cancel).await?;
|
||||
@@ -304,59 +310,40 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
|
||||
let mut next_marker = None;
|
||||
|
||||
let mut timeout_try_cnt = 1;
|
||||
|
||||
'outer: loop {
|
||||
let mut builder = builder.clone();
|
||||
if let Some(marker) = next_marker.clone() {
|
||||
builder = builder.marker(marker);
|
||||
}
|
||||
// Azure Blob Rust SDK does not expose the list blob API directly. Users have to use
|
||||
// their pageable iterator wrapper that returns all keys as a stream. We want to have
|
||||
// full control of paging, and therefore we only take the first item from the stream.
|
||||
let mut response_stream = builder.into_stream();
|
||||
let response = response_stream.next();
|
||||
// Timeout mechanism: the Azure client will sometimes get stuck on a request, while retrying that
// request would immediately succeed. Therefore, we use an exponentially growing timeout to retry
// the request. (Usually, exponential backoff is used to determine the sleep time between two
// retries.) We start with a 10-second timeout and double the timeout for each failure, up to 5 failures.
// timeout = min(5 * (1.0+1.0)^n, self.timeout).
|
||||
let this_timeout = (5.0 * exponential_backoff_duration_seconds(timeout_try_cnt, 1.0, self.timeout.as_secs_f64())).min(self.timeout.as_secs_f64());
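// (Illustration, not part of this diff: with a 60-second overall timeout, the per-try
// timeouts per the formula above come out as 10, 20, 40, 60, 60 seconds for n = 1..=5.)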
let response = tokio::time::timeout(Duration::from_secs_f64(this_timeout), response);
|
||||
let response = response.map(|res| {
|
||||
match res {
|
||||
Ok(Some(Ok(res))) => Ok(Some(res)),
|
||||
Ok(Some(Err(e))) => Err(to_download_error(e)),
|
||||
Ok(None) => Ok(None),
|
||||
Err(_elapsed) => Err(DownloadError::Timeout),
|
||||
}
|
||||
let response = builder.into_stream();
|
||||
let response = response.into_stream().map_err(to_download_error);
|
||||
let response = tokio_stream::StreamExt::timeout(response, self.timeout);
|
||||
let response = response.map(|res| match res {
|
||||
Ok(res) => res,
|
||||
Err(_elapsed) => Err(DownloadError::Timeout),
|
||||
});
|
||||
|
||||
let mut response = std::pin::pin!(response);
|
||||
|
||||
let mut max_keys = max_keys.map(|mk| mk.get());
|
||||
let next_item = tokio::select! {
|
||||
op = response => op,
|
||||
op = response.next() => Ok(op),
|
||||
_ = cancel.cancelled() => Err(DownloadError::Cancelled),
|
||||
};
|
||||
|
||||
if let Err(DownloadError::Timeout) = &next_item {
|
||||
timeout_try_cnt += 1;
|
||||
if timeout_try_cnt <= 5 {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let next_item = next_item?;
|
||||
|
||||
if timeout_try_cnt >= 2 {
|
||||
tracing::warn!("Azure Blob Storage list timed out and succeeded after {} tries", timeout_try_cnt);
|
||||
}
|
||||
timeout_try_cnt = 1;
|
||||
|
||||
}?;
|
||||
let Some(entry) = next_item else {
|
||||
// The list is complete, so yield it.
|
||||
break;
|
||||
};
|
||||
|
||||
let mut res = Listing::default();
|
||||
let entry = match entry {
|
||||
Ok(entry) => entry,
|
||||
Err(e) => {
|
||||
// The error is potentially retryable, so we must rewind the loop after yielding.
|
||||
yield Err(e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
next_marker = entry.continuation();
|
||||
let prefix_iter = entry
|
||||
.blobs
|
||||
@@ -372,7 +359,7 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
last_modified: k.properties.last_modified.into(),
|
||||
size: k.properties.content_length,
|
||||
}
|
||||
);
|
||||
);
|
||||
|
||||
for key in blob_iter {
|
||||
res.keys.push(key);
|
||||
|
||||
@@ -26,16 +26,6 @@ pub struct RemoteStorageConfig {
|
||||
pub timeout: Duration,
|
||||
}
|
||||
|
||||
impl RemoteStorageKind {
|
||||
pub fn bucket_name(&self) -> Option<&str> {
|
||||
match self {
|
||||
RemoteStorageKind::LocalFs { .. } => None,
|
||||
RemoteStorageKind::AwsS3(config) => Some(&config.bucket_name),
|
||||
RemoteStorageKind::AzureContainer(config) => Some(&config.container_name),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_timeout() -> Duration {
|
||||
RemoteStorageConfig::DEFAULT_TIMEOUT
|
||||
}
|
||||
@@ -188,14 +178,6 @@ impl RemoteStorageConfig {
|
||||
pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result<RemoteStorageConfig> {
|
||||
Ok(utils::toml_edit_ext::deserialize_item(toml)?)
|
||||
}
|
||||
|
||||
pub fn from_toml_str(input: &str) -> anyhow::Result<RemoteStorageConfig> {
|
||||
let toml_document = toml_edit::DocumentMut::from_str(input)?;
|
||||
if let Some(item) = toml_document.get("remote_storage") {
|
||||
return Self::from_toml(item);
|
||||
}
|
||||
Self::from_toml(toml_document.as_item())
|
||||
}
|
||||
}
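
A hypothetical use of from_toml_str; the `local_path` key and value below are assumptions about the LocalFs configuration layout, not something shown in this diff.

// Illustrative sketch only.
fn example_config() -> anyhow::Result<RemoteStorageConfig> {
    RemoteStorageConfig::from_toml_str(
        r#"
        [remote_storage]
        local_path = "/tmp/remote-storage"
        "#,
    )
}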
#[cfg(test)]
|
||||
@@ -203,7 +185,8 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
fn parse(input: &str) -> anyhow::Result<RemoteStorageConfig> {
|
||||
RemoteStorageConfig::from_toml_str(input)
|
||||
let toml = input.parse::<toml_edit::DocumentMut>().unwrap();
|
||||
RemoteStorageConfig::from_toml(toml.as_item())
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -360,12 +360,7 @@ impl RemoteStorage for LocalFs {
|
||||
let mut objects = Vec::with_capacity(keys.len());
|
||||
for key in keys {
|
||||
let path = key.with_base(&self.storage_root);
|
||||
let metadata = file_metadata(&path).await;
|
||||
if let Err(DownloadError::NotFound) = metadata {
|
||||
// Race: if the file is deleted between listing and metadata check, ignore it.
|
||||
continue;
|
||||
}
|
||||
let metadata = metadata?;
|
||||
let metadata = file_metadata(&path).await?;
|
||||
if metadata.is_dir() {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -29,7 +29,6 @@ jsonwebtoken.workspace = true
|
||||
nix.workspace = true
|
||||
once_cell.workspace = true
|
||||
pin-project-lite.workspace = true
|
||||
pprof.workspace = true
|
||||
regex.workspace = true
|
||||
routerify.workspace = true
|
||||
serde.workspace = true
|
||||
|
||||
@@ -50,8 +50,8 @@ REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"|
|
||||
declare -i WAL_SIZE=$REDO_POS+114
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate
|
||||
cp "$DATA_DIR"/pg_wal/000000010000000000000001 "$DATA_DIR"
|
||||
cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
|
||||
cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
|
||||
for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
|
||||
dd if="$DATA_DIR"/000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
|
||||
rm -f "$DATA_DIR"/000000010000000000000001
|
||||
dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
|
||||
rm -f 000000010000000000000001
|
||||
|
||||
@@ -14,8 +14,8 @@ REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"|
|
||||
declare -i WAL_SIZE=$REDO_POS+114
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start
|
||||
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate
|
||||
cp "$DATA_DIR"/pg_wal/000000010000000000000001 "$DATA_DIR"
|
||||
cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
|
||||
cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
|
||||
for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
|
||||
dd if="$DATA_DIR"/000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
|
||||
rm -f "$DATA_DIR"/000000010000000000000001
|
||||
dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
|
||||
rm -f 000000010000000000000001
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
use crate::auth::{AuthError, Claims, SwappableJwtAuth};
|
||||
use crate::http::error::{api_error_handler, route_error_handler, ApiError};
|
||||
use crate::http::request::{get_query_param, parse_query_param};
|
||||
use anyhow::{anyhow, Context};
|
||||
use hyper::header::{HeaderName, AUTHORIZATION, CONTENT_DISPOSITION};
|
||||
use anyhow::Context;
|
||||
use hyper::header::{HeaderName, AUTHORIZATION};
|
||||
use hyper::http::HeaderValue;
|
||||
use hyper::Method;
|
||||
use hyper::{header::CONTENT_TYPE, Body, Request, Response};
|
||||
@@ -13,13 +12,11 @@ use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
|
||||
use tracing::{debug, info, info_span, warn, Instrument};
|
||||
|
||||
use std::future::Future;
|
||||
use std::io::Write as _;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use pprof::protos::Message as _;
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use std::io::Write as _;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
|
||||
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
|
||||
@@ -331,82 +328,6 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Generates CPU profiles.
|
||||
pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
enum Format {
|
||||
Pprof,
|
||||
Svg,
|
||||
}
|
||||
|
||||
// Parameters.
|
||||
let format = match get_query_param(&req, "format")?.as_deref() {
|
||||
None => Format::Pprof,
|
||||
Some("pprof") => Format::Pprof,
|
||||
Some("svg") => Format::Svg,
|
||||
Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))),
|
||||
};
|
||||
let seconds = match parse_query_param(&req, "seconds")? {
|
||||
None => 5,
|
||||
Some(seconds @ 1..=30) => seconds,
|
||||
Some(_) => return Err(ApiError::BadRequest(anyhow!("duration must be 1-30 secs"))),
|
||||
};
|
||||
let frequency_hz = match parse_query_param(&req, "frequency")? {
|
||||
None => 99,
|
||||
Some(1001..) => return Err(ApiError::BadRequest(anyhow!("frequency must be <=1000 Hz"))),
|
||||
Some(frequency) => frequency,
|
||||
};
|
||||
|
||||
// Only allow one profiler at a time.
|
||||
static PROFILE_LOCK: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
|
||||
let _lock = PROFILE_LOCK
|
||||
.try_lock()
|
||||
.map_err(|_| ApiError::Conflict("profiler already running".into()))?;
|
||||
|
||||
// Take the profile.
|
||||
let report = tokio::task::spawn_blocking(move || {
|
||||
let guard = pprof::ProfilerGuardBuilder::default()
|
||||
.frequency(frequency_hz)
|
||||
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
|
||||
.build()?;
|
||||
std::thread::sleep(Duration::from_secs(seconds));
|
||||
guard.report().build()
|
||||
})
|
||||
.await
|
||||
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
|
||||
.map_err(|pprof_err| ApiError::InternalServerError(pprof_err.into()))?;
|
||||
|
||||
// Return the report in the requested format.
|
||||
match format {
|
||||
Format::Pprof => {
|
||||
let mut body = Vec::new();
|
||||
report
|
||||
.pprof()
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?
|
||||
.write_to_vec(&mut body)
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?;
|
||||
|
||||
Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, "application/octet-stream")
|
||||
.header(CONTENT_DISPOSITION, "attachment; filename=\"profile.pb\"")
|
||||
.body(Body::from(body))
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))
|
||||
}
|
||||
|
||||
Format::Svg => {
|
||||
let mut body = Vec::new();
|
||||
report
|
||||
.flamegraph(&mut body)
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))?;
|
||||
Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, "image/svg+xml")
|
||||
.body(Body::from(body))
|
||||
.map_err(|err| ApiError::InternalServerError(err.into()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
|
||||
) -> Middleware<B, ApiError> {
|
||||
Middleware::pre(move |req| async move {
|
||||
|
||||
@@ -5,7 +5,6 @@ use serde::{Deserialize, Serialize};

use super::error::ApiError;

/// Parse a json request body and deserialize it to the type `T`.
pub async fn json_request<T: for<'de> Deserialize<'de>>(
request: &mut Request<Body>,
) -> Result<T, ApiError> {
@@ -28,27 +27,6 @@ pub async fn json_request<T: for<'de> Deserialize<'de>>(
.map_err(ApiError::BadRequest)
}

/// Parse a json request body and deserialize it to the type `T`. If the body is empty, return `T::default`.
pub async fn json_request_maybe<T: for<'de> Deserialize<'de> + Default>(
request: &mut Request<Body>,
) -> Result<T, ApiError> {
let body = hyper::body::aggregate(request.body_mut())
.await
.context("Failed to read request body")
.map_err(ApiError::BadRequest)?;

if body.remaining() == 0 {
return Ok(T::default());
}

let mut deser = serde_json::de::Deserializer::from_reader(body.reader());

serde_path_to_error::deserialize(&mut deser)
// intentionally stringify because the debug version is not helpful in python logs
.map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}"))
.map_err(ApiError::BadRequest)
}

pub fn json_response<T: Serialize>(
status: StatusCode,
data: T,

@@ -30,7 +30,7 @@ pub fn parse_request_param<T: FromStr>(
}
}

pub fn get_query_param<'a>(
fn get_query_param<'a>(
request: &'a Request<Body>,
param_name: &str,
) -> Result<Option<Cow<'a, str>>, ApiError> {

@@ -83,9 +83,7 @@ where
}
wake_these.push(self.heap.pop().unwrap().wake_channel);
}
if !wake_these.is_empty() {
self.update_status();
}
self.update_status();
wake_these
}
@@ -43,7 +43,6 @@ postgres.workspace = true
postgres_backend.workspace = true
postgres-protocol.workspace = true
postgres-types.workspace = true
postgres_initdb.workspace = true
rand.workspace = true
range-set-blaze = { version = "0.1.16", features = ["alloc"] }
regex.workspace = true
@@ -69,7 +68,6 @@ url.workspace = true
walkdir.workspace = true
metrics.workspace = true
pageserver_api.workspace = true
pageserver_client.workspace = true # for ResponseErrorMessageExt TODO refactor that
pageserver_compaction.workspace = true
postgres_connection.workspace = true
postgres_ffi.workspace = true
@@ -86,7 +84,6 @@ enumset = { workspace = true, features = ["serde"]}
strum.workspace = true
strum_macros.workspace = true
wal_decoder.workspace = true
smallvec.workspace = true

[target.'cfg(target_os = "linux")'.dependencies]
procfs.workspace = true
@@ -167,7 +167,6 @@ fn criterion_benchmark(c: &mut Criterion) {
16384,
virtual_file::io_engine_for_bench(),
conf.virtual_file_io_mode,
virtual_file::SyncMode::Sync,
);
page_cache::init(conf.page_cache_size);

@@ -18,6 +18,7 @@ postgres_ffi.workspace = true
thiserror.workspace = true
tokio.workspace = true
tokio-util.workspace = true
toml_edit.workspace = true
utils.workspace = true
svg_fmt.workspace = true
workspace_hack.workspace = true

@@ -138,7 +138,6 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
virtual_file::SyncMode::Sync,
);
pageserver::page_cache::init(100);
@@ -51,7 +51,6 @@ async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
virtual_file::SyncMode::Sync,
|
||||
);
|
||||
page_cache::init(100);
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
@@ -66,7 +65,6 @@ async fn read_image_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
virtual_file::SyncMode::Sync,
|
||||
);
|
||||
page_cache::init(100);
|
||||
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
|
||||
@@ -173,7 +171,6 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
virtual_file::SyncMode::Sync,
|
||||
);
|
||||
pageserver::page_cache::init(100);
|
||||
|
||||
|
||||
@@ -174,7 +174,11 @@ async fn main() -> anyhow::Result<()> {
|
||||
println!("specified prefix '{}' failed validation", cmd.prefix);
|
||||
return Ok(());
|
||||
};
|
||||
let config = RemoteStorageConfig::from_toml_str(&cmd.config_toml_str)?;
|
||||
let toml_document = toml_edit::DocumentMut::from_str(&cmd.config_toml_str)?;
|
||||
let toml_item = toml_document
|
||||
.get("remote_storage")
|
||||
.expect("need remote_storage");
|
||||
let config = RemoteStorageConfig::from_toml(toml_item)?;
|
||||
let storage = remote_storage::GenericRemoteStorage::from_config(&config).await;
|
||||
let cancel = CancellationToken::new();
|
||||
storage
|
||||
@@ -205,7 +209,6 @@ async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
|
||||
10,
|
||||
virtual_file::api::IoEngineKind::StdFs,
|
||||
IoMode::preferred(),
|
||||
virtual_file::SyncMode::Sync,
|
||||
);
|
||||
page_cache::init(100);
|
||||
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
|
||||
|
||||
@@ -171,18 +171,11 @@ fn main() -> anyhow::Result<()> {
|
||||
let scenario = failpoint_support::init();
|
||||
|
||||
// Basic initialization of things that don't change after startup
|
||||
tracing::info!("Initializing virtual_file...");
|
||||
virtual_file::init(
|
||||
conf.max_file_descriptors,
|
||||
conf.virtual_file_io_engine,
|
||||
conf.virtual_file_io_mode,
|
||||
if conf.no_sync {
|
||||
virtual_file::SyncMode::UnsafeNoSync
|
||||
} else {
|
||||
virtual_file::SyncMode::Sync
|
||||
},
|
||||
);
|
||||
tracing::info!("Initializing page_cache...");
|
||||
page_cache::init(conf.page_cache_size);
|
||||
|
||||
start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
|
||||
|
||||
@@ -144,10 +144,6 @@ pub struct PageServerConf {
|
||||
/// JWT token for use with the control plane API.
|
||||
pub control_plane_api_token: Option<SecretString>,
|
||||
|
||||
pub import_pgdata_upcall_api: Option<Url>,
|
||||
pub import_pgdata_upcall_api_token: Option<SecretString>,
|
||||
pub import_pgdata_aws_endpoint_url: Option<Url>,
|
||||
|
||||
/// If true, pageserver will make best-effort to operate without a control plane: only
|
||||
/// for use in major incidents.
|
||||
pub control_plane_emergency_mode: bool,
|
||||
@@ -186,10 +182,6 @@ pub struct PageServerConf {
|
||||
|
||||
/// Optionally disable disk syncs (unsafe!)
|
||||
pub no_sync: bool,
|
||||
|
||||
/// Maximum amount of time for which a get page request
|
||||
/// might be held up for request merging.
|
||||
pub server_side_batch_timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
/// Token for authentication to safekeepers
|
||||
@@ -332,9 +324,6 @@ impl PageServerConf {
|
||||
control_plane_api,
|
||||
control_plane_api_token,
|
||||
control_plane_emergency_mode,
|
||||
import_pgdata_upcall_api,
|
||||
import_pgdata_upcall_api_token,
|
||||
import_pgdata_aws_endpoint_url,
|
||||
heatmap_upload_concurrency,
|
||||
secondary_download_concurrency,
|
||||
ingest_batch_size,
|
||||
@@ -347,7 +336,6 @@ impl PageServerConf {
|
||||
concurrent_tenant_warmup,
|
||||
concurrent_tenant_size_logical_size_queries,
|
||||
virtual_file_io_engine,
|
||||
server_side_batch_timeout,
|
||||
tenant_config,
|
||||
no_sync,
|
||||
} = config_toml;
|
||||
@@ -389,10 +377,6 @@ impl PageServerConf {
|
||||
image_compression,
|
||||
timeline_offloading,
|
||||
ephemeral_bytes_per_memory_kb,
|
||||
server_side_batch_timeout,
|
||||
import_pgdata_upcall_api,
|
||||
import_pgdata_upcall_api_token: import_pgdata_upcall_api_token.map(SecretString::from),
|
||||
import_pgdata_aws_endpoint_url,
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// fields that require additional validation or custom handling
|
||||
|
||||
@@ -15,7 +15,6 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
use utils::backoff;
|
||||
use utils::pausable_failpoint;
|
||||
|
||||
use crate::metrics;
|
||||
|
||||
@@ -91,7 +90,6 @@ impl Deleter {
|
||||
/// Block until everything in accumulator has been executed
|
||||
async fn flush(&mut self) -> Result<(), DeletionQueueError> {
|
||||
while !self.accumulator.is_empty() && !self.cancel.is_cancelled() {
|
||||
pausable_failpoint!("deletion-queue-before-execute-pause");
|
||||
match self.remote_delete().await {
|
||||
Ok(()) => {
|
||||
// Note: we assume that the remote storage layer returns Ok(()) if some
|
||||
|
||||
@@ -623,8 +623,6 @@ paths:
|
||||
existing_initdb_timeline_id:
|
||||
type: string
|
||||
format: hex
|
||||
import_pgdata:
|
||||
$ref: "#/components/schemas/TimelineCreateRequestImportPgdata"
|
||||
responses:
|
||||
"201":
|
||||
description: Timeline was created, or already existed with matching parameters
|
||||
@@ -981,34 +979,6 @@ components:
|
||||
$ref: "#/components/schemas/TenantConfig"
|
||||
effective_config:
|
||||
$ref: "#/components/schemas/TenantConfig"
|
||||
TimelineCreateRequestImportPgdata:
|
||||
type: object
|
||||
required:
|
||||
- location
|
||||
- idempotency_key
|
||||
properties:
|
||||
idempotency_key:
|
||||
type: string
|
||||
location:
|
||||
$ref: "#/components/schemas/TimelineCreateRequestImportPgdataLocation"
|
||||
TimelineCreateRequestImportPgdataLocation:
|
||||
type: object
|
||||
properties:
|
||||
AwsS3:
|
||||
$ref: "#/components/schemas/TimelineCreateRequestImportPgdataLocationAwsS3"
|
||||
TimelineCreateRequestImportPgdataLocationAwsS3:
|
||||
type: object
|
||||
properties:
|
||||
region:
|
||||
type: string
|
||||
bucket:
|
||||
type: string
|
||||
key:
|
||||
type: string
|
||||
required:
|
||||
- region
|
||||
- bucket
|
||||
- key
|
||||
TimelineInfo:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -40,7 +40,6 @@ use pageserver_api::models::TenantSorting;
|
||||
use pageserver_api::models::TenantState;
|
||||
use pageserver_api::models::TimelineArchivalConfigRequest;
|
||||
use pageserver_api::models::TimelineCreateRequestMode;
|
||||
use pageserver_api::models::TimelineCreateRequestModeImportPgdata;
|
||||
use pageserver_api::models::TimelinesInfoAndOffloaded;
|
||||
use pageserver_api::models::TopTenantShardItem;
|
||||
use pageserver_api::models::TopTenantShardsRequest;
|
||||
@@ -56,7 +55,6 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
use utils::auth::JwtAuth;
|
||||
use utils::failpoint_support::failpoints_handler;
|
||||
use utils::http::endpoint::profile_cpu_handler;
|
||||
use utils::http::endpoint::prometheus_metrics_handler;
|
||||
use utils::http::endpoint::request_span;
|
||||
use utils::http::request::must_parse_query_param;
|
||||
@@ -82,12 +80,9 @@ use crate::tenant::secondary::SecondaryController;
|
||||
use crate::tenant::size::ModelInputs;
|
||||
use crate::tenant::storage_layer::LayerAccessStatsReset;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::import_pgdata;
|
||||
use crate::tenant::timeline::offload::offload_timeline;
|
||||
use crate::tenant::timeline::offload::OffloadError;
|
||||
use crate::tenant::timeline::CompactFlags;
|
||||
use crate::tenant::timeline::CompactOptions;
|
||||
use crate::tenant::timeline::CompactRange;
|
||||
use crate::tenant::timeline::CompactionError;
|
||||
use crate::tenant::timeline::Timeline;
|
||||
use crate::tenant::GetTimelineError;
|
||||
@@ -105,7 +100,7 @@ use utils::{
|
||||
http::{
|
||||
endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with},
|
||||
error::{ApiError, HttpErrorBody},
|
||||
json::{json_request, json_request_maybe, json_response},
|
||||
json::{json_request, json_response},
|
||||
request::parse_request_param,
|
||||
RequestExt, RouterBuilder,
|
||||
},
|
||||
@@ -128,7 +123,7 @@ pub struct State {
|
||||
conf: &'static PageServerConf,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
allowlist_routes: &'static [&'static str],
|
||||
allowlist_routes: Vec<Uri>,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
broker_client: storage_broker::BrokerClientChannel,
|
||||
disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
|
||||
@@ -149,13 +144,10 @@ impl State {
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
secondary_controller: SecondaryController,
|
||||
) -> anyhow::Result<Self> {
|
||||
let allowlist_routes = &[
|
||||
"/v1/status",
|
||||
"/v1/doc",
|
||||
"/swagger.yml",
|
||||
"/metrics",
|
||||
"/profile/cpu",
|
||||
];
|
||||
let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml", "/metrics"]
|
||||
.iter()
|
||||
.map(|v| v.parse().unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
Ok(Self {
|
||||
conf,
|
||||
tenant_manager,
|
||||
@@ -582,35 +574,6 @@ async fn timeline_create_handler(
|
||||
ancestor_timeline_id,
|
||||
ancestor_start_lsn,
|
||||
}),
|
||||
TimelineCreateRequestMode::ImportPgdata {
|
||||
import_pgdata:
|
||||
TimelineCreateRequestModeImportPgdata {
|
||||
location,
|
||||
idempotency_key,
|
||||
},
|
||||
} => tenant::CreateTimelineParams::ImportPgdata(tenant::CreateTimelineParamsImportPgdata {
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new(
|
||||
idempotency_key.0,
|
||||
),
|
||||
new_timeline_id,
|
||||
location: {
|
||||
use import_pgdata::index_part_format::Location;
|
||||
use pageserver_api::models::ImportPgdataLocation;
|
||||
match location {
|
||||
#[cfg(feature = "testing")]
|
||||
ImportPgdataLocation::LocalFs { path } => Location::LocalFs { path },
|
||||
ImportPgdataLocation::AwsS3 {
|
||||
region,
|
||||
bucket,
|
||||
key,
|
||||
} => Location::AwsS3 {
|
||||
region,
|
||||
bucket,
|
||||
key,
|
||||
},
|
||||
}
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);
|
||||
@@ -1964,15 +1927,13 @@ async fn timeline_gc_handler(
|
||||
|
||||
// Run compaction immediately on given timeline.
|
||||
async fn timeline_compact_handler(
|
||||
mut request: Request<Body>,
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
|
||||
let compact_range = json_request_maybe::<Option<CompactRange>>(&mut request).await?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let mut flags = EnumSet::empty();
|
||||
@@ -1996,16 +1957,11 @@ async fn timeline_compact_handler(
|
||||
let wait_until_uploaded =
|
||||
parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false);
|
||||
|
||||
let options = CompactOptions {
|
||||
compact_range,
|
||||
flags,
|
||||
};
|
||||
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;
|
||||
timeline
|
||||
.compact_with_options(&cancel, options, &ctx)
|
||||
.compact(&cancel, flags, &ctx)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(e.into()))?;
|
||||
if wait_until_uploaded {
|
||||
@@ -3183,7 +3139,7 @@ pub fn make_router(
|
||||
if auth.is_some() {
|
||||
router = router.middleware(auth_middleware(|request| {
|
||||
let state = get_state(request);
|
||||
if state.allowlist_routes.contains(&request.uri().path()) {
|
||||
if state.allowlist_routes.contains(request.uri()) {
|
||||
None
|
||||
} else {
|
||||
state.auth.as_deref()
|
||||
@@ -3202,7 +3158,6 @@ pub fn make_router(
|
||||
Ok(router
|
||||
.data(state)
|
||||
.get("/metrics", |r| request_span(r, prometheus_metrics_handler))
|
||||
.get("/profile/cpu", |r| request_span(r, profile_cpu_handler))
|
||||
.get("/v1/status", |r| api_handler(r, status_handler))
|
||||
.put("/v1/failpoints", |r| {
|
||||
testing_api_handler("manage failpoints", r, failpoints_handler)
|
||||
|
||||
@@ -1187,7 +1187,6 @@ struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
|
||||
ctx: &'c RequestContext,
|
||||
start: std::time::Instant,
|
||||
op: SmgrQueryType,
|
||||
count: usize,
|
||||
}
|
||||
|
||||
impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> {
|
||||
@@ -1215,13 +1214,10 @@ impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> {
|
||||
elapsed
|
||||
}
|
||||
};
|
||||
|
||||
for _ in 0..self.count {
|
||||
self.global_latency_histo
|
||||
.observe(ex_throttled.as_secs_f64());
|
||||
if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo {
|
||||
per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64());
|
||||
}
|
||||
self.global_latency_histo
|
||||
.observe(ex_throttled.as_secs_f64());
|
||||
if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo {
|
||||
per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1389,14 +1385,6 @@ impl SmgrQueryTimePerTimeline {
|
||||
&'a self,
|
||||
op: SmgrQueryType,
|
||||
ctx: &'c RequestContext,
|
||||
) -> Option<impl Drop + 'a> {
|
||||
self.start_timer_many(op, 1, ctx)
|
||||
}
|
||||
pub(crate) fn start_timer_many<'c: 'a, 'a>(
|
||||
&'a self,
|
||||
op: SmgrQueryType,
|
||||
count: usize,
|
||||
ctx: &'c RequestContext,
|
||||
) -> Option<impl Drop + 'a> {
|
||||
let start = Instant::now();
|
||||
|
||||
@@ -1434,7 +1422,6 @@ impl SmgrQueryTimePerTimeline {
|
||||
ctx,
|
||||
start,
|
||||
op,
|
||||
count,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,13 +7,13 @@ use bytes::Buf;
|
||||
use futures::FutureExt;
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::models::{self, TenantState};
|
||||
use pageserver_api::models::TenantState;
|
||||
use pageserver_api::models::{
|
||||
PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
|
||||
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
|
||||
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest,
|
||||
PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
|
||||
PagestreamProtocolVersion,
|
||||
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
|
||||
PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest,
|
||||
PagestreamNblocksResponse, PagestreamProtocolVersion,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use postgres_backend::{is_expected_io_error, AuthType, PostgresBackend, QueryError};
|
||||
@@ -44,7 +44,7 @@ use crate::basebackup;
|
||||
use crate::basebackup::BasebackupError;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::metrics::{self};
|
||||
use crate::metrics;
|
||||
use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS};
|
||||
use crate::pgdatadir_mapping::Version;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
@@ -59,7 +59,7 @@ use crate::tenant::GetTimelineError;
|
||||
use crate::tenant::PageReconstructError;
|
||||
use crate::tenant::Timeline;
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use pageserver_api::reltag::SlruKind;
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
@@ -105,7 +105,6 @@ pub fn spawn(
|
||||
pg_auth,
|
||||
tcp_listener,
|
||||
conf.pg_auth_type,
|
||||
conf.server_side_batch_timeout,
|
||||
libpq_ctx,
|
||||
cancel.clone(),
|
||||
)
|
||||
@@ -154,7 +153,6 @@ pub async fn libpq_listener_main(
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
listener: tokio::net::TcpListener,
|
||||
auth_type: AuthType,
|
||||
server_side_batch_timeout: Option<Duration>,
|
||||
listener_ctx: RequestContext,
|
||||
listener_cancel: CancellationToken,
|
||||
) -> Connections {
|
||||
@@ -185,7 +183,6 @@ pub async fn libpq_listener_main(
|
||||
local_auth,
|
||||
socket,
|
||||
auth_type,
|
||||
server_side_batch_timeout,
|
||||
connection_ctx,
|
||||
connections_cancel.child_token(),
|
||||
));
|
||||
@@ -213,7 +210,6 @@ async fn page_service_conn_main(
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
socket: tokio::net::TcpStream,
|
||||
auth_type: AuthType,
|
||||
server_side_batch_timeout: Option<Duration>,
|
||||
connection_ctx: RequestContext,
|
||||
cancel: CancellationToken,
|
||||
) -> ConnectionHandlerResult {
|
||||
@@ -264,13 +260,8 @@ async fn page_service_conn_main(
|
||||
// and create a child per-query context when it invokes process_query.
|
||||
// But it's in a shared crate, so, we store connection_ctx inside PageServerHandler
|
||||
// and create the per-query context in process_query ourselves.
|
||||
let mut conn_handler = PageServerHandler::new(
|
||||
tenant_manager,
|
||||
auth,
|
||||
server_side_batch_timeout,
|
||||
connection_ctx,
|
||||
cancel.clone(),
|
||||
);
|
||||
let mut conn_handler =
|
||||
PageServerHandler::new(tenant_manager, auth, connection_ctx, cancel.clone());
|
||||
let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?;
|
||||
|
||||
match pgbackend.run(&mut conn_handler, &cancel).await {
|
||||
@@ -313,12 +304,6 @@ struct PageServerHandler {
|
||||
cancel: CancellationToken,
|
||||
|
||||
timeline_handles: TimelineHandles,
|
||||
|
||||
/// Messages queued up for the next processing batch
|
||||
next_batch: Option<BatchedFeMessage>,
|
||||
|
||||
/// See [`PageServerConf::server_side_batch_timeout`]
|
||||
server_side_batch_timeout: Option<Duration>,
|
||||
}
|
||||
|
||||
struct TimelineHandles {
|
||||
@@ -532,47 +517,10 @@ impl From<WaitLsnError> for QueryError {
|
||||
}
|
||||
}
|
||||
|
||||
enum BatchedFeMessage {
|
||||
Exists {
|
||||
span: Span,
|
||||
req: models::PagestreamExistsRequest,
|
||||
},
|
||||
Nblocks {
|
||||
span: Span,
|
||||
req: models::PagestreamNblocksRequest,
|
||||
},
|
||||
GetPage {
|
||||
span: Span,
|
||||
shard: timeline::handle::Handle<TenantManagerTypes>,
|
||||
effective_request_lsn: Lsn,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
},
|
||||
DbSize {
|
||||
span: Span,
|
||||
req: models::PagestreamDbSizeRequest,
|
||||
},
|
||||
GetSlruSegment {
|
||||
span: Span,
|
||||
req: models::PagestreamGetSlruSegmentRequest,
|
||||
},
|
||||
RespondError {
|
||||
span: Span,
|
||||
error: PageStreamError,
|
||||
},
|
||||
}
|
||||
|
||||
enum BatchOrEof {
|
||||
/// In the common case, this has one entry.
|
||||
/// At most, it has two entries: the first is the leftover batch, the second is an error.
|
||||
Batch(smallvec::SmallVec<[BatchedFeMessage; 1]>),
|
||||
Eof,
|
||||
}
|
||||
|
||||
impl PageServerHandler {
|
||||
pub fn new(
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
server_side_batch_timeout: Option<Duration>,
|
||||
connection_ctx: RequestContext,
|
||||
cancel: CancellationToken,
|
||||
) -> Self {
|
||||
@@ -582,8 +530,6 @@ impl PageServerHandler {
|
||||
connection_ctx,
|
||||
timeline_handles: TimelineHandles::new(tenant_manager),
|
||||
cancel,
|
||||
next_batch: None,
|
||||
server_side_batch_timeout,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -611,221 +557,6 @@ impl PageServerHandler {
|
||||
)
|
||||
}
|
||||
|
||||
async fn read_batch_from_connection<IO>(
|
||||
&mut self,
|
||||
pgb: &mut PostgresBackend<IO>,
|
||||
tenant_id: &TenantId,
|
||||
timeline_id: &TimelineId,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Option<BatchOrEof>, QueryError>
|
||||
where
|
||||
IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
let mut batch = self.next_batch.take();
|
||||
let mut batch_started_at: Option<std::time::Instant> = None;
|
||||
|
||||
let next_batch: Option<BatchedFeMessage> = loop {
|
||||
let sleep_fut = match (self.server_side_batch_timeout, batch_started_at) {
|
||||
(Some(batch_timeout), Some(started_at)) => futures::future::Either::Left(
|
||||
tokio::time::sleep_until((started_at + batch_timeout).into()),
|
||||
),
|
||||
_ => futures::future::Either::Right(futures::future::pending()),
|
||||
};
|
||||
|
||||
let msg = tokio::select! {
|
||||
biased;
|
||||
_ = self.cancel.cancelled() => {
|
||||
return Err(QueryError::Shutdown)
|
||||
}
|
||||
msg = pgb.read_message() => {
|
||||
msg
|
||||
}
|
||||
_ = sleep_fut => {
|
||||
assert!(batch.is_some());
|
||||
break None;
|
||||
}
|
||||
};
|
||||
let copy_data_bytes = match msg? {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(FeMessage::Terminate) => {
|
||||
return Ok(Some(BatchOrEof::Eof));
|
||||
}
|
||||
Some(m) => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"unexpected message: {m:?} during COPY"
|
||||
)));
|
||||
}
|
||||
None => {
|
||||
return Ok(Some(BatchOrEof::Eof));
|
||||
} // client disconnected
|
||||
};
|
||||
trace!("query: {copy_data_bytes:?}");
|
||||
fail::fail_point!("ps::handle-pagerequest-message");
|
||||
|
||||
// parse request
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
|
||||
let this_msg = match neon_fe_msg {
|
||||
PagestreamFeMessage::Exists(req) => BatchedFeMessage::Exists {
|
||||
span: tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn),
|
||||
req,
|
||||
},
|
||||
PagestreamFeMessage::Nblocks(req) => BatchedFeMessage::Nblocks {
|
||||
span: tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn),
|
||||
req,
|
||||
},
|
||||
PagestreamFeMessage::DbSize(req) => BatchedFeMessage::DbSize {
|
||||
span: tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn),
|
||||
req,
|
||||
},
|
||||
PagestreamFeMessage::GetSlruSegment(req) => BatchedFeMessage::GetSlruSegment {
|
||||
span: tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn),
|
||||
req,
|
||||
},
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
rel,
|
||||
blkno,
|
||||
}) => {
|
||||
// shard_id is filled in by the handler
|
||||
let span = tracing::info_span!(
|
||||
"handle_get_page_at_lsn_request_batched",
|
||||
%tenant_id, %timeline_id, shard_id = tracing::field::Empty, req_lsn = %request_lsn,
|
||||
batch_size = tracing::field::Empty, batch_id = tracing::field::Empty
|
||||
);
|
||||
|
||||
macro_rules! current_batch_and_error {
|
||||
($error:expr) => {{
|
||||
let error = BatchedFeMessage::RespondError {
|
||||
span,
|
||||
error: $error,
|
||||
};
|
||||
let batch_and_error = match batch {
|
||||
Some(b) => smallvec::smallvec![b, error],
|
||||
None => smallvec::smallvec![error],
|
||||
};
|
||||
Ok(Some(BatchOrEof::Batch(batch_and_error)))
|
||||
}};
|
||||
}
|
||||
|
||||
let key = rel_block_to_key(rel, blkno);
|
||||
let shard = match self
|
||||
.timeline_handles
|
||||
.get(*tenant_id, *timeline_id, ShardSelector::Page(key))
|
||||
.instrument(span.clone())
|
||||
.await
|
||||
{
|
||||
Ok(tl) => tl,
|
||||
Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => {
|
||||
// We already know this tenant exists in general, because we resolved it at
|
||||
// start of connection. Getting a NotFound here indicates that the shard containing
|
||||
// the requested page is not present on this node: the client's knowledge of shard->pageserver
|
||||
// mapping is out of date.
|
||||
//
|
||||
// Closing the connection by returning `Reconnect` has the side effect of rate-limiting the above message, via
|
||||
// client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration
|
||||
// and talk to a different pageserver.
|
||||
return current_batch_and_error!(PageStreamError::Reconnect(
|
||||
"getpage@lsn request routed to wrong shard".into()
|
||||
));
|
||||
}
|
||||
Err(e) => {
|
||||
return current_batch_and_error!(e.into());
|
||||
}
|
||||
};
|
||||
let effective_request_lsn = match Self::wait_or_get_last_lsn(
|
||||
&shard,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
&shard.get_latest_gc_cutoff_lsn(),
|
||||
ctx,
|
||||
)
|
||||
// TODO: if we actually need to wait for lsn here, it delays the entire batch which doesn't need to wait
|
||||
.await
|
||||
{
|
||||
Ok(lsn) => lsn,
|
||||
Err(e) => {
|
||||
return current_batch_and_error!(e);
|
||||
}
|
||||
};
|
||||
BatchedFeMessage::GetPage {
|
||||
span,
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages: smallvec::smallvec![(rel, blkno)],
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let batch_timeout = match self.server_side_batch_timeout {
|
||||
Some(value) => value,
|
||||
None => {
|
||||
// Batching is not enabled - stop on the first message.
|
||||
return Ok(Some(BatchOrEof::Batch(smallvec::smallvec![this_msg])));
|
||||
}
|
||||
};
|
||||
|
||||
// check if we can batch
|
||||
match (&mut batch, this_msg) {
|
||||
(None, this_msg) => {
|
||||
batch = Some(this_msg);
|
||||
}
|
||||
(
|
||||
Some(BatchedFeMessage::GetPage {
|
||||
span: _,
|
||||
shard: accum_shard,
|
||||
pages: accum_pages,
|
||||
effective_request_lsn: accum_lsn,
|
||||
}),
|
||||
BatchedFeMessage::GetPage {
|
||||
span: _,
|
||||
shard: this_shard,
|
||||
pages: this_pages,
|
||||
effective_request_lsn: this_lsn,
|
||||
},
|
||||
) if async {
|
||||
assert_eq!(this_pages.len(), 1);
|
||||
if accum_pages.len() >= Timeline::MAX_GET_VECTORED_KEYS as usize {
|
||||
assert_eq!(accum_pages.len(), Timeline::MAX_GET_VECTORED_KEYS as usize);
|
||||
return false;
|
||||
}
|
||||
if (accum_shard.tenant_shard_id, accum_shard.timeline_id)
|
||||
!= (this_shard.tenant_shard_id, this_shard.timeline_id)
|
||||
{
|
||||
// TODO: we _could_ batch & execute each shard separately (and in parallel).
|
||||
// But the current logic for keeping responses in order does not support that.
|
||||
return false;
|
||||
}
|
||||
// the vectored get currently only supports a single LSN, so, bounce as soon
|
||||
// as the effective request_lsn changes
|
||||
if *accum_lsn != this_lsn {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
.await =>
|
||||
{
|
||||
// ok to batch
|
||||
accum_pages.extend(this_pages);
|
||||
}
|
||||
(Some(_), this_msg) => {
|
||||
// by default, don't continue batching
|
||||
break Some(this_msg);
|
||||
}
|
||||
}
|
||||
|
||||
// batching impl piece
|
||||
let started_at = batch_started_at.get_or_insert_with(Instant::now);
|
||||
if started_at.elapsed() > batch_timeout {
|
||||
break None;
|
||||
}
|
||||
};
|
||||
|
||||
self.next_batch = next_batch;
|
||||
Ok(batch.map(|b| BatchOrEof::Batch(smallvec::smallvec![b])))
|
||||
}
|
||||
|
||||
/// Pagestream sub-protocol handler.
|
||||
///
|
||||
/// It is a simple request-response protocol inside a COPYBOTH session.
|
||||
@@ -861,165 +592,133 @@ impl PageServerHandler {
|
||||
}
|
||||
}
|
||||
|
||||
// If [`PageServerHandler`] is reused for multiple pagestreams,
|
||||
// then make sure to not process requests from the previous ones.
|
||||
self.next_batch = None;
|
||||
|
||||
loop {
|
||||
let maybe_batched = self
|
||||
.read_batch_from_connection(pgb, &tenant_id, &timeline_id, &ctx)
|
||||
.await?;
|
||||
let batched = match maybe_batched {
|
||||
Some(BatchOrEof::Batch(b)) => b,
|
||||
Some(BatchOrEof::Eof) => {
|
||||
break;
|
||||
// read request bytes (it's exactly 1 PagestreamFeMessage per CopyData)
|
||||
let msg = tokio::select! {
|
||||
biased;
|
||||
_ = self.cancel.cancelled() => {
|
||||
return Err(QueryError::Shutdown)
|
||||
}
|
||||
None => {
|
||||
continue;
|
||||
msg = pgb.read_message() => { msg }
|
||||
};
|
||||
let copy_data_bytes = match msg? {
|
||||
Some(FeMessage::CopyData(bytes)) => bytes,
|
||||
Some(FeMessage::Terminate) => break,
|
||||
Some(m) => {
|
||||
return Err(QueryError::Other(anyhow::anyhow!(
|
||||
"unexpected message: {m:?} during COPY"
|
||||
)));
|
||||
}
|
||||
None => break, // client disconnected
|
||||
};
|
||||
|
||||
trace!("query: {copy_data_bytes:?}");
|
||||
fail::fail_point!("ps::handle-pagerequest-message");
|
||||
|
||||
// parse request
|
||||
let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;
|
||||
|
||||
// invoke handler function
|
||||
let (handler_result, span) = match neon_fe_msg {
|
||||
PagestreamFeMessage::Exists(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::exists");
|
||||
let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::Nblocks(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
|
||||
let span = tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::GetPage(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::getpage");
|
||||
// shard_id is filled in by the handler
|
||||
let span = tracing::info_span!("handle_get_page_at_lsn_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_get_page_at_lsn_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::DbSize(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
|
||||
let span = tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
span,
|
||||
)
|
||||
}
|
||||
PagestreamFeMessage::GetSlruSegment(req) => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
|
||||
let span = tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn);
|
||||
(
|
||||
self.handle_get_slru_segment_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
span,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
for batch in batched {
|
||||
// invoke handler function
|
||||
let (handler_results, span): (
|
||||
Vec<Result<PagestreamBeMessage, PageStreamError>>,
|
||||
_,
|
||||
) = match batch {
|
||||
BatchedFeMessage::Exists { span, req } => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::exists");
|
||||
(
|
||||
vec![
|
||||
self.handle_get_rel_exists_request(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
&req,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
// Map handler result to protocol behavior.
|
||||
// Some handler errors cause exit from pagestream protocol.
|
||||
// Other handler errors are sent back as an error message and we stay in pagestream protocol.
|
||||
let response_msg = match handler_result {
|
||||
Err(e) => match &e {
|
||||
PageStreamError::Shutdown => {
|
||||
// If we fail to fulfil a request during shutdown, which may be _because_ of
|
||||
// shutdown, then do not send the error to the client. Instead just drop the
|
||||
// connection.
|
||||
span.in_scope(|| info!("dropping connection due to shutdown"));
|
||||
return Err(QueryError::Shutdown);
|
||||
}
|
||||
BatchedFeMessage::Nblocks { span, req } => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::nblocks");
|
||||
(
|
||||
vec![
|
||||
self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
PageStreamError::Reconnect(reason) => {
|
||||
span.in_scope(|| info!("handler requested reconnect: {reason}"));
|
||||
return Err(QueryError::Reconnect);
|
||||
}
|
||||
BatchedFeMessage::GetPage {
|
||||
span,
|
||||
shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
} => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::getpage");
|
||||
(
|
||||
{
|
||||
let npages = pages.len();
|
||||
let res = self
|
||||
.handle_get_page_at_lsn_request_batched(
|
||||
&shard,
|
||||
effective_request_lsn,
|
||||
pages,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(span.clone())
|
||||
.await;
|
||||
assert_eq!(res.len(), npages);
|
||||
res
|
||||
},
|
||||
span,
|
||||
)
|
||||
PageStreamError::Read(_)
|
||||
| PageStreamError::LsnTimeout(_)
|
||||
| PageStreamError::NotFound(_)
|
||||
| PageStreamError::BadRequest(_) => {
|
||||
// print all the details to the log with {:#}, but for the client the
|
||||
// error message is enough. Do not log if shutting down, as the anyhow::Error
|
||||
// here includes cancellation which is not an error.
|
||||
let full = utils::error::report_compact_sources(&e);
|
||||
span.in_scope(|| {
|
||||
error!("error reading relation or page version: {full:#}")
|
||||
});
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
}
|
||||
BatchedFeMessage::DbSize { span, req } => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::dbsize");
|
||||
(
|
||||
vec![
|
||||
self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::GetSlruSegment { span, req } => {
|
||||
fail::fail_point!("ps::handle-pagerequest-message::slrusegment");
|
||||
(
|
||||
vec![
|
||||
self.handle_get_slru_segment_request(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
&req,
|
||||
&ctx,
|
||||
)
|
||||
.instrument(span.clone())
|
||||
.await,
|
||||
],
|
||||
span,
|
||||
)
|
||||
}
|
||||
BatchedFeMessage::RespondError { span, error } => {
|
||||
// We've already decided to respond with an error, so we don't need to
|
||||
// call the handler.
|
||||
(vec![Err(error)], span)
|
||||
}
|
||||
};
|
||||
},
|
||||
Ok(response_msg) => response_msg,
|
||||
};
|
||||
|
||||
// Map handler result to protocol behavior.
|
||||
// Some handler errors cause exit from pagestream protocol.
|
||||
// Other handler errors are sent back as an error message and we stay in pagestream protocol.
|
||||
for handler_result in handler_results {
|
||||
let response_msg = match handler_result {
|
||||
Err(e) => match &e {
|
||||
PageStreamError::Shutdown => {
|
||||
// If we fail to fulfil a request during shutdown, which may be _because_ of
|
||||
// shutdown, then do not send the error to the client. Instead just drop the
|
||||
// connection.
|
||||
span.in_scope(|| info!("dropping connection due to shutdown"));
|
||||
return Err(QueryError::Shutdown);
|
||||
}
|
||||
PageStreamError::Reconnect(reason) => {
|
||||
span.in_scope(|| info!("handler requested reconnect: {reason}"));
|
||||
return Err(QueryError::Reconnect);
|
||||
}
|
||||
PageStreamError::Read(_)
|
||||
| PageStreamError::LsnTimeout(_)
|
||||
| PageStreamError::NotFound(_)
|
||||
| PageStreamError::BadRequest(_) => {
|
||||
// print all the details to the log with {:#}, but for the client the
|
||||
// error message is enough. Do not log if shutting down, as the anyhow::Error
|
||||
// here includes cancellation which is not an error.
|
||||
let full = utils::error::report_compact_sources(&e);
|
||||
span.in_scope(|| {
|
||||
error!("error reading relation or page version: {full:#}")
|
||||
});
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
}
|
||||
},
|
||||
Ok(response_msg) => response_msg,
|
||||
};
|
||||
|
||||
// marshal & transmit response message
|
||||
pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
|
||||
// marshal & transmit response message
|
||||
pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
|
||||
tokio::select! {
|
||||
biased;
|
||||
_ = self.cancel.cancelled() => {
|
||||
// We were requested to shut down.
|
||||
info!("shutdown request received in page handler");
|
||||
return Err(QueryError::Shutdown)
|
||||
}
|
||||
tokio::select! {
|
||||
biased;
|
||||
_ = self.cancel.cancelled() => {
|
||||
// We were requested to shut down.
|
||||
info!("shutdown request received in page handler");
|
||||
return Err(QueryError::Shutdown)
|
||||
}
|
||||
res = pgb.flush() => {
|
||||
res?;
|
||||
}
|
||||
res = pgb.flush() => {
|
||||
res?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1068,26 +767,21 @@ impl PageServerHandler {
|
||||
));
|
||||
}
|
||||
|
||||
// Check explicitly for INVALID just to get a less scary error message if the request is obviously bogus
|
||||
if request_lsn == Lsn::INVALID {
|
||||
return Err(PageStreamError::BadRequest(
|
||||
"invalid LSN(0) in request".into(),
|
||||
));
|
||||
}
|
||||
|
||||
// Clients should only read from recent LSNs on their timeline, or from locations holding an LSN lease.
|
||||
//
|
||||
// We may have older data available, but we make a best effort to detect this case and return an error,
|
||||
// to distinguish a misbehaving client (asking for old LSN) from a storage issue (data missing at a legitimate LSN).
|
||||
if request_lsn < **latest_gc_cutoff_lsn && !timeline.is_gc_blocked_by_lsn_lease_deadline() {
|
||||
if request_lsn < **latest_gc_cutoff_lsn {
|
||||
let gc_info = &timeline.gc_info.read().unwrap();
|
||||
if !gc_info.leases.contains_key(&request_lsn) {
|
||||
return Err(
|
||||
// The requested LSN is below gc cutoff and is not guarded by a lease.
|
||||
|
||||
// Check explicitly for INVALID just to get a less scary error message if the
|
||||
// request is obviously bogus
|
||||
return Err(if request_lsn == Lsn::INVALID {
|
||||
PageStreamError::BadRequest("invalid LSN(0) in request".into())
|
||||
} else {
|
||||
PageStreamError::BadRequest(format!(
|
||||
"tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
|
||||
request_lsn, **latest_gc_cutoff_lsn
|
||||
).into())
|
||||
);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1270,30 +964,60 @@ impl PageServerHandler {
|
||||
}))
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
async fn handle_get_page_at_lsn_request_batched(
|
||||
#[instrument(skip_all, fields(shard_id))]
|
||||
async fn handle_get_page_at_lsn_request(
|
||||
&mut self,
|
||||
timeline: &Timeline,
|
||||
effective_lsn: Lsn,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
req: &PagestreamGetPageRequest,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<Result<PagestreamBeMessage, PageStreamError>> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
let _timer = timeline.query_metrics.start_timer_many(
|
||||
metrics::SmgrQueryType::GetPageAtLsn,
|
||||
pages.len(),
|
||||
) -> Result<PagestreamBeMessage, PageStreamError> {
|
||||
let timeline = match self
|
||||
.timeline_handles
|
||||
.get(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
ShardSelector::Page(rel_block_to_key(req.rel, req.blkno)),
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(tl) => tl,
|
||||
Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => {
|
||||
// We already know this tenant exists in general, because we resolved it at
|
||||
// start of connection. Getting a NotFound here indicates that the shard containing
|
||||
// the requested page is not present on this node: the client's knowledge of shard->pageserver
|
||||
// mapping is out of date.
|
||||
//
|
||||
// Closing the connection by returning `Reconnect` has the side effect of rate-limiting the above message, via
|
||||
// client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration
|
||||
// and talk to a different pageserver.
|
||||
return Err(PageStreamError::Reconnect(
|
||||
"getpage@lsn request routed to wrong shard".into(),
|
||||
));
|
||||
}
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
let _timer = timeline
|
||||
.query_metrics
|
||||
.start_timer(metrics::SmgrQueryType::GetPageAtLsn, ctx);
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
|
||||
let lsn = Self::wait_or_get_last_lsn(
|
||||
&timeline,
|
||||
req.request_lsn,
|
||||
req.not_modified_since,
|
||||
&latest_gc_cutoff_lsn,
|
||||
ctx,
|
||||
);
|
||||
)
|
||||
.await?;
|
||||
|
||||
let pages = timeline
|
||||
.get_rel_page_at_lsn_batched(pages, effective_lsn, ctx)
|
||||
.await;
|
||||
let page = timeline
|
||||
.get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), ctx)
|
||||
.await?;
|
||||
|
||||
Vec::from_iter(pages.into_iter().map(|page| {
|
||||
page.map(|page| {
|
||||
PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse { page })
|
||||
})
|
||||
.map_err(PageStreamError::from)
|
||||
Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
|
||||
page,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -1950,13 +1674,6 @@ fn set_tracing_field_shard_id(timeline: &Timeline) {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
}
|
||||
|
||||
struct WaitedForLsn(Lsn);
|
||||
impl From<WaitedForLsn> for Lsn {
|
||||
fn from(WaitedForLsn(lsn): WaitedForLsn) -> Self {
|
||||
lsn
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use utils::shard::ShardCount;
|
||||
|
||||
@@ -10,15 +10,10 @@ use super::tenant::{PageReconstructError, Timeline};
|
||||
use crate::aux_file;
|
||||
use crate::context::RequestContext;
|
||||
use crate::keyspace::{KeySpace, KeySpaceAccum};
|
||||
use crate::span::{
|
||||
debug_assert_current_span_has_tenant_and_timeline_id,
|
||||
debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id,
|
||||
};
|
||||
use crate::tenant::timeline::GetVectoredError;
|
||||
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
|
||||
use anyhow::{ensure, Context};
|
||||
use bytes::{Buf, Bytes, BytesMut};
|
||||
use enum_map::Enum;
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::key::{
|
||||
dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key,
|
||||
@@ -35,7 +30,7 @@ use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{hash_map, BTreeMap, HashMap, HashSet};
|
||||
use std::collections::{hash_map, HashMap, HashSet};
|
||||
use std::ops::ControlFlow;
|
||||
use std::ops::Range;
|
||||
use strum::IntoEnumIterator;
|
||||
@@ -198,195 +193,26 @@ impl Timeline {
|
||||
version: Version<'_>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Bytes, PageReconstructError> {
|
||||
match version {
|
||||
Version::Lsn(effective_lsn) => {
|
||||
let pages = smallvec::smallvec![(tag, blknum)];
|
||||
let res = self
|
||||
.get_rel_page_at_lsn_batched(pages, effective_lsn, ctx)
|
||||
.await;
|
||||
assert_eq!(res.len(), 1);
|
||||
res.into_iter().next().unwrap()
|
||||
}
|
||||
Version::Modified(modification) => {
|
||||
if tag.relnode == 0 {
|
||||
return Err(PageReconstructError::Other(
|
||||
RelationError::InvalidRelnode.into(),
|
||||
));
|
||||
}
|
||||
|
||||
let nblocks = self.get_rel_size(tag, version, ctx).await?;
|
||||
if blknum >= nblocks {
|
||||
debug!(
|
||||
"read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page",
|
||||
tag,
|
||||
blknum,
|
||||
version.get_lsn(),
|
||||
nblocks
|
||||
);
|
||||
return Ok(ZERO_PAGE.clone());
|
||||
}
|
||||
|
||||
let key = rel_block_to_key(tag, blknum);
|
||||
modification.get(key, ctx).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`Self::get_rel_page_at_lsn`], but returns a batch of pages.
|
||||
///
|
||||
/// The ordering of the returned vec corresponds to the ordering of `pages`.
|
||||
pub(crate) async fn get_rel_page_at_lsn_batched(
|
||||
&self,
|
||||
pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>,
|
||||
effective_lsn: Lsn,
|
||||
ctx: &RequestContext,
|
||||
) -> Vec<Result<Bytes, PageReconstructError>> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
|
||||
let mut slots_filled = 0;
|
||||
let page_count = pages.len();
|
||||
|
||||
// Would be nice to use smallvec here but it doesn't provide the spare_capacity_mut() API.
|
||||
let mut result = Vec::with_capacity(pages.len());
|
||||
let result_slots = result.spare_capacity_mut();
|
||||
|
||||
let mut keys_slots: BTreeMap<Key, smallvec::SmallVec<[usize; 1]>> = BTreeMap::default();
|
||||
for (response_slot_idx, (tag, blknum)) in pages.into_iter().enumerate() {
|
||||
if tag.relnode == 0 {
|
||||
result_slots[response_slot_idx].write(Err(PageReconstructError::Other(
|
||||
RelationError::InvalidRelnode.into(),
|
||||
)));
|
||||
|
||||
slots_filled += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let nblocks = match self
|
||||
.get_rel_size(tag, Version::Lsn(effective_lsn), ctx)
|
||||
.await
|
||||
{
|
||||
Ok(nblocks) => nblocks,
|
||||
Err(err) => {
|
||||
result_slots[response_slot_idx].write(Err(err));
|
||||
slots_filled += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if blknum >= nblocks {
|
||||
debug!(
|
||||
"read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page",
|
||||
tag, blknum, effective_lsn, nblocks
|
||||
);
|
||||
result_slots[response_slot_idx].write(Ok(ZERO_PAGE.clone()));
|
||||
slots_filled += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let key = rel_block_to_key(tag, blknum);
|
||||
|
||||
let key_slots = keys_slots.entry(key).or_default();
|
||||
key_slots.push(response_slot_idx);
|
||||
if tag.relnode == 0 {
|
||||
return Err(PageReconstructError::Other(
|
||||
RelationError::InvalidRelnode.into(),
|
||||
));
|
||||
}
|
||||
|
||||
let keyspace = {
|
||||
// add_key requires monotonicity
|
||||
let mut acc = KeySpaceAccum::new();
|
||||
for key in keys_slots
|
||||
.keys()
|
||||
// in fact it requires strong monotonicity
|
||||
.dedup()
|
||||
{
|
||||
acc.add_key(*key);
|
||||
}
|
||||
acc.to_keyspace()
|
||||
};
|
||||
|
||||
match self.get_vectored(keyspace, effective_lsn, ctx).await {
|
||||
Ok(results) => {
|
||||
for (key, res) in results {
|
||||
let mut key_slots = keys_slots.remove(&key).unwrap().into_iter();
|
||||
let first_slot = key_slots.next().unwrap();
|
||||
|
||||
for slot in key_slots {
|
||||
let clone = match &res {
|
||||
Ok(buf) => Ok(buf.clone()),
|
||||
Err(err) => Err(match err {
|
||||
PageReconstructError::Cancelled => {
|
||||
PageReconstructError::Cancelled
|
||||
}
|
||||
|
||||
x @ PageReconstructError::Other(_) |
|
||||
x @ PageReconstructError::AncestorLsnTimeout(_) |
|
||||
x @ PageReconstructError::WalRedo(_) |
|
||||
x @ PageReconstructError::MissingKey(_) => {
|
||||
PageReconstructError::Other(anyhow::anyhow!("there was more than one request for this key in the batch, error logged once: {x:?}"))
|
||||
},
|
||||
}),
|
||||
};
|
||||
|
||||
result_slots[slot].write(clone);
|
||||
slots_filled += 1;
|
||||
}
|
||||
|
||||
result_slots[first_slot].write(res);
|
||||
slots_filled += 1;
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
// this cannot really happen because get_vectored only errors globally on invalid LSN or too large batch size
|
||||
// (We enforce the max batch size outside of this function, in the code that constructs the batch request.)
|
||||
for slot in keys_slots.values().flatten() {
|
||||
// this whole `match` is a lot like `From<GetVectoredError> for PageReconstructError`
|
||||
// but without taking ownership of the GetVectoredError
|
||||
let err = match &err {
|
||||
GetVectoredError::Cancelled => {
|
||||
Err(PageReconstructError::Cancelled)
|
||||
}
|
||||
// TODO: restructure get_vectored API to make this error per-key
|
||||
GetVectoredError::MissingKey(err) => {
|
||||
Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys were missing: {err:?}")))
|
||||
}
|
||||
// TODO: restructure get_vectored API to make this error per-key
|
||||
GetVectoredError::GetReadyAncestorError(err) => {
|
||||
Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys required an ancestor that wasn't ready: {err:?}")))
|
||||
}
|
||||
// TODO: restructure get_vectored API to make this error per-key
|
||||
GetVectoredError::Other(err) => {
|
||||
Err(PageReconstructError::Other(
|
||||
anyhow::anyhow!("whole vectored get request failed: {err:?}"),
|
||||
))
|
||||
}
|
||||
// TODO: we can prevent this error class by moving this check into the type system
|
||||
GetVectoredError::InvalidLsn(e) => {
|
||||
Err(anyhow::anyhow!("invalid LSN: {e:?}").into())
|
||||
}
|
||||
// NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS
|
||||
// TODO: we can prevent this error class by moving this check into the type system
|
||||
GetVectoredError::Oversized(err) => {
|
||||
Err(anyhow::anyhow!(
|
||||
"batching oversized: {err:?}"
|
||||
)
|
||||
.into())
|
||||
}
|
||||
};
|
||||
|
||||
result_slots[*slot].write(err);
|
||||
}
|
||||
|
||||
slots_filled += keys_slots.values().map(|slots| slots.len()).sum::<usize>();
|
||||
}
|
||||
};
|
||||
|
||||
assert_eq!(slots_filled, page_count);
|
||||
// SAFETY:
|
||||
// 1. `result` and any of its uninit members are not read from until this point
|
||||
// 2. The length below is tracked at run-time and matches the number of requested pages.
|
||||
unsafe {
|
||||
result.set_len(page_count);
|
||||
let nblocks = self.get_rel_size(tag, version, ctx).await?;
|
||||
if blknum >= nblocks {
|
||||
debug!(
|
||||
"read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page",
|
||||
tag,
|
||||
blknum,
|
||||
version.get_lsn(),
|
||||
nblocks
|
||||
);
|
||||
return Ok(ZERO_PAGE.clone());
|
||||
}
|
||||
|
||||
result
|
||||
let key = rel_block_to_key(tag, blknum);
|
||||
version.get(self, key, ctx).await
|
||||
}
|
||||
|
||||
// Get size of a database in blocks
|
||||
@@ -2276,9 +2102,9 @@ impl<'a> Version<'a> {
|
||||
//--- Metadata structs stored in key-value pairs in the repository.
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct DbDirectory {
|
||||
struct DbDirectory {
|
||||
// (spcnode, dbnode) -> (do relmapper and PG_VERSION files exist)
|
||||
pub(crate) dbdirs: HashMap<(Oid, Oid), bool>,
|
||||
dbdirs: HashMap<(Oid, Oid), bool>,
|
||||
}
|
||||
|
||||
// The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of
|
||||
@@ -2287,8 +2113,8 @@ pub(crate) struct DbDirectory {
|
||||
// "pg_twophsae/0000000A000002E4".
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub(crate) struct TwoPhaseDirectory {
|
||||
pub(crate) xids: HashSet<TransactionId>,
|
||||
struct TwoPhaseDirectory {
|
||||
xids: HashSet<TransactionId>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@@ -2297,12 +2123,12 @@ struct TwoPhaseDirectoryV17 {
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Default)]
|
||||
pub(crate) struct RelDirectory {
|
||||
struct RelDirectory {
|
||||
// Set of relations that exist. (relfilenode, forknum)
|
||||
//
|
||||
// TODO: Store it as a btree or radix tree or something else that spans multiple
|
||||
// key-value pairs, if you have a lot of relations
|
||||
pub(crate) rels: HashSet<(Oid, u8)>,
|
||||
rels: HashSet<(Oid, u8)>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
@@ -2311,9 +2137,9 @@ struct RelSizeEntry {
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Default)]
|
||||
pub(crate) struct SlruSegmentDirectory {
|
||||
struct SlruSegmentDirectory {
|
||||
// Set of SLRU segments that exist.
|
||||
pub(crate) segments: HashSet<u32>,
|
||||
segments: HashSet<u32>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Debug, enum_map::Enum)]
|
||||
|
||||
@@ -381,8 +381,6 @@ pub enum TaskKind {
|
||||
UnitTest,
|
||||
|
||||
DetachAncestor,
|
||||
|
||||
ImportPgdata,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
||||
File diff suppressed because it is too large
@@ -1719,11 +1719,10 @@ impl TenantManager {
|
||||
parent_layers.push(relative_path.to_owned());
|
||||
}
|
||||
}
|
||||
|
||||
if parent_layers.is_empty() {
|
||||
tracing::info!("Ancestor shard has no resident layer to hard link");
|
||||
}
|
||||
|
||||
debug_assert!(
|
||||
!parent_layers.is_empty(),
|
||||
"shutdown cannot empty the layermap"
|
||||
);
|
||||
(parent_timelines, parent_layers)
|
||||
};
|
||||
|
||||
|
||||
@@ -197,9 +197,8 @@ use utils::backoff::{
|
||||
self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
|
||||
};
|
||||
use utils::pausable_failpoint;
|
||||
use utils::shard::ShardNumber;
|
||||
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, Mutex, OnceLock};
|
||||
use std::time::Duration;
|
||||
@@ -223,7 +222,7 @@ use crate::task_mgr::shutdown_token;
|
||||
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
|
||||
use crate::tenant::remote_timeline_client::download::download_retry;
|
||||
use crate::tenant::storage_layer::AsLayerDesc;
|
||||
use crate::tenant::upload_queue::{Delete, OpType, UploadQueueStoppedDeletable};
|
||||
use crate::tenant::upload_queue::{Delete, UploadQueueStoppedDeletable};
|
||||
use crate::tenant::TIMELINES_SEGMENT_NAME;
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
@@ -241,10 +240,8 @@ use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use self::index::IndexPart;
|
||||
|
||||
use super::config::AttachedLocationConfig;
|
||||
use super::metadata::MetadataUpdate;
|
||||
use super::storage_layer::{Layer, LayerName, ResidentLayer};
|
||||
use super::timeline::import_pgdata;
|
||||
use super::upload_queue::{NotInitialized, SetDeletedFlagProgress};
|
||||
use super::{DeleteTimelineError, Generation};
|
||||
|
||||
@@ -304,36 +301,6 @@ pub enum WaitCompletionError {
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error("Upload queue either in unexpected state or hasn't downloaded manifest yet")]
|
||||
pub struct UploadQueueNotReadyError;
|
||||
/// Behavioral modes that enable seamless live migration.
|
||||
///
|
||||
/// See docs/rfcs/028-pageserver-migration.md to understand how these fit in.
|
||||
struct RemoteTimelineClientConfig {
|
||||
/// If this is false, then update to remote_consistent_lsn are dropped rather
|
||||
/// than being submitted to DeletionQueue for validation. This behavior is
|
||||
/// used when a tenant attachment is known to have a stale generation number,
|
||||
/// such that validation attempts will always fail. This is not necessary
|
||||
/// for correctness, but avoids spamming error statistics with failed validations
|
||||
/// when doing migrations of tenants.
|
||||
process_remote_consistent_lsn_updates: bool,
|
||||
|
||||
/// If this is true, then object deletions are held in a buffer in RemoteTimelineClient
|
||||
/// rather than being submitted to the DeletionQueue. This behavior is used when a tenant
|
||||
/// is known to be multi-attached, in order to avoid disrupting other attached tenants
|
||||
/// whose generations' metadata refers to the deleted objects.
|
||||
block_deletions: bool,
|
||||
}
|
||||
|
||||
/// RemoteTimelineClientConfig's state is entirely driven by LocationConf, but we do
|
||||
/// not carry the entire LocationConf structure: it's much more than we need. The From
|
||||
/// impl extracts the subset of the LocationConf that is interesting to RemoteTimelineClient.
|
||||
impl From<&AttachedLocationConfig> for RemoteTimelineClientConfig {
|
||||
fn from(lc: &AttachedLocationConfig) -> Self {
|
||||
Self {
|
||||
block_deletions: !lc.may_delete_layers_hint(),
|
||||
process_remote_consistent_lsn_updates: lc.may_upload_layers_hint(),
|
||||
}
|
||||
}
|
||||
}
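A minimal sketch of the config-subset idea documented above, assuming made-up stand-ins for the location config and its hint methods; it only illustrates deriving a small behaviour struct from a larger attachment config via a From impl.

#[derive(Clone, Copy)]
enum AttachmentMode { Single, Multi, Stale }

// Stand-in for the real attached-location config.
struct LocationConfig { mode: AttachmentMode }

impl LocationConfig {
    fn may_delete_layers_hint(&self) -> bool { matches!(self.mode, AttachmentMode::Single) }
    fn may_upload_layers_hint(&self) -> bool { !matches!(self.mode, AttachmentMode::Stale) }
}

// The subset of the location config the client actually cares about.
#[derive(Debug)]
struct ClientConfig {
    block_deletions: bool,
    process_remote_consistent_lsn_updates: bool,
}

impl From<&LocationConfig> for ClientConfig {
    fn from(lc: &LocationConfig) -> Self {
        ClientConfig {
            block_deletions: !lc.may_delete_layers_hint(),
            process_remote_consistent_lsn_updates: lc.may_upload_layers_hint(),
        }
    }
}

fn main() {
    let cfg = ClientConfig::from(&LocationConfig { mode: AttachmentMode::Multi });
    println!("{cfg:?}");
}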
|
||||
|
||||
/// A client for accessing a timeline's data in remote storage.
|
||||
///
|
||||
@@ -354,7 +321,7 @@ impl From<&AttachedLocationConfig> for RemoteTimelineClientConfig {
|
||||
/// in the index part file, whenever timeline metadata is uploaded.
|
||||
///
|
||||
/// Downloads are not queued, they are performed immediately.
|
||||
pub(crate) struct RemoteTimelineClient {
|
||||
pub struct RemoteTimelineClient {
|
||||
conf: &'static PageServerConf,
|
||||
|
||||
runtime: tokio::runtime::Handle,
|
||||
@@ -371,9 +338,6 @@ pub(crate) struct RemoteTimelineClient {
|
||||
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
|
||||
/// Subset of tenant configuration used to control upload behaviors during migrations
|
||||
config: std::sync::RwLock<RemoteTimelineClientConfig>,
|
||||
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
@@ -384,14 +348,13 @@ impl RemoteTimelineClient {
|
||||
/// Note: the caller must initialize the upload queue before any uploads can be scheduled,
|
||||
/// by calling init_upload_queue.
|
||||
///
|
||||
pub(crate) fn new(
|
||||
pub fn new(
|
||||
remote_storage: GenericRemoteStorage,
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
conf: &'static PageServerConf,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
generation: Generation,
|
||||
location_conf: &AttachedLocationConfig,
|
||||
) -> RemoteTimelineClient {
|
||||
RemoteTimelineClient {
|
||||
conf,
|
||||
@@ -411,7 +374,6 @@ impl RemoteTimelineClient {
|
||||
&tenant_shard_id,
|
||||
&timeline_id,
|
||||
)),
|
||||
config: std::sync::RwLock::new(RemoteTimelineClientConfig::from(location_conf)),
|
||||
cancel: CancellationToken::new(),
|
||||
}
|
||||
}
|
||||
@@ -467,43 +429,6 @@ impl RemoteTimelineClient {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Notify this client of a change to its parent tenant's config, as this may cause us to
|
||||
/// take action (unblocking deletions when transitioning from AttachedMulti to AttachedSingle)
|
||||
pub(super) fn update_config(&self, location_conf: &AttachedLocationConfig) {
|
||||
let new_conf = RemoteTimelineClientConfig::from(location_conf);
|
||||
let unblocked = !new_conf.block_deletions;
|
||||
|
||||
// Update config before draining deletions, so that we don't race with more being
|
||||
// inserted. This can result in deletions happening out of order, but that does not
|
||||
// violate any invariants: deletions only need to be ordered relative to upload of the index
|
||||
// that dereferences the deleted objects, and we are not changing that order.
|
||||
*self.config.write().unwrap() = new_conf;
|
||||
|
||||
if unblocked {
|
||||
// If we may now delete layers, drain any that were blocked in our old
|
||||
// configuration state
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
|
||||
if let Ok(queue) = queue_locked.initialized_mut() {
|
||||
let blocked_deletions = std::mem::take(&mut queue.blocked_deletions);
|
||||
for d in blocked_deletions {
|
||||
if let Err(e) = self.deletion_queue_client.push_layers_sync(
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.generation,
|
||||
d.layers,
|
||||
) {
|
||||
// This could happen if the pageserver is shut down while a tenant
|
||||
// is transitioning from a deletion-blocked state: we will leak some
|
||||
// S3 objects in this case.
|
||||
warn!("Failed to drain blocked deletions: {}", e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
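A small, std-only sketch of the ordering used by update_config above, with stand-in types: publish the new config first, and only then drain whatever the old config had held back, so newly queued work cannot race back into the blocked state.

use std::collections::VecDeque;
use std::sync::Mutex;

struct Client {
    block_deletions: Mutex<bool>,
    blocked: Mutex<VecDeque<String>>, // stand-in for blocked deletion batches
}

impl Client {
    fn update_config(&self, block_deletions: bool) {
        // Publish the new config before draining, so work queued concurrently
        // under the new config is not blocked again.
        *self.block_deletions.lock().unwrap() = block_deletions;

        if !block_deletions {
            let drained: Vec<String> = self.blocked.lock().unwrap().drain(..).collect();
            for item in drained {
                // The real code submits these to a deletion queue and logs on
                // failure; here we just print them.
                println!("executing previously blocked deletion: {item}");
            }
        }
    }
}

fn main() {
    let c = Client {
        block_deletions: Mutex::new(true),
        blocked: Mutex::new(VecDeque::from(vec!["layer-A".to_string()])),
    };
    c.update_config(false);
}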
|
||||
|
||||
/// Returns `None` if nothing is yet uploaded, `Some(disk_consistent_lsn)` otherwise.
|
||||
pub fn remote_consistent_lsn_projected(&self) -> Option<Lsn> {
|
||||
match &mut *self.upload_queue.lock().unwrap() {
|
||||
@@ -814,18 +739,6 @@ impl RemoteTimelineClient {
|
||||
Ok(need_wait)
|
||||
}
|
||||
|
||||
/// Launch an index-file upload operation in the background, setting `import_pgdata` field.
|
||||
pub(crate) fn schedule_index_upload_for_import_pgdata_state_update(
|
||||
self: &Arc<Self>,
|
||||
state: Option<import_pgdata::index_part_format::Root>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
upload_queue.dirty.import_pgdata = state;
|
||||
self.schedule_index_upload(upload_queue)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Launch an index-file upload operation in the background, if necessary.
|
||||
///
|
||||
@@ -1103,7 +1016,7 @@ impl RemoteTimelineClient {
|
||||
"scheduled layer file upload {layer}",
|
||||
);
|
||||
|
||||
let op = UploadOp::UploadLayer(layer, metadata, None);
|
||||
let op = UploadOp::UploadLayer(layer, metadata);
|
||||
self.metric_begin(&op);
|
||||
upload_queue.queued_operations.push_back(op);
|
||||
}
|
||||
@@ -1818,7 +1731,7 @@ impl RemoteTimelineClient {
|
||||
// have finished.
|
||||
upload_queue.inprogress_tasks.is_empty()
|
||||
}
|
||||
UploadOp::Delete(..) => {
|
||||
UploadOp::Delete(_) => {
|
||||
// Wait for preceding uploads to finish. Concurrent deletions are OK, though.
|
||||
upload_queue.num_inprogress_deletions == upload_queue.inprogress_tasks.len()
|
||||
}
|
||||
@@ -1846,32 +1759,19 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
|
||||
// We can launch this task. Remove it from the queue first.
|
||||
let mut next_op = upload_queue.queued_operations.pop_front().unwrap();
|
||||
let next_op = upload_queue.queued_operations.pop_front().unwrap();
|
||||
|
||||
debug!("starting op: {}", next_op);
|
||||
|
||||
// Update the counters and prepare
|
||||
match &mut next_op {
|
||||
UploadOp::UploadLayer(layer, meta, mode) => {
|
||||
if upload_queue
|
||||
.recently_deleted
|
||||
.remove(&(layer.layer_desc().layer_name().clone(), meta.generation))
|
||||
{
|
||||
*mode = Some(OpType::FlushDeletion);
|
||||
} else {
|
||||
*mode = Some(OpType::MayReorder)
|
||||
}
|
||||
// Update the counters
|
||||
match next_op {
|
||||
UploadOp::UploadLayer(_, _) => {
|
||||
upload_queue.num_inprogress_layer_uploads += 1;
|
||||
}
|
||||
UploadOp::UploadMetadata { .. } => {
|
||||
upload_queue.num_inprogress_metadata_uploads += 1;
|
||||
}
|
||||
UploadOp::Delete(Delete { layers }) => {
|
||||
for (name, meta) in layers {
|
||||
upload_queue
|
||||
.recently_deleted
|
||||
.insert((name.clone(), meta.generation));
|
||||
}
|
||||
UploadOp::Delete(_) => {
|
||||
upload_queue.num_inprogress_deletions += 1;
|
||||
}
|
||||
UploadOp::Barrier(sender) => {
|
||||
@@ -1947,66 +1847,7 @@ impl RemoteTimelineClient {
|
||||
}
|
||||
|
||||
let upload_result: anyhow::Result<()> = match &task.op {
|
||||
UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
|
||||
if let Some(OpType::FlushDeletion) = mode {
|
||||
if self.config.read().unwrap().block_deletions {
|
||||
// Of course, this is not efficient... but usually the queue should be empty.
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
let mut detected = false;
|
||||
if let Ok(queue) = queue_locked.initialized_mut() {
|
||||
for list in queue.blocked_deletions.iter_mut() {
|
||||
list.layers.retain(|(name, meta)| {
|
||||
if name == &layer.layer_desc().layer_name()
|
||||
&& meta.generation == layer_metadata.generation
|
||||
{
|
||||
detected = true;
|
||||
// remove the layer from deletion queue
|
||||
false
|
||||
} else {
|
||||
// keep the layer
|
||||
true
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
if detected {
|
||||
info!(
|
||||
"cancelled blocked deletion of layer {} at gen {:?}",
|
||||
layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// TODO: we did not guarantee that the upload task starts after the deletion task, so there could possibly be race conditions
|
||||
// that we still get the layer deleted. But this only happens if someone creates a layer immediately after it's deleted,
|
||||
// which is not possible in the current system.
|
||||
info!(
|
||||
"waiting for deletion queue flush to complete before uploading layer {} at gen {:?}",
|
||||
layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation
|
||||
);
|
||||
{
|
||||
// We are going to flush, we can clean up the recently deleted list.
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
if let Ok(queue) = queue_locked.initialized_mut() {
|
||||
queue.recently_deleted.clear();
|
||||
}
|
||||
}
|
||||
if let Err(e) = self.deletion_queue_client.flush_execute().await {
|
||||
warn!(
|
||||
"failed to flush the deletion queue before uploading layer {} at gen {:?}, still proceeding to upload: {e:#} ",
|
||||
layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"done flushing deletion queue before uploading layer {} at gen {:?}",
|
||||
layer.layer_desc().layer_name(),
|
||||
layer_metadata.generation
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
UploadOp::UploadLayer(ref layer, ref layer_metadata) => {
|
||||
let local_path = layer.local_path();
|
||||
|
||||
// We should only be uploading layers created by this `Tenant`'s lifetime, so
|
||||
@@ -2071,24 +1912,16 @@ impl RemoteTimelineClient {
|
||||
res
|
||||
}
|
||||
UploadOp::Delete(delete) => {
|
||||
if self.config.read().unwrap().block_deletions {
|
||||
let mut queue_locked = self.upload_queue.lock().unwrap();
|
||||
if let Ok(queue) = queue_locked.initialized_mut() {
|
||||
queue.blocked_deletions.push(delete.clone());
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
pausable_failpoint!("before-delete-layer-pausable");
|
||||
self.deletion_queue_client
|
||||
.push_layers(
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.generation,
|
||||
delete.layers.clone(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!(e))
|
||||
}
|
||||
pausable_failpoint!("before-delete-layer-pausable");
|
||||
self.deletion_queue_client
|
||||
.push_layers(
|
||||
self.tenant_shard_id,
|
||||
self.timeline_id,
|
||||
self.generation,
|
||||
delete.layers.clone(),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| anyhow::anyhow!(e))
|
||||
}
|
||||
unexpected @ UploadOp::Barrier(_) | unexpected @ UploadOp::Shutdown => {
|
||||
// unreachable. Barrier operations are handled synchronously in
|
||||
@@ -2170,7 +2003,7 @@ impl RemoteTimelineClient {
|
||||
upload_queue.inprogress_tasks.remove(&task.task_id);
|
||||
|
||||
let lsn_update = match task.op {
|
||||
UploadOp::UploadLayer(_, _, _) => {
|
||||
UploadOp::UploadLayer(_, _) => {
|
||||
upload_queue.num_inprogress_layer_uploads -= 1;
|
||||
None
|
||||
}
|
||||
@@ -2195,16 +2028,8 @@ impl RemoteTimelineClient {
|
||||
// Legacy mode: skip validating generation
|
||||
upload_queue.visible_remote_consistent_lsn.store(lsn);
|
||||
None
|
||||
} else if self
|
||||
.config
|
||||
.read()
|
||||
.unwrap()
|
||||
.process_remote_consistent_lsn_updates
|
||||
{
|
||||
Some((lsn, upload_queue.visible_remote_consistent_lsn.clone()))
|
||||
} else {
|
||||
// Our config disables remote_consistent_lsn updates: drop it.
|
||||
None
|
||||
Some((lsn, upload_queue.visible_remote_consistent_lsn.clone()))
|
||||
}
|
||||
}
|
||||
UploadOp::Delete(_) => {
|
||||
@@ -2247,7 +2072,7 @@ impl RemoteTimelineClient {
|
||||
)> {
|
||||
use RemoteTimelineClientMetricsCallTrackSize::DontTrackSize;
|
||||
let res = match op {
|
||||
UploadOp::UploadLayer(_, m, _) => (
|
||||
UploadOp::UploadLayer(_, m) => (
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Upload,
|
||||
RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size),
|
||||
@@ -2341,10 +2166,8 @@ impl RemoteTimelineClient {
|
||||
queued_operations: VecDeque::default(),
|
||||
#[cfg(feature = "testing")]
|
||||
dangling_files: HashMap::default(),
|
||||
blocked_deletions: Vec::new(),
|
||||
shutting_down: false,
|
||||
shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
|
||||
recently_deleted: HashSet::new(),
|
||||
};
|
||||
|
||||
let upload_queue = std::mem::replace(
|
||||
@@ -2408,28 +2231,6 @@ impl RemoteTimelineClient {
|
||||
UploadQueue::Initialized(x) => x.no_pending_work(),
|
||||
}
|
||||
}
|
||||
|
||||
/// 'foreign' in the sense that it does not belong to this tenant shard. This method
|
||||
/// is used during GC for other shards to get the index of shard zero.
|
||||
pub(crate) async fn download_foreign_index(
|
||||
&self,
|
||||
shard_number: ShardNumber,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(IndexPart, Generation, std::time::SystemTime), DownloadError> {
|
||||
let foreign_shard_id = TenantShardId {
|
||||
shard_number,
|
||||
shard_count: self.tenant_shard_id.shard_count,
|
||||
tenant_id: self.tenant_shard_id.tenant_id,
|
||||
};
|
||||
download_index_part(
|
||||
&self.storage_impl,
|
||||
&foreign_shard_id,
|
||||
&self.timeline_id,
|
||||
Generation::MAX,
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct UploadQueueAccessor<'a> {
|
||||
@@ -2578,7 +2379,6 @@ mod tests {
|
||||
use crate::{
|
||||
context::RequestContext,
|
||||
tenant::{
|
||||
config::AttachmentMode,
|
||||
harness::{TenantHarness, TIMELINE_ID},
|
||||
storage_layer::layer::local_layer_path,
|
||||
Tenant, Timeline,
|
||||
@@ -2664,10 +2464,6 @@ mod tests {
|
||||
|
||||
/// Construct a RemoteTimelineClient in an arbitrary generation
|
||||
fn build_client(&self, generation: Generation) -> Arc<RemoteTimelineClient> {
|
||||
let location_conf = AttachedLocationConfig {
|
||||
generation,
|
||||
attach_mode: AttachmentMode::Single,
|
||||
};
|
||||
Arc::new(RemoteTimelineClient {
|
||||
conf: self.harness.conf,
|
||||
runtime: tokio::runtime::Handle::current(),
|
||||
@@ -2681,7 +2477,6 @@ mod tests {
|
||||
&self.harness.tenant_shard_id,
|
||||
&TIMELINE_ID,
|
||||
)),
|
||||
config: std::sync::RwLock::new(RemoteTimelineClientConfig::from(&location_conf)),
|
||||
cancel: CancellationToken::new(),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -706,7 +706,7 @@ where
|
||||
.and_then(|x| x)
|
||||
}
|
||||
|
||||
pub(crate) async fn download_retry_forever<T, O, F>(
|
||||
async fn download_retry_forever<T, O, F>(
|
||||
op: O,
|
||||
description: &str,
|
||||
cancel: &CancellationToken,
|
||||
|
||||
@@ -12,7 +12,6 @@ use utils::id::TimelineId;
|
||||
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
use crate::tenant::storage_layer::LayerName;
|
||||
use crate::tenant::timeline::import_pgdata;
|
||||
use crate::tenant::Generation;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
|
||||
@@ -38,13 +37,6 @@ pub struct IndexPart {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub archived_at: Option<NaiveDateTime>,
|
||||
|
||||
/// This field supports import-from-pgdata ("fast imports" platform feature).
|
||||
/// We don't currently use fast imports, so this field is None for all production timelines.
|
||||
/// See <https://github.com/neondatabase/neon/pull/9218> for more information.
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub import_pgdata: Option<import_pgdata::index_part_format::Root>,
|
||||
|
||||
/// Per layer file name metadata, which can be present for a present or missing layer file.
|
||||
///
|
||||
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
|
||||
@@ -98,11 +90,10 @@ impl IndexPart {
|
||||
/// - 7: metadata_bytes is no longer written, but still read
|
||||
/// - 8: added `archived_at`
|
||||
/// - 9: +gc_blocking
|
||||
/// - 10: +import_pgdata
|
||||
const LATEST_VERSION: usize = 10;
|
||||
const LATEST_VERSION: usize = 9;
|
||||
|
||||
// Versions we may see when reading from a bucket.
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
|
||||
pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9];
|
||||
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
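A brief sketch of the version gating these constants support when loading an index file: accept only versions the code knows how to interpret. The values and function name here are illustrative, not the pageserver's actual API.

const LATEST_VERSION: usize = 10;
const KNOWN_VERSIONS: &[usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

fn check_version(version: usize) -> Result<(), String> {
    if KNOWN_VERSIONS.contains(&version) {
        Ok(())
    } else {
        Err(format!(
            "unknown index_part version {version}, latest known is {LATEST_VERSION}"
        ))
    }
}

fn main() {
    assert!(check_version(9).is_ok());
    assert!(check_version(11).is_err());
}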
|
||||
|
||||
@@ -117,7 +108,6 @@ impl IndexPart {
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -391,7 +381,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -436,7 +425,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -482,7 +470,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -531,7 +518,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap();
|
||||
@@ -575,7 +561,6 @@ mod tests {
|
||||
lineage: Lineage::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -622,7 +607,6 @@ mod tests {
|
||||
},
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -674,7 +658,6 @@ mod tests {
|
||||
},
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Some(AuxFilePolicy::V2),
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -731,7 +714,6 @@ mod tests {
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Default::default(),
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -789,7 +771,6 @@ mod tests {
|
||||
lineage: Default::default(),
|
||||
gc_blocking: None,
|
||||
last_aux_file_policy: Default::default(),
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
@@ -852,83 +833,6 @@ mod tests {
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: None,
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v10_importpgdata_is_parsed() {
|
||||
let example = r#"{
|
||||
"version": 10,
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata": {
|
||||
"disk_consistent_lsn": "0/16960E8",
|
||||
"prev_record_lsn": "0/1696070",
|
||||
"ancestor_timeline": "e45a7f37d3ee2ff17dc14bf4f4e3f52e",
|
||||
"ancestor_lsn": "0/0",
|
||||
"latest_gc_cutoff_lsn": "0/1696070",
|
||||
"initdb_lsn": "0/1696070",
|
||||
"pg_version": 14
|
||||
},
|
||||
"gc_blocking": {
|
||||
"started_at": "2024-07-19T09:00:00.123",
|
||||
"reasons": ["DetachAncestor"]
|
||||
},
|
||||
"import_pgdata": {
|
||||
"V1": {
|
||||
"Done": {
|
||||
"idempotency_key": "specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5",
|
||||
"started_at": "2024-11-13T09:23:42.123",
|
||||
"finished_at": "2024-11-13T09:42:23.123"
|
||||
}
|
||||
}
|
||||
}
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 10,
|
||||
layer_metadata: HashMap::from([
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 25600000,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
}),
|
||||
("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), LayerFileMetadata {
|
||||
file_size: 9007199254741001,
|
||||
generation: Generation::none(),
|
||||
shard: ShardIndex::unsharded()
|
||||
})
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata: TimelineMetadata::new(
|
||||
Lsn::from_str("0/16960E8").unwrap(),
|
||||
Some(Lsn::from_str("0/1696070").unwrap()),
|
||||
Some(TimelineId::from_str("e45a7f37d3ee2ff17dc14bf4f4e3f52e").unwrap()),
|
||||
Lsn::INVALID,
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
Lsn::from_str("0/1696070").unwrap(),
|
||||
14,
|
||||
).with_recalculated_checksum().unwrap(),
|
||||
deleted_at: None,
|
||||
lineage: Default::default(),
|
||||
gc_blocking: Some(GcBlocking {
|
||||
started_at: parse_naive_datetime("2024-07-19T09:00:00.123000000"),
|
||||
reasons: enumset::EnumSet::from_iter([GcBlockingReason::DetachAncestor]),
|
||||
}),
|
||||
last_aux_file_policy: Default::default(),
|
||||
archived_at: None,
|
||||
import_pgdata: Some(import_pgdata::index_part_format::Root::V1(import_pgdata::index_part_format::V1::Done(import_pgdata::index_part_format::Done{
|
||||
started_at: parse_naive_datetime("2024-11-13T09:23:42.123000000"),
|
||||
finished_at: parse_naive_datetime("2024-11-13T09:42:23.123000000"),
|
||||
idempotency_key: import_pgdata::index_part_format::IdempotencyKey::new("specified-by-client-218a5213-5044-4562-a28d-d024c5f057f5".to_string()),
|
||||
})))
|
||||
};
|
||||
|
||||
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
|
||||
|
||||
@@ -111,6 +111,15 @@ pub(crate) struct SecondaryTenant {
|
||||
pub(super) heatmap_total_size_metric: UIntGauge,
|
||||
}
|
||||
|
||||
impl Drop for SecondaryTenant {
|
||||
fn drop(&mut self) {
|
||||
let tenant_id = self.tenant_shard_id.tenant_id.to_string();
|
||||
let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
|
||||
let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
}
|
||||
}
|
||||
|
||||
impl SecondaryTenant {
|
||||
pub(crate) fn new(
|
||||
tenant_shard_id: TenantShardId,
|
||||
@@ -158,13 +167,6 @@ impl SecondaryTenant {
|
||||
|
||||
// Wait for any secondary downloader work to complete
|
||||
self.gate.close().await;
|
||||
|
||||
self.validate_metrics();
|
||||
|
||||
let tenant_id = self.tenant_shard_id.tenant_id.to_string();
|
||||
let shard_id = format!("{}", self.tenant_shard_id.shard_slug());
|
||||
let _ = SECONDARY_RESIDENT_PHYSICAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
let _ = SECONDARY_HEATMAP_TOTAL_SIZE.remove_label_values(&[&tenant_id, &shard_id]);
|
||||
}
|
||||
|
||||
pub(crate) fn set_config(&self, config: &SecondaryLocationConfig) {
|
||||
@@ -252,20 +254,6 @@ impl SecondaryTenant {
|
||||
.await
|
||||
.expect("secondary eviction should not have panicked");
|
||||
}
|
||||
|
||||
/// Exhaustive check that incrementally updated metrics match the actual state.
|
||||
#[cfg(feature = "testing")]
|
||||
fn validate_metrics(&self) {
|
||||
let detail = self.detail.lock().unwrap();
|
||||
let resident_size = detail.total_resident_size();
|
||||
|
||||
assert_eq!(resident_size, self.resident_size_metric.get());
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "testing"))]
|
||||
fn validate_metrics(&self) {
|
||||
// No-op in non-testing builds
|
||||
}
|
||||
}
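A self-contained sketch of what validate_metrics checks above, with made-up types: the incrementally maintained resident-size gauge must equal the sum of on-disk layer sizes recomputed from scratch.

use std::collections::HashMap;

struct TimelineDetail {
    on_disk_layers: HashMap<String, u64>, // layer name -> file size
}

fn total_resident_size(timelines: &HashMap<String, TimelineDetail>) -> u64 {
    timelines
        .values()
        .map(|tl| tl.on_disk_layers.values().sum::<u64>())
        .sum()
}

fn main() {
    let mut timelines = HashMap::new();
    timelines.insert(
        "tl1".to_string(),
        TimelineDetail {
            on_disk_layers: HashMap::from([("layer-a".to_string(), 100), ("layer-b".to_string(), 50)]),
        },
    );
    let metric_value = 150u64; // what the incremental gauge would report
    assert_eq!(total_resident_size(&timelines), metric_value);
}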
|
||||
|
||||
/// The SecondaryController is a pseudo-rpc client for administrative control of secondary mode downloads,
|
||||
|
||||
@@ -242,19 +242,6 @@ impl SecondaryDetail {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub(crate) fn total_resident_size(&self) -> u64 {
|
||||
self.timelines
|
||||
.values()
|
||||
.map(|tl| {
|
||||
tl.on_disk_layers
|
||||
.values()
|
||||
.map(|v| v.metadata.file_size)
|
||||
.sum::<u64>()
|
||||
})
|
||||
.sum::<u64>()
|
||||
}
|
||||
|
||||
pub(super) fn evict_layer(
|
||||
&mut self,
|
||||
name: LayerName,
|
||||
@@ -776,7 +763,24 @@ impl<'a> TenantDownloader<'a> {
|
||||
}
|
||||
|
||||
// Metrics consistency check in testing builds
|
||||
self.secondary_state.validate_metrics();
|
||||
if cfg!(feature = "testing") {
|
||||
let detail = self.secondary_state.detail.lock().unwrap();
|
||||
let resident_size = detail
|
||||
.timelines
|
||||
.values()
|
||||
.map(|tl| {
|
||||
tl.on_disk_layers
|
||||
.values()
|
||||
.map(|v| v.metadata.file_size)
|
||||
.sum::<u64>()
|
||||
})
|
||||
.sum::<u64>();
|
||||
assert_eq!(
|
||||
resident_size,
|
||||
self.secondary_state.resident_size_metric.get()
|
||||
);
|
||||
}
|
||||
|
||||
// Only update last_etag after a full successful download: this way will not skip
|
||||
// the next download, even if the heatmap's actual etag is unchanged.
|
||||
self.secondary_state.detail.lock().unwrap().last_download = Some(DownloadSummary {
|
||||
|
||||
@@ -4,7 +4,6 @@ pub mod delete;
|
||||
pub(crate) mod detach_ancestor;
|
||||
mod eviction_task;
|
||||
pub(crate) mod handle;
|
||||
pub(crate) mod import_pgdata;
|
||||
mod init;
|
||||
pub mod layer_manager;
|
||||
pub(crate) mod logical_size;
|
||||
@@ -39,7 +38,6 @@ use pageserver_api::{
|
||||
shard::{ShardIdentity, ShardNumber, TenantShardId},
|
||||
};
|
||||
use rand::Rng;
|
||||
use remote_storage::DownloadError;
|
||||
use serde_with::serde_as;
|
||||
use storage_broker::BrokerClientChannel;
|
||||
use tokio::{
|
||||
@@ -274,7 +272,7 @@ pub struct Timeline {
|
||||
|
||||
/// Remote storage client.
|
||||
/// See [`remote_timeline_client`](super::remote_timeline_client) module comment for details.
|
||||
pub(crate) remote_client: Arc<RemoteTimelineClient>,
|
||||
pub remote_client: Arc<RemoteTimelineClient>,
|
||||
|
||||
// What page versions do we hold in the repository? If we get a
|
||||
// request > last_record_lsn, we need to wait until we receive all
|
||||
@@ -483,27 +481,17 @@ impl GcInfo {
|
||||
&mut self,
|
||||
child_id: TimelineId,
|
||||
maybe_offloaded: MaybeOffloaded,
|
||||
) -> bool {
|
||||
// Remove at most one element. Needed for correctness if there are two live `Timeline` objects referencing
// the same timeline. Shouldn't happen, but may occur when Arcs live longer than intended.
|
||||
let mut removed = false;
|
||||
self.retain_lsns.retain(|i| {
|
||||
if removed {
|
||||
return true;
|
||||
}
|
||||
let remove = i.1 == child_id && i.2 == maybe_offloaded;
|
||||
removed |= remove;
|
||||
!remove
|
||||
});
|
||||
removed
|
||||
) {
|
||||
self.retain_lsns
|
||||
.retain(|i| !(i.1 == child_id && i.2 == maybe_offloaded));
|
||||
}
|
||||
|
||||
pub(super) fn remove_child_not_offloaded(&mut self, child_id: TimelineId) -> bool {
|
||||
self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::No)
|
||||
pub(super) fn remove_child_not_offloaded(&mut self, child_id: TimelineId) {
|
||||
self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::No);
|
||||
}
|
||||
|
||||
pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) -> bool {
|
||||
self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes)
|
||||
pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) {
|
||||
self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes);
|
||||
}
|
||||
}
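The "remove at most one matching element with retain" trick used by remove_child_maybe_offloaded above, extracted into a tiny std-only sketch that also reports whether anything was removed.

fn remove_first_match<T>(items: &mut Vec<T>, mut pred: impl FnMut(&T) -> bool) -> bool {
    let mut removed = false;
    items.retain(|i| {
        if removed {
            return true; // keep everything after the first removal
        }
        let matches = pred(i);
        removed |= matches;
        !matches
    });
    removed
}

fn main() {
    let mut v = vec![1, 2, 2, 3];
    assert!(remove_first_match(&mut v, |&x| x == 2));
    assert_eq!(v, vec![1, 2, 3]);
    assert!(!remove_first_match(&mut v, |&x| x == 9));
}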
|
||||
|
||||
@@ -776,21 +764,6 @@ pub(crate) enum CompactFlags {
|
||||
DryRun,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, serde::Deserialize)]
|
||||
pub(crate) struct CompactRange {
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub start: Key,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub end: Key,
|
||||
}
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub(crate) struct CompactOptions {
|
||||
pub flags: EnumSet<CompactFlags>,
|
||||
pub compact_range: Option<CompactRange>,
|
||||
}
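A sketch of how a key range like the CompactRange struct above can be deserialized from string fields via serde_with's DisplayFromStr, here with u64 stand-ins for Key. It assumes serde (with derive), serde_with, and serde_json as dependencies.

use serde::Deserialize;
use serde_with::serde_as;

#[serde_as]
#[derive(Debug, Deserialize)]
struct Range {
    #[serde_as(as = "serde_with::DisplayFromStr")]
    start: u64,
    #[serde_as(as = "serde_with::DisplayFromStr")]
    end: u64,
}

fn main() {
    // The start/end values arrive as strings and are parsed via FromStr.
    let r: Range = serde_json::from_str(r#"{"start": "10", "end": "20"}"#).unwrap();
    assert_eq!((r.start, r.end), (10, 20));
}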
|
||||
|
||||
impl std::fmt::Debug for Timeline {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "Timeline<{}>", self.timeline_id)
|
||||
@@ -1629,25 +1602,6 @@ impl Timeline {
|
||||
cancel: &CancellationToken,
|
||||
flags: EnumSet<CompactFlags>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, CompactionError> {
|
||||
self.compact_with_options(
|
||||
cancel,
|
||||
CompactOptions {
|
||||
flags,
|
||||
compact_range: None,
|
||||
},
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending
|
||||
/// compaction tasks.
|
||||
pub(crate) async fn compact_with_options(
|
||||
self: &Arc<Self>,
|
||||
cancel: &CancellationToken,
|
||||
options: CompactOptions,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, CompactionError> {
|
||||
// most likely the cancellation token is from background task, but in tests it could be the
|
||||
// request task as well.
|
||||
@@ -1685,7 +1639,7 @@ impl Timeline {
|
||||
self.compact_tiered(cancel, ctx).await?;
|
||||
Ok(false)
|
||||
}
|
||||
CompactionAlgorithm::Legacy => self.compact_legacy(cancel, options, ctx).await,
|
||||
CompactionAlgorithm::Legacy => self.compact_legacy(cancel, flags, ctx).await,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2086,11 +2040,6 @@ impl Timeline {
|
||||
.unwrap_or(self.conf.default_tenant_conf.lsn_lease_length_for_ts)
|
||||
}
|
||||
|
||||
pub(crate) fn is_gc_blocked_by_lsn_lease_deadline(&self) -> bool {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf.is_gc_blocked_by_lsn_lease_deadline()
|
||||
}
|
||||
|
||||
pub(crate) fn get_lazy_slru_download(&self) -> bool {
|
||||
let tenant_conf = self.tenant_conf.load();
|
||||
tenant_conf
|
||||
@@ -2178,14 +2127,14 @@ impl Timeline {
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) fn tenant_conf_updated(&self, new_conf: &AttachedTenantConf) {
|
||||
pub(super) fn tenant_conf_updated(&self, new_conf: &TenantConfOpt) {
|
||||
// NB: Most tenant conf options are read by background loops, so,
|
||||
// changes will automatically be picked up.
|
||||
|
||||
// The threshold is embedded in the metric. So, we need to update it.
|
||||
{
|
||||
let new_threshold = Self::get_evictions_low_residence_duration_metric_threshold(
|
||||
&new_conf.tenant_conf,
|
||||
new_conf,
|
||||
&self.conf.default_tenant_conf,
|
||||
);
|
||||
|
||||
@@ -2193,9 +2142,6 @@ impl Timeline {
|
||||
let shard_id_str = format!("{}", self.tenant_shard_id.shard_slug());
|
||||
|
||||
let timeline_id_str = self.timeline_id.to_string();
|
||||
|
||||
self.remote_client.update_config(&new_conf.location);
|
||||
|
||||
self.metrics
|
||||
.evictions_with_low_residence_duration
|
||||
.write()
|
||||
@@ -2653,7 +2599,6 @@ impl Timeline {
|
||||
//
|
||||
// NB: generation numbers naturally protect against this because they disambiguate
|
||||
// (1) and (4)
|
||||
// TODO: this is basically a no-op now, should we remove it?
|
||||
self.remote_client.schedule_barrier()?;
|
||||
// Tenant::create_timeline will wait for these uploads to happen before returning, or
|
||||
// on retry.
|
||||
@@ -2709,23 +2654,20 @@ impl Timeline {
|
||||
{
|
||||
Some(cancel) => cancel.cancel(),
|
||||
None => {
|
||||
match self.current_state() {
|
||||
TimelineState::Broken { .. } | TimelineState::Stopping => {
|
||||
// Can happen when timeline detail endpoint is used when deletion is ongoing (or its broken).
|
||||
// Don't make noise.
|
||||
}
|
||||
TimelineState::Loading => {
|
||||
// Import does not return an activated timeline.
|
||||
info!("discarding priority boost for logical size calculation because timeline is not yet active");
|
||||
}
|
||||
TimelineState::Active => {
|
||||
// activation should be setting the once cell
|
||||
warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
|
||||
debug_assert!(false);
|
||||
}
|
||||
let state = self.current_state();
|
||||
if matches!(
|
||||
state,
|
||||
TimelineState::Broken { .. } | TimelineState::Stopping
|
||||
) {
|
||||
|
||||
// Can happen when the timeline detail endpoint is used while deletion is ongoing (or it's broken).
|
||||
// Don't make noise.
|
||||
} else {
|
||||
warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work");
|
||||
debug_assert!(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4572,10 +4514,7 @@ impl Drop for Timeline {
|
||||
// This lock should never be poisoned, but in case it is we do a .map() instead of
|
||||
// an unwrap(), to avoid panicking in a destructor and thereby aborting the process.
|
||||
if let Ok(mut gc_info) = ancestor.gc_info.write() {
|
||||
if !gc_info.remove_child_not_offloaded(self.timeline_id) {
|
||||
tracing::error!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id,
|
||||
"Couldn't remove retain_lsn entry from offloaded timeline's parent: already removed");
|
||||
}
|
||||
gc_info.remove_child_not_offloaded(self.timeline_id)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4835,86 +4774,6 @@ impl Timeline {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn find_gc_time_cutoff(
|
||||
&self,
|
||||
pitr: Duration,
|
||||
cancel: &CancellationToken,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<Option<Lsn>, PageReconstructError> {
|
||||
debug_assert_current_span_has_tenant_and_timeline_id();
|
||||
if self.shard_identity.is_shard_zero() {
|
||||
// Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself
|
||||
let now = SystemTime::now();
|
||||
let time_range = if pitr == Duration::ZERO {
|
||||
humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid")
|
||||
} else {
|
||||
pitr
|
||||
};
|
||||
|
||||
// If PITR is so large or `now` is so small that this underflows, we will retain no history (highly unexpected case)
|
||||
let time_cutoff = now.checked_sub(time_range).unwrap_or(now);
|
||||
let timestamp = to_pg_timestamp(time_cutoff);
|
||||
|
||||
let time_cutoff = match self.find_lsn_for_timestamp(timestamp, cancel, ctx).await? {
|
||||
LsnForTimestamp::Present(lsn) => Some(lsn),
|
||||
LsnForTimestamp::Future(lsn) => {
|
||||
// The timestamp is in the future. That sounds impossible,
|
||||
// but what it really means is that there hasn't been
|
||||
// any commits since the cutoff timestamp.
|
||||
//
|
||||
// In this case we should use the LSN of the most recent commit,
|
||||
// which is implicitly the last LSN in the log.
|
||||
debug!("future({})", lsn);
|
||||
Some(self.get_last_record_lsn())
|
||||
}
|
||||
LsnForTimestamp::Past(lsn) => {
|
||||
debug!("past({})", lsn);
|
||||
None
|
||||
}
|
||||
LsnForTimestamp::NoData(lsn) => {
|
||||
debug!("nodata({})", lsn);
|
||||
None
|
||||
}
|
||||
};
|
||||
Ok(time_cutoff)
|
||||
} else {
|
||||
// Shards other than shard zero cannot do timestamp->lsn lookups, and must instead learn their GC cutoff
|
||||
// from shard zero's index. The index doesn't explicitly tell us the time cutoff, but we may assume that
|
||||
// the point up to which shard zero's last_gc_cutoff has advanced will either be the time cutoff, or a
|
||||
// space cutoff that we would also have respected ourselves.
|
||||
match self
|
||||
.remote_client
|
||||
.download_foreign_index(ShardNumber(0), cancel)
|
||||
.await
|
||||
{
|
||||
Ok((index_part, index_generation, _index_mtime)) => {
|
||||
tracing::info!("GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}",
|
||||
index_part.metadata.latest_gc_cutoff_lsn());
|
||||
Ok(Some(index_part.metadata.latest_gc_cutoff_lsn()))
|
||||
}
|
||||
Err(DownloadError::NotFound) => {
|
||||
// This is unexpected, because during timeline creations shard zero persists to remote
|
||||
// storage before other shards are called, and during timeline deletion non-zeroth shards are
|
||||
// deleted before the zeroth one. However, it should be harmless: if we somehow end up in this
|
||||
// state, then shard zero should _eventually_ write an index when it GCs.
|
||||
tracing::warn!("GC couldn't find shard zero's index for timeline");
|
||||
Ok(None)
|
||||
}
|
||||
Err(e) => {
|
||||
// TODO: this function should return a different error type than page reconstruct error
|
||||
Err(PageReconstructError::Other(anyhow::anyhow!(e)))
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: after reading shard zero's GC cutoff, we should validate its generation with the storage
|
||||
// controller. Otherwise, it is possible that we see the GC cutoff go backwards while shard zero
|
||||
// is going through a migration if we read the old location's index and it has GC'd ahead of the
|
||||
// new location. This is legal in principle, but problematic in practice because it might result
|
||||
// in a timeline creation succeeding on shard zero ('s new location) but then failing on other shards
|
||||
// because they have GC'd past the branch point.
|
||||
}
|
||||
}
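A small sketch of the time-cutoff arithmetic above: a zero PITR interval still uses a default lookback so size-based retention does not keep history forever on idle databases, and an underflow (a huge PITR value) falls back to "now". The 7-day default used here is an assumed placeholder, not necessarily the real DEFAULT_PITR_INTERVAL.

use std::time::{Duration, SystemTime};

const DEFAULT_PITR_INTERVAL: Duration = Duration::from_secs(7 * 24 * 3600); // assumed value

fn time_cutoff(now: SystemTime, pitr: Duration) -> SystemTime {
    let lookback = if pitr == Duration::ZERO {
        DEFAULT_PITR_INTERVAL
    } else {
        pitr
    };
    // If the lookback is so large that the subtraction underflows, retain nothing older than now.
    now.checked_sub(lookback).unwrap_or(now)
}

fn main() {
    let now = SystemTime::now();
    println!("{:?}", time_cutoff(now, Duration::ZERO));
    println!("{:?}", time_cutoff(now, Duration::from_secs(3600)));
}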
|
||||
|
||||
/// Find the Lsns above which layer files need to be retained on
|
||||
/// garbage collection.
|
||||
///
|
||||
@@ -4957,7 +4816,40 @@ impl Timeline {
|
||||
// - if PITR interval is set, then this is our cutoff.
|
||||
// - if PITR interval is not set, then we do a lookup
|
||||
// based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases.
|
||||
let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?;
|
||||
let time_cutoff = {
|
||||
let now = SystemTime::now();
|
||||
let time_range = if pitr == Duration::ZERO {
|
||||
humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid")
|
||||
} else {
|
||||
pitr
|
||||
};
|
||||
|
||||
// If PITR is so large or `now` is so small that this underflows, we will retain no history (highly unexpected case)
|
||||
let time_cutoff = now.checked_sub(time_range).unwrap_or(now);
|
||||
let timestamp = to_pg_timestamp(time_cutoff);
|
||||
|
||||
match self.find_lsn_for_timestamp(timestamp, cancel, ctx).await? {
|
||||
LsnForTimestamp::Present(lsn) => Some(lsn),
|
||||
LsnForTimestamp::Future(lsn) => {
|
||||
// The timestamp is in the future. That sounds impossible,
|
||||
// but what it really means is that there hasn't been
|
||||
// any commits since the cutoff timestamp.
|
||||
//
|
||||
// In this case we should use the LSN of the most recent commit,
|
||||
// which is implicitly the last LSN in the log.
|
||||
debug!("future({})", lsn);
|
||||
Some(self.get_last_record_lsn())
|
||||
}
|
||||
LsnForTimestamp::Past(lsn) => {
|
||||
debug!("past({})", lsn);
|
||||
None
|
||||
}
|
||||
LsnForTimestamp::NoData(lsn) => {
|
||||
debug!("nodata({})", lsn);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(match (pitr, time_cutoff) {
|
||||
(Duration::ZERO, Some(time_cutoff)) => {
|
||||
|
||||
@@ -10,7 +10,7 @@ use std::sync::Arc;
|
||||
|
||||
use super::layer_manager::LayerManager;
|
||||
use super::{
|
||||
CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode,
|
||||
CompactFlags, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode,
|
||||
RecordedDuration, Timeline,
|
||||
};
|
||||
|
||||
@@ -273,32 +273,22 @@ impl Timeline {
|
||||
pub(crate) async fn compact_legacy(
|
||||
self: &Arc<Self>,
|
||||
cancel: &CancellationToken,
|
||||
options: CompactOptions,
|
||||
flags: EnumSet<CompactFlags>,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<bool, CompactionError> {
|
||||
if options
|
||||
.flags
|
||||
.contains(CompactFlags::EnhancedGcBottomMostCompaction)
|
||||
{
|
||||
self.compact_with_gc(cancel, options, ctx)
|
||||
if flags.contains(CompactFlags::EnhancedGcBottomMostCompaction) {
|
||||
self.compact_with_gc(cancel, flags, ctx)
|
||||
.await
|
||||
.map_err(CompactionError::Other)?;
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
if options.flags.contains(CompactFlags::DryRun) {
|
||||
if flags.contains(CompactFlags::DryRun) {
|
||||
return Err(CompactionError::Other(anyhow!(
|
||||
"dry-run mode is not supported for legacy compaction for now"
|
||||
)));
|
||||
}
|
||||
|
||||
if options.compact_range.is_some() {
|
||||
// maybe useful in the future? could implement this at some point
|
||||
return Err(CompactionError::Other(anyhow!(
|
||||
"compaction range is not supported for legacy compaction for now"
|
||||
)));
|
||||
}
|
||||
|
||||
// High level strategy for compaction / image creation:
|
||||
//
|
||||
// 1. First, calculate the desired "partitioning" of the
|
||||
@@ -348,7 +338,7 @@ impl Timeline {
|
||||
.repartition(
|
||||
self.get_last_record_lsn(),
|
||||
self.get_compaction_target_size(),
|
||||
options.flags,
|
||||
flags,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
@@ -364,7 +354,7 @@ impl Timeline {
|
||||
let fully_compacted = self
|
||||
.compact_level0(
|
||||
target_file_size,
|
||||
options.flags.contains(CompactFlags::ForceL0Compaction),
|
||||
flags.contains(CompactFlags::ForceL0Compaction),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
@@ -382,10 +372,7 @@ impl Timeline {
|
||||
.create_image_layers(
|
||||
&partitioning,
|
||||
lsn,
|
||||
if options
|
||||
.flags
|
||||
.contains(CompactFlags::ForceImageLayerCreation)
|
||||
{
|
||||
if flags.contains(CompactFlags::ForceImageLayerCreation) {
|
||||
ImageLayerCreationMode::Force
|
||||
} else {
|
||||
ImageLayerCreationMode::Try
|
||||
@@ -1749,19 +1736,11 @@ impl Timeline {
|
||||
pub(crate) async fn compact_with_gc(
|
||||
self: &Arc<Self>,
|
||||
cancel: &CancellationToken,
|
||||
options: CompactOptions,
|
||||
flags: EnumSet<CompactFlags>,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
self.partial_compact_with_gc(
|
||||
options
|
||||
.compact_range
|
||||
.map(|range| range.start..range.end)
|
||||
.unwrap_or_else(|| Key::MIN..Key::MAX),
|
||||
cancel,
|
||||
options.flags,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
self.partial_compact_with_gc(Key::MIN..Key::MAX, cancel, flags, ctx)
|
||||
.await
|
||||
}
|
||||
|
||||
/// An experimental compaction building block that combines compaction with garbage collection.
|
||||
@@ -2042,14 +2021,6 @@ impl Timeline {
|
||||
if cancel.is_cancelled() {
|
||||
return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error
|
||||
}
|
||||
if self.shard_identity.is_key_disposable(&key) {
|
||||
// If this shard does not need to store this key, simply skip it.
|
||||
//
|
||||
// This is not handled in the filter iterator because shard is determined by hash.
|
||||
// Therefore, it does not give us any performance benefit to do things like skip
|
||||
// a whole layer file as handling key spaces (ranges).
|
||||
continue;
|
||||
}
|
||||
if !job_desc.compaction_key_range.contains(&key) {
|
||||
if !desc.is_delta {
|
||||
continue;
|
||||
|
||||
@@ -283,7 +283,7 @@ impl DeleteTimelineFlow {
|
||||
|
||||
/// Shortcut to create Timeline in stopping state and spawn deletion task.
|
||||
#[instrument(skip_all, fields(%timeline_id))]
|
||||
pub(crate) async fn resume_deletion(
|
||||
pub async fn resume_deletion(
|
||||
tenant: Arc<Tenant>,
|
||||
timeline_id: TimelineId,
|
||||
local_metadata: &TimelineMetadata,
|
||||
|
||||
@@ -1,218 +0,0 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use remote_storage::RemotePath;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{info, info_span, Instrument};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::{context::RequestContext, tenant::metadata::TimelineMetadata};
|
||||
|
||||
use super::Timeline;
|
||||
|
||||
mod flow;
|
||||
mod importbucket_client;
|
||||
mod importbucket_format;
|
||||
pub(crate) mod index_part_format;
|
||||
pub(crate) mod upcall_api;
|
||||
|
||||
pub async fn doit(
|
||||
timeline: &Arc<Timeline>,
|
||||
index_part: index_part_format::Root,
|
||||
ctx: &RequestContext,
|
||||
cancel: CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let index_part_format::Root::V1(v1) = index_part;
|
||||
let index_part_format::InProgress {
|
||||
location,
|
||||
idempotency_key,
|
||||
started_at,
|
||||
} = match v1 {
|
||||
index_part_format::V1::Done(_) => return Ok(()),
|
||||
index_part_format::V1::InProgress(in_progress) => in_progress,
|
||||
};
|
||||
|
||||
let storage = importbucket_client::new(timeline.conf, &location, cancel.clone()).await?;
|
||||
|
||||
info!("get spec early so we know we'll be able to upcall when done");
|
||||
let Some(spec) = storage.get_spec().await? else {
|
||||
bail!("spec not found")
|
||||
};
|
||||
|
||||
let upcall_client =
|
||||
upcall_api::Client::new(timeline.conf, cancel.clone()).context("create upcall client")?;
|
||||
|
||||
//
|
||||
// send an early progress update to clean up k8s job early and generate potentially useful logs
|
||||
//
|
||||
info!("send early progress update");
|
||||
upcall_client
|
||||
.send_progress_until_success(&spec)
|
||||
.instrument(info_span!("early_progress_update"))
|
||||
.await?;
|
||||
|
||||
let status_prefix = RemotePath::from_string("status").unwrap();
|
||||
|
||||
//
|
||||
// See if shard is done.
|
||||
// TODO: incorporate generations into status key for split brain safety. Figure out together with checkpointing.
|
||||
//
|
||||
let shard_status_key =
|
||||
status_prefix.join(format!("shard-{}", timeline.tenant_shard_id.shard_slug()));
|
||||
let shard_status: Option<importbucket_format::ShardStatus> =
|
||||
storage.get_json(&shard_status_key).await?;
|
||||
info!(?shard_status, "peeking shard status");
|
||||
if shard_status.map(|st| st.done).unwrap_or(false) {
|
||||
info!("shard status indicates that the shard is done, skipping import");
|
||||
} else {
|
||||
// TODO: checkpoint the progress into the IndexPart instead of restarting
|
||||
// from the beginning.
|
||||
|
||||
//
|
||||
// Wipe the slate clean - the flow does not allow resuming.
|
||||
// We can implement resuming in the future by checkpointing the progress into the IndexPart.
|
||||
//
|
||||
info!("wipe the slate clean");
|
||||
{
|
||||
// TODO: do we need to hold GC lock for this?
|
||||
let mut guard = timeline.layers.write().await;
|
||||
assert!(
|
||||
guard.layer_map()?.open_layer.is_none(),
|
||||
"while importing, there should be no in-memory layer" // this just seems like a good place to assert it
|
||||
);
|
||||
let all_layers_keys = guard.all_persistent_layers();
|
||||
let all_layers: Vec<_> = all_layers_keys
|
||||
.iter()
|
||||
.map(|key| guard.get_from_key(key))
|
||||
.collect();
|
||||
let open = guard.open_mut().context("open_mut")?;
|
||||
|
||||
timeline.remote_client.schedule_gc_update(&all_layers)?;
|
||||
open.finish_gc_timeline(&all_layers);
|
||||
}
|
||||
|
||||
//
|
||||
// Wait for pgdata to finish uploading
|
||||
//
|
||||
info!("wait for pgdata to reach status 'done'");
|
||||
let pgdata_status_key = status_prefix.join("pgdata");
|
||||
loop {
|
||||
let res = async {
|
||||
let pgdata_status: Option<importbucket_format::PgdataStatus> = storage
|
||||
.get_json(&pgdata_status_key)
|
||||
.await
|
||||
.context("get pgdata status")?;
|
||||
info!(?pgdata_status, "peeking pgdata status");
|
||||
if pgdata_status.map(|st| st.done).unwrap_or(false) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(anyhow::anyhow!("pgdata not done yet"))
|
||||
}
|
||||
}
|
||||
.await;
|
||||
match res {
|
||||
Ok(_) => break,
|
||||
Err(err) => {
|
||||
info!(?err, "indefinitely waiting for pgdata to finish");
|
||||
if tokio::time::timeout(std::time::Duration::from_secs(10), cancel.cancelled())
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
bail!("cancelled while waiting for pgdata");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Do the import
|
||||
//
|
||||
info!("do the import");
|
||||
let control_file = storage.get_control_file().await?;
|
||||
let base_lsn = control_file.base_lsn();
|
||||
|
||||
info!("update TimelineMetadata based on LSNs from control file");
|
||||
{
|
||||
let pg_version = control_file.pg_version();
|
||||
let _ctx: &RequestContext = ctx;
|
||||
async move {
|
||||
// FIXME: The 'disk_consistent_lsn' should be the LSN at the *end* of the
|
||||
// checkpoint record, and prev_record_lsn should point to its beginning.
|
||||
// We should read the real end of the record from the WAL, but here we
|
||||
// just fake it.
|
||||
let disk_consistent_lsn = Lsn(base_lsn.0 + 8);
|
||||
let prev_record_lsn = base_lsn;
|
||||
let metadata = TimelineMetadata::new(
|
||||
disk_consistent_lsn,
|
||||
Some(prev_record_lsn),
|
||||
None, // no ancestor
|
||||
Lsn(0), // no ancestor lsn
|
||||
base_lsn, // latest_gc_cutoff_lsn
|
||||
base_lsn, // initdb_lsn
|
||||
pg_version,
|
||||
);
|
||||
|
||||
let _start_lsn = disk_consistent_lsn + 1;
|
||||
|
||||
timeline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_full_metadata_update(&metadata)?;
|
||||
|
||||
timeline.remote_client.wait_completion().await?;
|
||||
|
||||
anyhow::Ok(())
|
||||
}
|
||||
}
|
||||
.await?;
|
||||
|
||||
flow::run(
|
||||
timeline.clone(),
|
||||
base_lsn,
|
||||
control_file,
|
||||
storage.clone(),
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
//
|
||||
// Communicate that shard is done.
|
||||
//
|
||||
storage
|
||||
.put_json(
|
||||
&shard_status_key,
|
||||
&importbucket_format::ShardStatus { done: true },
|
||||
)
|
||||
.await
|
||||
.context("put shard status")?;
|
||||
}
|
||||
|
||||
//
|
||||
// Ensure at-least-once delivery of the upcall to cplane
|
||||
// before we mark the task as done and never come here again.
|
||||
//
|
||||
info!("send final progress update");
|
||||
upcall_client
|
||||
.send_progress_until_success(&spec)
|
||||
.instrument(info_span!("final_progress_update"))
|
||||
.await?;
|
||||
|
||||
//
|
||||
// Mark as done in index_part.
|
||||
// This makes subsequent timeline loads enter the normal load code path
|
||||
// instead of spawning the import task and calling this here function.
|
||||
//
|
||||
info!("mark import as complete in index part");
|
||||
timeline
|
||||
.remote_client
|
||||
.schedule_index_upload_for_import_pgdata_state_update(Some(index_part_format::Root::V1(
|
||||
index_part_format::V1::Done(index_part_format::Done {
|
||||
idempotency_key,
|
||||
started_at,
|
||||
finished_at: chrono::Utc::now().naive_utc(),
|
||||
}),
|
||||
)))?;
|
||||
|
||||
timeline.remote_client.wait_completion().await?;
|
||||
|
||||
Ok(())
|
||||
}
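A rough sketch of the "poll a status object until it is done, re-checking every 10 seconds and bailing out on cancellation" loop used in the import flow above. The is_done probe stands in for fetching the pgdata status JSON, and the example assumes tokio (rt, macros, time), tokio-util, and anyhow as dependencies.

use std::time::Duration;
use tokio_util::sync::CancellationToken;

async fn wait_until_done(
    mut is_done: impl FnMut() -> bool,
    cancel: CancellationToken,
) -> anyhow::Result<()> {
    loop {
        if is_done() {
            return Ok(());
        }
        // Sleep for 10s, but wake up immediately if cancellation is requested.
        if tokio::time::timeout(Duration::from_secs(10), cancel.cancelled())
            .await
            .is_ok()
        {
            anyhow::bail!("cancelled while waiting");
        }
    }
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let cancel = CancellationToken::new();
    let mut polls = 0;
    wait_until_done(
        move || {
            polls += 1;
            polls >= 1 // pretend the status flips to done on the first probe
        },
        cancel,
    )
    .await
}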
|
||||
@@ -1,798 +0,0 @@
|
||||
//! Import a PGDATA directory into an empty root timeline.
|
||||
//!
|
||||
//! This module is adapted from hackathon code by Heikki and Stas.
|
||||
//! Other code in the parent module was written by Christian as part of a customer PoC.
|
||||
//!
|
||||
//! The hackathon code was producing image layer files as a free-standing program.
|
||||
//!
|
||||
//! It has been modified to
|
||||
//! - run inside a running Pageserver, within the proper lifecycles of Timeline -> Tenant(Shard)
|
||||
//! - => sharding-awareness: produce image layers with only the data relevant for this shard
|
||||
//! - => S3 as the source for the PGDATA instead of local filesystem
|
||||
//!
|
||||
//! TODOs before productionization:
|
||||
//! - ChunkProcessingJob size / ImportJob::total_size does not account for sharding.
|
||||
//! => produced image layers likely too small.
|
||||
//! - ChunkProcessingJob should cut up an ImportJob to hit exactly target image layer size.
|
||||
//! - asserts / unwraps need to be replaced with errors
|
||||
//! - don't trust remote objects will be small (=prevent OOMs in those cases)
|
||||
//! - limit all in-memory buffers in size, or download to disk and read from there
|
||||
//! - limit task concurrency
|
||||
//! - generally play nice with other tenants in the system
|
||||
//! - importbucket is different bucket than main pageserver storage, so, should be fine wrt S3 rate limits
|
||||
//! - but concerns like network bandwidth, local disk write bandwidth, local disk capacity, etc
|
||||
//! - integrate with layer eviction system
|
||||
//! - audit for Tenant::cancel and Timeline::cancel responsiveness
|
||||
//! - audit for Tenant/Timeline gate holding (we spawn tokio tasks during this flow!)
|
||||
//!
|
||||
//! An incomplete set of TODOs from the Hackathon:
|
||||
//! - version-specific CheckPointData (=> pgv abstraction, already exists for regular walingest)
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{bail, ensure};
|
||||
use bytes::Bytes;
|
||||
|
||||
use itertools::Itertools;
|
||||
use pageserver_api::{
|
||||
key::{rel_block_to_key, rel_dir_to_key, rel_size_to_key, relmap_file_key, DBDIR_KEY},
|
||||
reltag::RelTag,
|
||||
shard::ShardIdentity,
|
||||
};
|
||||
use postgres_ffi::{pg_constants, relfile_utils::parse_relfilename, BLCKSZ};
|
||||
use tokio::task::JoinSet;
|
||||
use tracing::{debug, info_span, instrument, Instrument};
|
||||
|
||||
use crate::{
|
||||
assert_u64_eq_usize::UsizeIsU64,
|
||||
pgdatadir_mapping::{SlruSegmentDirectory, TwoPhaseDirectory},
|
||||
};
|
||||
use crate::{
|
||||
context::{DownloadBehavior, RequestContext},
|
||||
pgdatadir_mapping::{DbDirectory, RelDirectory},
|
||||
task_mgr::TaskKind,
|
||||
tenant::storage_layer::{ImageLayerWriter, Layer},
|
||||
};
|
||||
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::key::{
|
||||
slru_block_to_key, slru_dir_to_key, slru_segment_size_to_key, CHECKPOINT_KEY, CONTROLFILE_KEY,
|
||||
TWOPHASEDIR_KEY,
|
||||
};
|
||||
use pageserver_api::keyspace::singleton_range;
|
||||
use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range};
|
||||
use pageserver_api::reltag::SlruKind;
|
||||
use utils::bin_ser::BeSer;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::ops::Range;
|
||||
|
||||
use super::{
|
||||
importbucket_client::{ControlFile, RemoteStorageWrapper},
|
||||
Timeline,
|
||||
};
|
||||
|
||||
use remote_storage::RemotePath;
|
||||
|
||||
pub async fn run(
|
||||
timeline: Arc<Timeline>,
|
||||
pgdata_lsn: Lsn,
|
||||
control_file: ControlFile,
|
||||
storage: RemoteStorageWrapper,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<()> {
|
||||
Flow {
|
||||
timeline,
|
||||
pgdata_lsn,
|
||||
control_file,
|
||||
tasks: Vec::new(),
|
||||
storage,
|
||||
}
|
||||
.run(ctx)
|
||||
.await
|
||||
}
|
||||
|
||||
struct Flow {
|
||||
timeline: Arc<Timeline>,
|
||||
pgdata_lsn: Lsn,
|
||||
control_file: ControlFile,
|
||||
tasks: Vec<AnyImportTask>,
|
||||
storage: RemoteStorageWrapper,
|
||||
}
|
||||
|
||||
impl Flow {
|
||||
/// Perform the ingestion into [`Self::timeline`].
|
||||
/// Assumes the timeline is empty (= no layers).
|
||||
pub async fn run(mut self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
let pgdata_lsn = Lsn(self.control_file.control_file_data().checkPoint).align();
|
||||
|
||||
self.pgdata_lsn = pgdata_lsn;
|
||||
|
||||
let datadir = PgDataDir::new(&self.storage).await?;
|
||||
|
||||
// Import dbdir (00:00:00 keyspace)
|
||||
// This is just constructed here, but will be written to the image layer in the first call to import_db()
|
||||
let dbdir_buf = Bytes::from(DbDirectory::ser(&DbDirectory {
|
||||
dbdirs: datadir
|
||||
.dbs
|
||||
.iter()
|
||||
.map(|db| ((db.spcnode, db.dboid), true))
|
||||
.collect(),
|
||||
})?);
|
||||
self.tasks
|
||||
.push(ImportSingleKeyTask::new(DBDIR_KEY, dbdir_buf).into());
|
||||
|
||||
// Import databases (00:spcnode:dbnode keyspace for each db)
|
||||
for db in datadir.dbs {
|
||||
self.import_db(&db).await?;
|
||||
}
|
||||
|
||||
// Import SLRUs
|
||||
|
||||
// pg_xact (01:00 keyspace)
|
||||
self.import_slru(SlruKind::Clog, &self.storage.pgdata().join("pg_xact"))
|
||||
.await?;
|
||||
// pg_multixact/members (01:01 keyspace)
|
||||
self.import_slru(
|
||||
SlruKind::MultiXactMembers,
|
||||
&self.storage.pgdata().join("pg_multixact/members"),
|
||||
)
|
||||
.await?;
|
||||
// pg_multixact/offsets (01:02 keyspace)
|
||||
self.import_slru(
|
||||
SlruKind::MultiXactOffsets,
|
||||
&self.storage.pgdata().join("pg_multixact/offsets"),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Import pg_twophase.
|
||||
// TODO: as empty
|
||||
let twophasedir_buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
|
||||
xids: HashSet::new(),
|
||||
})?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
TWOPHASEDIR_KEY,
|
||||
Bytes::from(twophasedir_buf),
|
||||
)));
|
||||
|
||||
// Controlfile, checkpoint
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
CONTROLFILE_KEY,
|
||||
self.control_file.control_file_buf().clone(),
|
||||
)));
|
||||
|
||||
let checkpoint_buf = self
|
||||
.control_file
|
||||
.control_file_data()
|
||||
.checkPointCopy
|
||||
.encode()?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
CHECKPOINT_KEY,
|
||||
checkpoint_buf,
|
||||
)));
|
||||
|
||||
// Assign parts of the key space to later parallel jobs
let mut last_end_key = Key::MIN;
|
||||
let mut current_chunk = Vec::new();
|
||||
let mut current_chunk_size: usize = 0;
|
||||
let mut parallel_jobs = Vec::new();
|
||||
for task in std::mem::take(&mut self.tasks).into_iter() {
|
||||
if current_chunk_size + task.total_size() > 1024 * 1024 * 1024 {
|
||||
let key_range = last_end_key..task.key_range().start;
|
||||
parallel_jobs.push(ChunkProcessingJob::new(
|
||||
key_range.clone(),
|
||||
std::mem::take(&mut current_chunk),
|
||||
&self,
|
||||
));
|
||||
last_end_key = key_range.end;
|
||||
current_chunk_size = 0;
|
||||
}
|
||||
current_chunk_size += task.total_size();
|
||||
current_chunk.push(task);
|
||||
}
|
||||
parallel_jobs.push(ChunkProcessingJob::new(
|
||||
last_end_key..Key::MAX,
|
||||
current_chunk,
|
||||
&self,
|
||||
));
|
||||
|
||||
// Start all jobs simultaneously
let mut work = JoinSet::new();
|
||||
// TODO: semaphore?
|
||||
for job in parallel_jobs {
|
||||
let ctx: RequestContext =
|
||||
ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Error);
|
||||
work.spawn(async move { job.run(&ctx).await }.instrument(info_span!("parallel_job")));
|
||||
}
|
||||
let mut results = Vec::new();
|
||||
while let Some(result) = work.join_next().await {
|
||||
match result {
|
||||
Ok(res) => {
|
||||
results.push(res);
|
||||
}
|
||||
Err(_joinset_err) => {
|
||||
results.push(Err(anyhow::anyhow!(
|
||||
"parallel job panicked or cancelled, check pageserver logs"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if results.iter().all(|r| r.is_ok()) {
|
||||
Ok(())
|
||||
} else {
|
||||
let mut msg = String::new();
|
||||
for result in results {
|
||||
if let Err(err) = result {
|
||||
msg.push_str(&format!("{err:?}\n\n"));
|
||||
}
|
||||
}
|
||||
bail!("Some parallel jobs failed:\n\n{msg}");
|
||||
}
|
||||
}
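// Self-contained sketch (std only) of the greedy chunking in run() above: tasks
// are visited in key order and a new parallel job is cut whenever adding the
// next task would push the running total past the target size. The `usize`
// task sizes and the function name are simplifications, not pageserver types.
fn chunk_tasks_by_size(task_sizes: &[usize], target_bytes: usize) -> Vec<Vec<usize>> {
    let mut jobs = Vec::new();
    let mut current = Vec::new();
    let mut current_size = 0usize;
    for &size in task_sizes {
        if current_size + size > target_bytes {
            // cut a job covering everything accumulated so far
            jobs.push(std::mem::take(&mut current));
            current_size = 0;
        }
        current_size += size;
        current.push(size);
    }
    // final job covers the remainder (the `last_end_key..Key::MAX` job above)
    jobs.push(current);
    jobs
}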
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(dboid=%db.dboid, tablespace=%db.spcnode, path=%db.path))]
|
||||
async fn import_db(&mut self, db: &PgDataDirDb) -> anyhow::Result<()> {
|
||||
debug!("start");
|
||||
scopeguard::defer! {
|
||||
debug!("return");
|
||||
}
|
||||
|
||||
// Import relmap (00:spcnode:dbnode:00:*:00)
|
||||
let relmap_key = relmap_file_key(db.spcnode, db.dboid);
|
||||
debug!("Constructing relmap entry, key {relmap_key}");
|
||||
let relmap_path = db.path.join("pg_filenode.map");
|
||||
let relmap_buf = self.storage.get(&relmap_path).await?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
relmap_key, relmap_buf,
|
||||
)));
|
||||
|
||||
// Import reldir (00:spcnode:dbnode:00:*:01)
|
||||
let reldir_key = rel_dir_to_key(db.spcnode, db.dboid);
|
||||
debug!("Constructing reldirs entry, key {reldir_key}");
|
||||
let reldir_buf = RelDirectory::ser(&RelDirectory {
|
||||
rels: db
|
||||
.files
|
||||
.iter()
|
||||
.map(|f| (f.rel_tag.relnode, f.rel_tag.forknum))
|
||||
.collect(),
|
||||
})?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
reldir_key,
|
||||
Bytes::from(reldir_buf),
|
||||
)));
|
||||
|
||||
// Import data (00:spcnode:dbnode:reloid:fork:blk) and set sizes for each last
|
||||
// segment in a given relation (00:spcnode:dbnode:reloid:fork:ff)
|
||||
for file in &db.files {
|
||||
debug!(%file.path, %file.filesize, "importing file");
|
||||
let len = file.filesize;
|
||||
ensure!(len % 8192 == 0);
|
||||
let start_blk: u32 = file.segno * (1024 * 1024 * 1024 / 8192);
|
||||
let start_key = rel_block_to_key(file.rel_tag, start_blk);
|
||||
let end_key = rel_block_to_key(file.rel_tag, start_blk + (len / 8192) as u32);
|
||||
self.tasks
|
||||
.push(AnyImportTask::RelBlocks(ImportRelBlocksTask::new(
|
||||
*self.timeline.get_shard_identity(),
|
||||
start_key..end_key,
|
||||
&file.path,
|
||||
self.storage.clone(),
|
||||
)));
|
||||
|
||||
// Set relsize for the last segment (00:spcnode:dbnode:reloid:fork:ff)
|
||||
if let Some(nblocks) = file.nblocks {
|
||||
let size_key = rel_size_to_key(file.rel_tag);
|
||||
//debug!("Setting relation size (path={path}, rel_tag={rel_tag}, segno={segno}) to {nblocks}, key {size_key}");
|
||||
let buf = nblocks.to_le_bytes();
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
size_key,
|
||||
Bytes::from(buf.to_vec()),
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
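// Hedged sketch of the segment-to-block arithmetic used in import_db() above,
// with plain integers instead of Key/RelTag. A relation is split into 1 GiB
// segment files; segment `segno` therefore starts at block segno * (1 GiB / BLCKSZ),
// and a block-aligned file of `filesize` bytes covers filesize / BLCKSZ blocks.
const BLCKSZ_BYTES: u64 = 8192;
const SEGMENT_BYTES: u64 = 1024 * 1024 * 1024;

/// Returns the half-open block range [start_blk, end_blk) covered by the segment.
fn segment_block_range(segno: u32, filesize: u64) -> (u32, u32) {
    assert!(filesize % BLCKSZ_BYTES == 0, "segment files must be block-aligned");
    let start_blk = segno * (SEGMENT_BYTES / BLCKSZ_BYTES) as u32;
    let nblocks = (filesize / BLCKSZ_BYTES) as u32;
    (start_blk, start_blk + nblocks)
}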
|
||||
|
||||
async fn import_slru(&mut self, kind: SlruKind, path: &RemotePath) -> anyhow::Result<()> {
|
||||
let segments = self.storage.listfilesindir(path).await?;
|
||||
let segments: Vec<(String, u32, usize)> = segments
|
||||
.into_iter()
|
||||
.filter_map(|(path, size)| {
|
||||
let filename = path.object_name()?;
|
||||
let segno = u32::from_str_radix(filename, 16).ok()?;
|
||||
Some((filename.to_string(), segno, size))
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Write SlruDir
|
||||
let slrudir_key = slru_dir_to_key(kind);
|
||||
let segnos: HashSet<u32> = segments
|
||||
.iter()
|
||||
.map(|(_path, segno, _size)| *segno)
|
||||
.collect();
|
||||
let slrudir = SlruSegmentDirectory { segments: segnos };
|
||||
let slrudir_buf = SlruSegmentDirectory::ser(&slrudir)?;
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
slrudir_key,
|
||||
Bytes::from(slrudir_buf),
|
||||
)));
|
||||
|
||||
for (segpath, segno, size) in segments {
|
||||
// SlruSegBlocks for each segment
|
||||
let p = path.join(&segpath);
|
||||
let file_size = size;
|
||||
ensure!(file_size % 8192 == 0);
|
||||
let nblocks = u32::try_from(file_size / 8192)?;
|
||||
let start_key = slru_block_to_key(kind, segno, 0);
|
||||
let end_key = slru_block_to_key(kind, segno, nblocks);
|
||||
debug!(%p, segno=%segno, %size, %start_key, %end_key, "scheduling SLRU segment");
|
||||
self.tasks
|
||||
.push(AnyImportTask::SlruBlocks(ImportSlruBlocksTask::new(
|
||||
*self.timeline.get_shard_identity(),
|
||||
start_key..end_key,
|
||||
&p,
|
||||
self.storage.clone(),
|
||||
)));
|
||||
|
||||
// Followed by SlruSegSize
|
||||
let segsize_key = slru_segment_size_to_key(kind, segno);
|
||||
let segsize_buf = nblocks.to_le_bytes();
|
||||
self.tasks
|
||||
.push(AnyImportTask::SingleKey(ImportSingleKeyTask::new(
|
||||
segsize_key,
|
||||
Bytes::copy_from_slice(&segsize_buf),
|
||||
)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
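// Sketch of the SLRU directory listing in import_slru() above: segment file
// names are the segment number in hexadecimal (e.g. "0000", "0001", "00a3");
// names that do not parse as hex are skipped, mirroring the filter_map above.
fn parse_slru_segnos<'a>(file_names: impl IntoIterator<Item = &'a str>) -> Vec<u32> {
    file_names
        .into_iter()
        .filter_map(|name| u32::from_str_radix(name, 16).ok())
        .collect()
}

// e.g. parse_slru_segnos(["0000", "00a3", "README"]) == vec![0, 163]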
|
||||
|
||||
//
|
||||
// dbdir iteration tools
|
||||
//
|
||||
|
||||
struct PgDataDir {
|
||||
pub dbs: Vec<PgDataDirDb>, // spcnode, dboid, path
|
||||
}
|
||||
|
||||
struct PgDataDirDb {
|
||||
pub spcnode: u32,
|
||||
pub dboid: u32,
|
||||
pub path: RemotePath,
|
||||
pub files: Vec<PgDataDirDbFile>,
|
||||
}
|
||||
|
||||
struct PgDataDirDbFile {
|
||||
pub path: RemotePath,
|
||||
pub rel_tag: RelTag,
|
||||
pub segno: u32,
|
||||
pub filesize: usize,
|
||||
// Cumulative size of the given fork, set only for the last segment of that fork
pub nblocks: Option<usize>,
|
||||
}
|
||||
|
||||
impl PgDataDir {
|
||||
async fn new(storage: &RemoteStorageWrapper) -> anyhow::Result<Self> {
|
||||
let datadir_path = storage.pgdata();
|
||||
// Import ordinary databases before the global catalogs: DEFAULTTABLESPACE_OID is smaller than GLOBALTABLESPACE_OID.
// Traverse databases in increasing OID order.
|
||||
let basedir = &datadir_path.join("base");
|
||||
let db_oids: Vec<_> = storage
|
||||
.listdir(basedir)
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter_map(|path| path.object_name().and_then(|name| name.parse::<u32>().ok()))
|
||||
.sorted()
|
||||
.collect();
|
||||
debug!(?db_oids, "found databases");
|
||||
let mut databases = Vec::new();
|
||||
for dboid in db_oids {
|
||||
databases.push(
|
||||
PgDataDirDb::new(
|
||||
storage,
|
||||
&basedir.join(dboid.to_string()),
|
||||
pg_constants::DEFAULTTABLESPACE_OID,
|
||||
dboid,
|
||||
&datadir_path,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
}
|
||||
|
||||
// special case for global catalogs
|
||||
databases.push(
|
||||
PgDataDirDb::new(
|
||||
storage,
|
||||
&datadir_path.join("global"),
|
||||
postgres_ffi::pg_constants::GLOBALTABLESPACE_OID,
|
||||
0,
|
||||
&datadir_path,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
databases.sort_by_key(|db| (db.spcnode, db.dboid));
|
||||
|
||||
Ok(Self { dbs: databases })
|
||||
}
|
||||
}
|
||||
|
||||
impl PgDataDirDb {
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%dboid, %db_path))]
|
||||
async fn new(
|
||||
storage: &RemoteStorageWrapper,
|
||||
db_path: &RemotePath,
|
||||
spcnode: u32,
|
||||
dboid: u32,
|
||||
datadir_path: &RemotePath,
|
||||
) -> anyhow::Result<Self> {
|
||||
let mut files: Vec<PgDataDirDbFile> = storage
|
||||
.listfilesindir(db_path)
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter_map(|(path, size)| {
|
||||
debug!(%path, %size, "found file in dbdir");
|
||||
path.object_name().and_then(|name| {
|
||||
// returns (relnode, forknum, segno)
|
||||
parse_relfilename(name).ok().map(|x| (size, x))
|
||||
})
|
||||
})
|
||||
.sorted_by_key(|(_, relfilename)| *relfilename)
|
||||
.map(|(filesize, (relnode, forknum, segno))| {
|
||||
let rel_tag = RelTag {
|
||||
spcnode,
|
||||
dbnode: dboid,
|
||||
relnode,
|
||||
forknum,
|
||||
};
|
||||
|
||||
let path = datadir_path.join(rel_tag.to_segfile_name(segno));
|
||||
assert!(filesize % BLCKSZ as usize == 0); // TODO: this should result in an error
|
||||
let nblocks = filesize / BLCKSZ as usize;
|
||||
|
||||
PgDataDirDbFile {
|
||||
path,
|
||||
filesize,
|
||||
rel_tag,
|
||||
segno,
|
||||
nblocks: Some(nblocks), // per-segment (non-cumulative) size for now, fixed up below
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Set cumulative sizes. Do all of that math here so that later we can more easily
// parallelize over segments and know with which segments we need to write the relsize
// entry.
let mut cumulative_nblocks: usize = 0;
|
||||
let mut prev_rel_tag: Option<RelTag> = None;
|
||||
for i in 0..files.len() {
|
||||
if prev_rel_tag == Some(files[i].rel_tag) {
|
||||
cumulative_nblocks += files[i].nblocks.unwrap();
|
||||
} else {
|
||||
cumulative_nblocks = files[i].nblocks.unwrap();
|
||||
}
|
||||
|
||||
files[i].nblocks = if i == files.len() - 1 || files[i + 1].rel_tag != files[i].rel_tag {
|
||||
Some(cumulative_nblocks)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
prev_rel_tag = Some(files[i].rel_tag);
|
||||
}
|
||||
|
||||
Ok(PgDataDirDb {
|
||||
files,
|
||||
path: db_path.clone(),
|
||||
spcnode,
|
||||
dboid,
|
||||
})
|
||||
}
|
||||
}
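// Self-contained sketch of the cumulative-size pass above, with a hypothetical
// `Seg` stand-in for (RelTag, segno, nblocks). Files arrive sorted by relation
// and segment number; only the last segment of each relation keeps the
// relation's total block count, all earlier segments are reset to None.
#[derive(Clone, Copy)]
struct Seg {
    rel: u32, // stand-in for RelTag
    nblocks: usize,
}

fn cumulative_relsizes(segs: &[Seg]) -> Vec<Option<usize>> {
    let mut out = vec![None; segs.len()];
    let mut running = 0usize;
    for i in 0..segs.len() {
        if i > 0 && segs[i - 1].rel == segs[i].rel {
            running += segs[i].nblocks;
        } else {
            running = segs[i].nblocks;
        }
        let last_of_rel = i + 1 == segs.len() || segs[i + 1].rel != segs[i].rel;
        if last_of_rel {
            out[i] = Some(running);
        }
    }
    out
}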
|
||||
|
||||
trait ImportTask {
|
||||
fn key_range(&self) -> Range<Key>;
|
||||
|
||||
fn total_size(&self) -> usize {
|
||||
// TODO: revisit this
|
||||
if is_contiguous_range(&self.key_range()) {
|
||||
contiguous_range_len(&self.key_range()) as usize * 8192
|
||||
} else {
|
||||
u32::MAX as usize
|
||||
}
|
||||
}
|
||||
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize>;
|
||||
}
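// Sketch of the size heuristic above: a contiguous key range is estimated as
// (number of keys) * 8192 bytes, i.e. one full page image per key; for a
// non-contiguous range the estimate falls back to a pessimistic constant so
// the chunking in run() still cuts a job. Plain numbers stand in for Key ranges.
fn estimated_task_size(contiguous_key_count: Option<u64>) -> usize {
    match contiguous_key_count {
        Some(nkeys) => nkeys as usize * 8192, // contiguous: keys * page size
        None => u32::MAX as usize,            // not contiguous: pessimistic cap
    }
}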
|
||||
|
||||
struct ImportSingleKeyTask {
|
||||
key: Key,
|
||||
buf: Bytes,
|
||||
}
|
||||
|
||||
impl ImportSingleKeyTask {
|
||||
fn new(key: Key, buf: Bytes) -> Self {
|
||||
ImportSingleKeyTask { key, buf }
|
||||
}
|
||||
}
|
||||
|
||||
impl ImportTask for ImportSingleKeyTask {
|
||||
fn key_range(&self) -> Range<Key> {
|
||||
singleton_range(self.key)
|
||||
}
|
||||
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
layer_writer.put_image(self.key, self.buf, ctx).await?;
|
||||
Ok(1)
|
||||
}
|
||||
}
|
||||
|
||||
struct ImportRelBlocksTask {
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
}
|
||||
|
||||
impl ImportRelBlocksTask {
|
||||
fn new(
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: &RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
) -> Self {
|
||||
ImportRelBlocksTask {
|
||||
shard_identity,
|
||||
key_range,
|
||||
path: path.clone(),
|
||||
storage,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ImportTask for ImportRelBlocksTask {
|
||||
fn key_range(&self) -> Range<Key> {
|
||||
self.key_range.clone()
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%self.path))]
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
debug!("Importing relation file");
|
||||
|
||||
let (rel_tag, start_blk) = self.key_range.start.to_rel_block()?;
|
||||
let (rel_tag_end, end_blk) = self.key_range.end.to_rel_block()?;
|
||||
assert_eq!(rel_tag, rel_tag_end);
|
||||
|
||||
let ranges = (start_blk..end_blk)
|
||||
.enumerate()
|
||||
.filter_map(|(i, blknum)| {
|
||||
let key = rel_block_to_key(rel_tag, blknum);
|
||||
if self.shard_identity.is_key_disposable(&key) {
|
||||
return None;
|
||||
}
|
||||
let file_offset = i.checked_mul(8192).unwrap();
|
||||
Some((
|
||||
vec![key],
|
||||
file_offset,
|
||||
file_offset.checked_add(8192).unwrap(),
|
||||
))
|
||||
})
|
||||
.coalesce(|(mut acc, acc_start, acc_end), (mut key, start, end)| {
|
||||
assert_eq!(key.len(), 1);
|
||||
assert!(!acc.is_empty());
|
||||
assert!(acc_end > acc_start);
|
||||
if acc_end == start /* TODO additional max range check here, to limit memory consumption per task to X */ {
|
||||
acc.push(key.pop().unwrap());
|
||||
Ok((acc, acc_start, end))
|
||||
} else {
|
||||
Err(((acc, acc_start, acc_end), (key, start, end)))
|
||||
}
|
||||
});
|
||||
|
||||
let mut nimages = 0;
|
||||
for (keys, range_start, range_end) in ranges {
|
||||
let range_buf = self
|
||||
.storage
|
||||
.get_range(&self.path, range_start.into_u64(), range_end.into_u64())
|
||||
.await?;
|
||||
let mut buf = Bytes::from(range_buf);
|
||||
// TODO: batched writes
|
||||
for key in keys {
|
||||
let image = buf.split_to(8192);
|
||||
layer_writer.put_image(key, image, ctx).await?;
|
||||
nimages += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(nimages)
|
||||
}
|
||||
}
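// Sketch of the read coalescing in ImportRelBlocksTask::doit() above, without
// itertools: each surviving block index i within the segment maps to the byte
// range [i * 8192, (i + 1) * 8192), and adjacent ranges are merged so a single
// remote GET serves many blocks. Blocks filtered out as belonging to another
// shard leave a gap, which ends the current run and starts a new range.
fn coalesce_block_reads(surviving_block_indices: &[u64]) -> Vec<(u64, u64)> {
    let mut ranges: Vec<(u64, u64)> = Vec::new();
    for &i in surviving_block_indices {
        let (start, end) = (i * 8192, (i + 1) * 8192);
        match ranges.last_mut() {
            Some((_, prev_end)) if *prev_end == start => *prev_end = end, // extend run
            _ => ranges.push((start, end)),                               // start new run
        }
    }
    ranges
}

// e.g. coalesce_block_reads(&[0, 1, 2, 5, 6]) == vec![(0, 24576), (40960, 57344)]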
|
||||
|
||||
struct ImportSlruBlocksTask {
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
}
|
||||
|
||||
impl ImportSlruBlocksTask {
|
||||
fn new(
|
||||
shard_identity: ShardIdentity,
|
||||
key_range: Range<Key>,
|
||||
path: &RemotePath,
|
||||
storage: RemoteStorageWrapper,
|
||||
) -> Self {
|
||||
ImportSlruBlocksTask {
|
||||
shard_identity,
|
||||
key_range,
|
||||
path: path.clone(),
|
||||
storage,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ImportTask for ImportSlruBlocksTask {
|
||||
fn key_range(&self) -> Range<Key> {
|
||||
self.key_range.clone()
|
||||
}
|
||||
|
||||
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
debug!("Importing SLRU segment file {}", self.path);
|
||||
let buf = self.storage.get(&self.path).await?;
|
||||
|
||||
let (kind, segno, start_blk) = self.key_range.start.to_slru_block()?;
|
||||
let (_kind, _segno, end_blk) = self.key_range.end.to_slru_block()?;
|
||||
let mut blknum = start_blk;
|
||||
let mut nimages = 0;
|
||||
let mut file_offset = 0;
|
||||
while blknum < end_blk {
|
||||
let key = slru_block_to_key(kind, segno, blknum);
|
||||
assert!(
|
||||
!self.shard_identity.is_key_disposable(&key),
|
||||
"SLRU keys need to go into every shard"
|
||||
);
|
||||
let buf = &buf[file_offset..(file_offset + 8192)];
|
||||
file_offset += 8192;
|
||||
layer_writer
|
||||
.put_image(key, Bytes::copy_from_slice(buf), ctx)
|
||||
.await?;
|
||||
blknum += 1;
|
||||
nimages += 1;
|
||||
}
|
||||
Ok(nimages)
|
||||
}
|
||||
}
|
||||
|
||||
enum AnyImportTask {
|
||||
SingleKey(ImportSingleKeyTask),
|
||||
RelBlocks(ImportRelBlocksTask),
|
||||
SlruBlocks(ImportSlruBlocksTask),
|
||||
}
|
||||
|
||||
impl ImportTask for AnyImportTask {
|
||||
fn key_range(&self) -> Range<Key> {
|
||||
match self {
|
||||
Self::SingleKey(t) => t.key_range(),
|
||||
Self::RelBlocks(t) => t.key_range(),
|
||||
Self::SlruBlocks(t) => t.key_range(),
|
||||
}
|
||||
}
|
||||
/// Returns the number of images put into the `layer_writer`.
async fn doit(
|
||||
self,
|
||||
layer_writer: &mut ImageLayerWriter,
|
||||
ctx: &RequestContext,
|
||||
) -> anyhow::Result<usize> {
|
||||
match self {
|
||||
Self::SingleKey(t) => t.doit(layer_writer, ctx).await,
|
||||
Self::RelBlocks(t) => t.doit(layer_writer, ctx).await,
|
||||
Self::SlruBlocks(t) => t.doit(layer_writer, ctx).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImportSingleKeyTask> for AnyImportTask {
|
||||
fn from(t: ImportSingleKeyTask) -> Self {
|
||||
Self::SingleKey(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImportRelBlocksTask> for AnyImportTask {
|
||||
fn from(t: ImportRelBlocksTask) -> Self {
|
||||
Self::RelBlocks(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ImportSlruBlocksTask> for AnyImportTask {
|
||||
fn from(t: ImportSlruBlocksTask) -> Self {
|
||||
Self::SlruBlocks(t)
|
||||
}
|
||||
}
|
||||
|
||||
struct ChunkProcessingJob {
|
||||
timeline: Arc<Timeline>,
|
||||
range: Range<Key>,
|
||||
tasks: Vec<AnyImportTask>,
|
||||
|
||||
pgdata_lsn: Lsn,
|
||||
}
|
||||
|
||||
impl ChunkProcessingJob {
|
||||
fn new(range: Range<Key>, tasks: Vec<AnyImportTask>, env: &Flow) -> Self {
|
||||
assert!(env.pgdata_lsn.is_valid());
|
||||
Self {
|
||||
timeline: env.timeline.clone(),
|
||||
range,
|
||||
tasks,
|
||||
pgdata_lsn: env.pgdata_lsn,
|
||||
}
|
||||
}
|
||||
|
||||
async fn run(self, ctx: &RequestContext) -> anyhow::Result<()> {
|
||||
let mut writer = ImageLayerWriter::new(
|
||||
self.timeline.conf,
|
||||
self.timeline.timeline_id,
|
||||
self.timeline.tenant_shard_id,
|
||||
&self.range,
|
||||
self.pgdata_lsn,
|
||||
ctx,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let mut nimages = 0;
|
||||
for task in self.tasks {
|
||||
nimages += task.doit(&mut writer, ctx).await?;
|
||||
}
|
||||
|
||||
let resident_layer = if nimages > 0 {
|
||||
let (desc, path) = writer.finish(ctx).await?;
|
||||
Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?
|
||||
} else {
|
||||
// dropping the writer cleans up
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
// This shares the same code path as create_image_layers.
let mut guard = self.timeline.layers.write().await;
|
||||
guard
|
||||
.open_mut()?
|
||||
.track_new_image_layers(&[resident_layer.clone()], &self.timeline.metrics);
|
||||
crate::tenant::timeline::drop_wlock(guard);
|
||||
|
||||
// Schedule the layer for upload but don't add barriers such as
|
||||
// wait for completion or index upload, so we don't inhibit upload parallelism.
|
||||
// TODO: limit upload parallelism somehow (e.g. by limiting concurrency of jobs?)
|
||||
// TODO: or regulate parallelism by upload queue depth? Prob should happen at a higher level.
|
||||
self.timeline
|
||||
.remote_client
|
||||
.schedule_layer_file_upload(resident_layer)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,315 +0,0 @@
|
||||
use std::{ops::Bound, sync::Arc};
|
||||
|
||||
use anyhow::Context;
|
||||
use bytes::Bytes;
|
||||
use postgres_ffi::ControlFileData;
|
||||
use remote_storage::{
|
||||
Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingObject, RemotePath,
|
||||
};
|
||||
use serde::de::DeserializeOwned;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{debug, info, instrument};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::{assert_u64_eq_usize::U64IsUsize, config::PageServerConf};
|
||||
|
||||
use super::{importbucket_format, index_part_format};
|
||||
|
||||
pub async fn new(
|
||||
conf: &'static PageServerConf,
|
||||
location: &index_part_format::Location,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<RemoteStorageWrapper, anyhow::Error> {
|
||||
// FIXME: we probably want some timeout, and we might be able to assume the max file
|
||||
// size on S3 is 1GiB (postgres segment size). But the problem is that the individual
|
||||
// downloaders don't know enough about concurrent downloads to make a guess on the
|
||||
// expected bandwidth and resulting best timeout.
|
||||
let timeout = std::time::Duration::from_secs(24 * 60 * 60);
|
||||
let location_storage = match location {
|
||||
#[cfg(feature = "testing")]
|
||||
index_part_format::Location::LocalFs { path } => {
|
||||
GenericRemoteStorage::LocalFs(remote_storage::LocalFs::new(path.clone(), timeout)?)
|
||||
}
|
||||
index_part_format::Location::AwsS3 {
|
||||
region,
|
||||
bucket,
|
||||
key,
|
||||
} => {
|
||||
// TODO: think about security implications of letting the client specify the bucket & prefix.
|
||||
// It's the most flexible right now, but possibly we want to move the bucket name into PS conf
// and force the timeline_id into the prefix?
|
||||
GenericRemoteStorage::AwsS3(Arc::new(
|
||||
remote_storage::S3Bucket::new(
|
||||
&remote_storage::S3Config {
|
||||
bucket_name: bucket.clone(),
|
||||
prefix_in_bucket: Some(key.clone()),
|
||||
bucket_region: region.clone(),
|
||||
endpoint: conf
|
||||
.import_pgdata_aws_endpoint_url
|
||||
.clone()
|
||||
.map(|url| url.to_string()), // by specifying None here, remote_storage/aws-sdk-rust will infer from env
|
||||
concurrency_limit: 100.try_into().unwrap(), // TODO: think about this
|
||||
max_keys_per_list_response: Some(1000), // TODO: think about this
|
||||
upload_storage_class: None, // irrelevant
|
||||
},
|
||||
timeout,
|
||||
)
|
||||
.await
|
||||
.context("setup s3 bucket")?,
|
||||
))
|
||||
}
|
||||
};
|
||||
let storage_wrapper = RemoteStorageWrapper::new(location_storage, cancel);
|
||||
Ok(storage_wrapper)
|
||||
}
|
||||
|
||||
/// Wrap [`remote_storage`] APIs to make it look a bit more like a filesystem API
|
||||
/// such as [`tokio::fs`], which was used in the original implementation of the import code.
|
||||
#[derive(Clone)]
|
||||
pub struct RemoteStorageWrapper {
|
||||
storage: GenericRemoteStorage,
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
impl RemoteStorageWrapper {
|
||||
pub fn new(storage: GenericRemoteStorage, cancel: CancellationToken) -> Self {
|
||||
Self { storage, cancel }
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn listfilesindir(
|
||||
&self,
|
||||
path: &RemotePath,
|
||||
) -> Result<Vec<(RemotePath, usize)>, DownloadError> {
|
||||
assert!(
|
||||
path.object_name().is_some(),
|
||||
"must specify dirname, without trailing slash"
|
||||
);
|
||||
let path = path.add_trailing_slash();
|
||||
|
||||
let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
|
||||
|| async {
|
||||
let Listing { keys, prefixes: _ } = self
|
||||
.storage
|
||||
.list(
|
||||
Some(&path),
|
||||
remote_storage::ListingMode::WithDelimiter,
|
||||
None,
|
||||
&self.cancel,
|
||||
)
|
||||
.await?;
|
||||
let res = keys
|
||||
.into_iter()
|
||||
.map(|ListingObject { key, size, .. }| (key, size.into_usize()))
|
||||
.collect();
|
||||
Ok(res)
|
||||
},
|
||||
&format!("listfilesindir {path:?}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
debug!(?res, "returning");
|
||||
res
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn listdir(&self, path: &RemotePath) -> Result<Vec<RemotePath>, DownloadError> {
|
||||
assert!(
|
||||
path.object_name().is_some(),
|
||||
"must specify dirname, without trailing slash"
|
||||
);
|
||||
let path = path.add_trailing_slash();
|
||||
|
||||
let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
|
||||
|| async {
|
||||
let Listing { keys, prefixes } = self
|
||||
.storage
|
||||
.list(
|
||||
Some(&path),
|
||||
remote_storage::ListingMode::WithDelimiter,
|
||||
None,
|
||||
&self.cancel,
|
||||
)
|
||||
.await?;
|
||||
let res = keys
|
||||
.into_iter()
|
||||
.map(|ListingObject { key, .. }| key)
|
||||
.chain(prefixes.into_iter())
|
||||
.collect();
|
||||
Ok(res)
|
||||
},
|
||||
&format!("listdir {path:?}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
debug!(?res, "returning");
|
||||
res
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn get(&self, path: &RemotePath) -> Result<Bytes, DownloadError> {
|
||||
let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
|
||||
|| async {
|
||||
let Download {
|
||||
download_stream, ..
|
||||
} = self
|
||||
.storage
|
||||
.download(path, &DownloadOpts::default(), &self.cancel)
|
||||
.await?;
|
||||
let mut reader = tokio_util::io::StreamReader::new(download_stream);
|
||||
|
||||
// XXX optimize this, can we get the capacity hint from somewhere?
|
||||
let mut buf = Vec::new();
|
||||
tokio::io::copy_buf(&mut reader, &mut buf).await?;
|
||||
Ok(Bytes::from(buf))
|
||||
},
|
||||
&format!("download {path:?}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
debug!(len = res.as_ref().ok().map(|buf| buf.len()), "done");
|
||||
res
|
||||
}
|
||||
|
||||
pub async fn get_spec(&self) -> Result<Option<importbucket_format::Spec>, anyhow::Error> {
|
||||
self.get_json(&RemotePath::from_string("spec.json").unwrap())
|
||||
.await
|
||||
.context("get spec")
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn get_json<T: DeserializeOwned>(
|
||||
&self,
|
||||
path: &RemotePath,
|
||||
) -> Result<Option<T>, DownloadError> {
|
||||
let buf = match self.get(path).await {
|
||||
Ok(buf) => buf,
|
||||
Err(DownloadError::NotFound) => return Ok(None),
|
||||
Err(err) => return Err(err),
|
||||
};
|
||||
let res = serde_json::from_slice(&buf)
|
||||
.context("serialize")
|
||||
// TODO: own error type
|
||||
.map_err(DownloadError::Other)?;
|
||||
Ok(Some(res))
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn put_json<T>(&self, path: &RemotePath, value: &T) -> anyhow::Result<()>
|
||||
where
|
||||
T: serde::Serialize,
|
||||
{
|
||||
let buf = serde_json::to_vec(value)?;
|
||||
let bytes = Bytes::from(buf);
|
||||
utils::backoff::retry(
|
||||
|| async {
|
||||
let size = bytes.len();
|
||||
let bytes = futures::stream::once(futures::future::ready(Ok(bytes.clone())));
|
||||
self.storage
|
||||
.upload_storage_object(bytes, size, path, &self.cancel)
|
||||
.await
|
||||
},
|
||||
remote_storage::TimeoutOrCancel::caused_by_cancel,
|
||||
1,
|
||||
u32::MAX,
|
||||
&format!("put json {path}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await
|
||||
.expect("practically infinite retries")
|
||||
}
|
||||
|
||||
#[instrument(level = tracing::Level::DEBUG, skip_all, fields(%path))]
|
||||
pub async fn get_range(
|
||||
&self,
|
||||
path: &RemotePath,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: u64,
|
||||
) -> Result<Vec<u8>, DownloadError> {
|
||||
let len = end_exclusive
|
||||
.checked_sub(start_inclusive)
|
||||
.unwrap()
|
||||
.into_usize();
|
||||
let res = crate::tenant::remote_timeline_client::download::download_retry_forever(
|
||||
|| async {
|
||||
let Download {
|
||||
download_stream, ..
|
||||
} = self
|
||||
.storage
|
||||
.download(
|
||||
path,
|
||||
&DownloadOpts {
|
||||
etag: None,
|
||||
byte_start: Bound::Included(start_inclusive),
|
||||
byte_end: Bound::Excluded(end_exclusive)
|
||||
},
|
||||
&self.cancel)
|
||||
.await?;
|
||||
let mut reader = tokio_util::io::StreamReader::new(download_stream);
|
||||
|
||||
let mut buf = Vec::with_capacity(len);
|
||||
tokio::io::copy_buf(&mut reader, &mut buf).await?;
|
||||
Ok(buf)
|
||||
},
|
||||
&format!("download range len=0x{len:x} [0x{start_inclusive:x},0x{end_exclusive:x}) from {path:?}"),
|
||||
&self.cancel,
|
||||
)
|
||||
.await;
|
||||
debug!(len = res.as_ref().ok().map(|buf| buf.len()), "done");
|
||||
res
|
||||
}
|
||||
|
||||
pub fn pgdata(&self) -> RemotePath {
|
||||
RemotePath::from_string("pgdata").unwrap()
|
||||
}
|
||||
|
||||
pub async fn get_control_file(&self) -> Result<ControlFile, anyhow::Error> {
|
||||
let control_file_path = self.pgdata().join("global/pg_control");
|
||||
info!("get control file from {control_file_path}");
|
||||
let control_file_buf = self.get(&control_file_path).await?;
|
||||
ControlFile::new(control_file_buf)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ControlFile {
|
||||
control_file_data: ControlFileData,
|
||||
control_file_buf: Bytes,
|
||||
}
|
||||
|
||||
impl ControlFile {
|
||||
pub(crate) fn new(control_file_buf: Bytes) -> Result<Self, anyhow::Error> {
|
||||
// XXX ControlFileData is version-specific, we're always using v14 here. v17 had changes.
|
||||
let control_file_data = ControlFileData::decode(&control_file_buf)?;
|
||||
let control_file = ControlFile {
|
||||
control_file_data,
|
||||
control_file_buf,
|
||||
};
|
||||
control_file.try_pg_version()?; // so that we can offer infallible pg_version()
|
||||
Ok(control_file)
|
||||
}
|
||||
pub(crate) fn base_lsn(&self) -> Lsn {
|
||||
Lsn(self.control_file_data.checkPoint).align()
|
||||
}
|
||||
pub(crate) fn pg_version(&self) -> u32 {
|
||||
self.try_pg_version()
|
||||
.expect("new() checks that try_pg_version doesn't error")
}
|
||||
pub(crate) fn control_file_data(&self) -> &ControlFileData {
|
||||
&self.control_file_data
|
||||
}
|
||||
pub(crate) fn control_file_buf(&self) -> &Bytes {
|
||||
&self.control_file_buf
|
||||
}
|
||||
fn try_pg_version(&self) -> anyhow::Result<u32> {
|
||||
Ok(match self.control_file_data.catalog_version_no {
|
||||
// these are from catversion.h
202107181 => 14,
|
||||
202209061 => 15,
|
||||
202307071 => 16,
|
||||
/* XXX pg17 */
|
||||
catversion => {
|
||||
anyhow::bail!("unrecognized catalog version {catversion}")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
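// Standalone restatement of the catalog-version mapping in try_pg_version()
// above. The constants come from PostgreSQL's catversion.h; pg17 is
// deliberately left unmapped, matching the XXX above.
fn pg_major_from_catversion(catversion: u32) -> Option<u32> {
    match catversion {
        202107181 => Some(14),
        202209061 => Some(15),
        202307071 => Some(16),
        _ => None,
    }
}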
|
||||
@@ -1,20 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct PgdataStatus {
|
||||
pub done: bool,
|
||||
// TODO: remaining fields
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct ShardStatus {
|
||||
pub done: bool,
|
||||
// TODO: remaining fields
|
||||
}
|
||||
|
||||
// TODO: dedupe with fast_import code
|
||||
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Spec {
|
||||
pub project_id: String,
|
||||
pub branch_id: String,
|
||||
}
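// For reference, the spec.json payload that get_spec() above deserializes into
// this struct would look like the following (placeholder values; field names
// assume the struct's default serde mapping):
//
//   { "project_id": "some-project", "branch_id": "some-branch" }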
|
||||
@@ -1,68 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
use camino::Utf8PathBuf;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Root {
|
||||
V1(V1),
|
||||
}
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum V1 {
|
||||
InProgress(InProgress),
|
||||
Done(Done),
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(transparent)]
|
||||
pub struct IdempotencyKey(String);
|
||||
|
||||
impl IdempotencyKey {
|
||||
pub fn new(s: String) -> Self {
|
||||
Self(s)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct InProgress {
|
||||
pub idempotency_key: IdempotencyKey,
|
||||
pub location: Location,
|
||||
pub started_at: chrono::NaiveDateTime,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Done {
|
||||
pub idempotency_key: IdempotencyKey,
|
||||
pub started_at: chrono::NaiveDateTime,
|
||||
pub finished_at: chrono::NaiveDateTime,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Location {
|
||||
#[cfg(feature = "testing")]
|
||||
LocalFs { path: Utf8PathBuf },
|
||||
AwsS3 {
|
||||
region: String,
|
||||
bucket: String,
|
||||
key: String,
|
||||
},
|
||||
}
|
||||
|
||||
impl Root {
|
||||
pub fn is_done(&self) -> bool {
|
||||
match self {
|
||||
Root::V1(v1) => match v1 {
|
||||
V1::Done(_) => true,
|
||||
V1::InProgress(_) => false,
|
||||
},
|
||||
}
|
||||
}
|
||||
pub fn idempotency_key(&self) -> &IdempotencyKey {
|
||||
match self {
|
||||
Root::V1(v1) => match v1 {
|
||||
V1::InProgress(in_progress) => &in_progress.idempotency_key,
|
||||
V1::Done(done) => &done.idempotency_key,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
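// Minimal sketch of the index-part import state machine above: the import is
// recorded as InProgress when it starts and rewritten as Done, carrying the
// same idempotency key, once the final progress upcall has been delivered.
// The simplified stand-in below drops the timestamps and storage Location.
enum ImportStateSketch {
    InProgress { idempotency_key: String },
    Done { idempotency_key: String },
}

fn mark_done(state: ImportStateSketch) -> ImportStateSketch {
    match state {
        ImportStateSketch::InProgress { idempotency_key } => {
            ImportStateSketch::Done { idempotency_key }
        }
        done @ ImportStateSketch::Done { .. } => done, // already done: no-op
    }
}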
|
||||
@@ -1,119 +0,0 @@
|
||||
//! FIXME: most of this is copy-paste from mgmt_api.rs; dedupe into a `reqwest_utils::Client` crate.
use pageserver_client::mgmt_api::{Error, ResponseErrorMessageExt};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::error;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use reqwest::Method;
|
||||
|
||||
use super::importbucket_format::Spec;
|
||||
|
||||
pub struct Client {
|
||||
base_url: String,
|
||||
authorization_header: Option<String>,
|
||||
client: reqwest::Client,
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct ImportProgressRequest {
|
||||
// no fields yet, not sure if there ever will be any
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct ImportProgressResponse {
|
||||
// we don't care
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub fn new(conf: &PageServerConf, cancel: CancellationToken) -> anyhow::Result<Self> {
|
||||
let Some(ref base_url) = conf.import_pgdata_upcall_api else {
|
||||
anyhow::bail!("import_pgdata_upcall_api is not configured")
|
||||
};
|
||||
Ok(Self {
|
||||
base_url: base_url.to_string(),
|
||||
client: reqwest::Client::new(),
|
||||
cancel,
|
||||
authorization_header: conf
|
||||
.import_pgdata_upcall_api_token
|
||||
.as_ref()
|
||||
.map(|secret_string| secret_string.get_contents())
|
||||
.map(|jwt| format!("Bearer {jwt}")),
|
||||
})
|
||||
}
|
||||
|
||||
fn start_request<U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
) -> reqwest::RequestBuilder {
|
||||
let req = self.client.request(method, uri);
|
||||
if let Some(value) = &self.authorization_header {
|
||||
req.header(reqwest::header::AUTHORIZATION, value)
|
||||
} else {
|
||||
req
|
||||
}
|
||||
}
|
||||
|
||||
async fn request_noerror<B: serde::Serialize, U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
body: B,
|
||||
) -> Result<reqwest::Response> {
|
||||
self.start_request(method, uri)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)
|
||||
}
|
||||
|
||||
async fn request<B: serde::Serialize, U: reqwest::IntoUrl>(
|
||||
&self,
|
||||
method: Method,
|
||||
uri: U,
|
||||
body: B,
|
||||
) -> Result<reqwest::Response> {
|
||||
let res = self.request_noerror(method, uri, body).await?;
|
||||
let response = res.error_from_body().await?;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub async fn send_progress_once(&self, spec: &Spec) -> Result<()> {
|
||||
let url = format!(
|
||||
"{}/projects/{}/branches/{}/import_progress",
|
||||
self.base_url, spec.project_id, spec.branch_id
|
||||
);
|
||||
let ImportProgressResponse {} = self
|
||||
.request(Method::POST, url, &ImportProgressRequest {})
|
||||
.await?
|
||||
.json()
|
||||
.await
|
||||
.map_err(Error::ReceiveBody)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn send_progress_until_success(&self, spec: &Spec) -> anyhow::Result<()> {
|
||||
loop {
|
||||
match self.send_progress_once(spec).await {
|
||||
Ok(()) => return Ok(()),
|
||||
Err(Error::Cancelled) => return Err(anyhow::anyhow!("cancelled")),
|
||||
Err(err) => {
|
||||
error!(?err, "error sending progress, retrying");
|
||||
if tokio::time::timeout(
|
||||
std::time::Duration::from_secs(10),
|
||||
self.cancel.cancelled(),
|
||||
)
|
||||
.await
|
||||
.is_ok()
|
||||
{
|
||||
anyhow::bail!("cancelled while sending early progress update");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,7 +3,7 @@ use std::{collections::hash_map::Entry, fs, sync::Arc};
|
||||
use anyhow::Context;
|
||||
use camino::Utf8PathBuf;
|
||||
use tracing::{error, info, info_span};
|
||||
use utils::{fs_ext, id::TimelineId, lsn::Lsn, sync::gate::GateGuard};
|
||||
use utils::{fs_ext, id::TimelineId, lsn::Lsn};
|
||||
|
||||
use crate::{
|
||||
context::RequestContext,
|
||||
@@ -23,14 +23,14 @@ use super::Timeline;
|
||||
pub struct UninitializedTimeline<'t> {
|
||||
pub(crate) owning_tenant: &'t Tenant,
|
||||
timeline_id: TimelineId,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard<'t>)>,
|
||||
}
|
||||
|
||||
impl<'t> UninitializedTimeline<'t> {
|
||||
pub(crate) fn new(
|
||||
owning_tenant: &'t Tenant,
|
||||
timeline_id: TimelineId,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard)>,
|
||||
raw_timeline: Option<(Arc<Timeline>, TimelineCreateGuard<'t>)>,
|
||||
) -> Self {
|
||||
Self {
|
||||
owning_tenant,
|
||||
@@ -87,10 +87,6 @@ impl<'t> UninitializedTimeline<'t> {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn finish_creation_myself(&mut self) -> (Arc<Timeline>, TimelineCreateGuard) {
|
||||
self.raw_timeline.take().expect("already checked")
|
||||
}
|
||||
|
||||
/// Prepares timeline data by loading it from the basebackup archive.
|
||||
pub(crate) async fn import_basebackup_from_tar(
|
||||
self,
|
||||
@@ -171,10 +167,9 @@ pub(crate) fn cleanup_timeline_directory(create_guard: TimelineCreateGuard) {
|
||||
/// A guard for timeline creations in process: as long as this object exists, the timeline ID
|
||||
/// is kept in `[Tenant::timelines_creating]` to exclude concurrent attempts to create the same timeline.
|
||||
#[must_use]
|
||||
pub(crate) struct TimelineCreateGuard {
|
||||
pub(crate) _tenant_gate_guard: GateGuard,
|
||||
pub(crate) owning_tenant: Arc<Tenant>,
|
||||
pub(crate) timeline_id: TimelineId,
|
||||
pub(crate) struct TimelineCreateGuard<'t> {
|
||||
owning_tenant: &'t Tenant,
|
||||
timeline_id: TimelineId,
|
||||
pub(crate) timeline_path: Utf8PathBuf,
|
||||
pub(crate) idempotency: CreateTimelineIdempotency,
|
||||
}
|
||||
@@ -189,27 +184,20 @@ pub(crate) enum TimelineExclusionError {
|
||||
},
|
||||
#[error("Already creating")]
|
||||
AlreadyCreating,
|
||||
#[error("Shutting down")]
|
||||
ShuttingDown,
|
||||
|
||||
// e.g. I/O errors, or some failure deep in postgres initdb
|
||||
#[error(transparent)]
|
||||
Other(#[from] anyhow::Error),
|
||||
}
|
||||
|
||||
impl TimelineCreateGuard {
|
||||
impl<'t> TimelineCreateGuard<'t> {
|
||||
pub(crate) fn new(
|
||||
owning_tenant: &Arc<Tenant>,
|
||||
owning_tenant: &'t Tenant,
|
||||
timeline_id: TimelineId,
|
||||
timeline_path: Utf8PathBuf,
|
||||
idempotency: CreateTimelineIdempotency,
|
||||
allow_offloaded: bool,
|
||||
) -> Result<Self, TimelineExclusionError> {
|
||||
let _tenant_gate_guard = owning_tenant
|
||||
.gate
|
||||
.enter()
|
||||
.map_err(|_| TimelineExclusionError::ShuttingDown)?;
|
||||
|
||||
// Lock order: this is the only place we take both locks. During drop() we only
|
||||
// lock creating_timelines
|
||||
let timelines = owning_tenant.timelines.lock().unwrap();
|
||||
@@ -237,12 +225,8 @@ impl TimelineCreateGuard {
|
||||
return Err(TimelineExclusionError::AlreadyCreating);
|
||||
}
|
||||
creating_timelines.insert(timeline_id);
|
||||
drop(creating_timelines);
|
||||
drop(timelines_offloaded);
|
||||
drop(timelines);
|
||||
Ok(Self {
|
||||
_tenant_gate_guard,
|
||||
owning_tenant: Arc::clone(owning_tenant),
|
||||
owning_tenant,
|
||||
timeline_id,
|
||||
timeline_path,
|
||||
idempotency,
|
||||
@@ -250,7 +234,7 @@ impl TimelineCreateGuard {
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TimelineCreateGuard {
|
||||
impl Drop for TimelineCreateGuard<'_> {
|
||||
fn drop(&mut self) {
|
||||
self.owning_tenant
|
||||
.timelines_creating
|
||||
|
||||
@@ -3,7 +3,6 @@ use super::storage_layer::ResidentLayer;
|
||||
use crate::tenant::metadata::TimelineMetadata;
|
||||
use crate::tenant::remote_timeline_client::index::IndexPart;
|
||||
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
|
||||
use std::collections::HashSet;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::fmt::Debug;
|
||||
|
||||
@@ -15,6 +14,7 @@ use utils::lsn::AtomicLsn;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
use utils::generation::Generation;
|
||||
|
||||
// clippy warns that Uninitialized is much smaller than Initialized, which wastes
|
||||
@@ -38,12 +38,6 @@ impl UploadQueue {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
|
||||
pub(crate) enum OpType {
|
||||
MayReorder,
|
||||
FlushDeletion,
|
||||
}
|
||||
|
||||
/// This keeps track of queued and in-progress tasks.
|
||||
pub(crate) struct UploadQueueInitialized {
|
||||
/// Counter to assign task IDs
|
||||
@@ -94,12 +88,6 @@ pub(crate) struct UploadQueueInitialized {
|
||||
#[cfg(feature = "testing")]
|
||||
pub(crate) dangling_files: HashMap<LayerName, Generation>,
|
||||
|
||||
/// Ensure we order file operations correctly.
|
||||
pub(crate) recently_deleted: HashSet<(LayerName, Generation)>,
|
||||
|
||||
/// Deletions that are blocked by the tenant configuration
|
||||
pub(crate) blocked_deletions: Vec<Delete>,
|
||||
|
||||
/// Set to true when we have inserted the `UploadOp::Shutdown` into the `inprogress_tasks`.
|
||||
pub(crate) shutting_down: bool,
|
||||
|
||||
@@ -192,8 +180,6 @@ impl UploadQueue {
|
||||
queued_operations: VecDeque::new(),
|
||||
#[cfg(feature = "testing")]
|
||||
dangling_files: HashMap::new(),
|
||||
recently_deleted: HashSet::new(),
|
||||
blocked_deletions: Vec::new(),
|
||||
shutting_down: false,
|
||||
shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
|
||||
};
|
||||
@@ -234,8 +220,6 @@ impl UploadQueue {
|
||||
queued_operations: VecDeque::new(),
|
||||
#[cfg(feature = "testing")]
|
||||
dangling_files: HashMap::new(),
|
||||
recently_deleted: HashSet::new(),
|
||||
blocked_deletions: Vec::new(),
|
||||
shutting_down: false,
|
||||
shutdown_ready: Arc::new(tokio::sync::Semaphore::new(0)),
|
||||
};
|
||||
@@ -286,15 +270,15 @@ pub(crate) struct UploadTask {
|
||||
|
||||
/// A deletion of some layers within the lifetime of a timeline. This is not used
|
||||
/// for timeline deletion, which skips this queue and goes directly to DeletionQueue.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Delete {
|
||||
pub(crate) layers: Vec<(LayerName, LayerFileMetadata)>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum UploadOp {
|
||||
/// Upload a layer file. The last field indicates the last operation for this file.
UploadLayer(ResidentLayer, LayerFileMetadata, Option<OpType>),
|
||||
/// Upload a layer file
|
||||
UploadLayer(ResidentLayer, LayerFileMetadata),
|
||||
|
||||
/// Upload an index_part.json file
UploadMetadata {
|
||||
@@ -316,11 +300,11 @@ pub(crate) enum UploadOp {
|
||||
impl std::fmt::Display for UploadOp {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match self {
|
||||
UploadOp::UploadLayer(layer, metadata, mode) => {
|
||||
UploadOp::UploadLayer(layer, metadata) => {
|
||||
write!(
|
||||
f,
|
||||
"UploadLayer({}, size={:?}, gen={:?}, mode={:?})",
|
||||
layer, metadata.file_size, metadata.generation, mode
|
||||
"UploadLayer({}, size={:?}, gen={:?})",
|
||||
layer, metadata.file_size, metadata.generation
|
||||
)
|
||||
}
|
||||
UploadOp::UploadMetadata { uploaded, .. } => {
|
||||
|
||||
@@ -175,16 +175,10 @@ impl VirtualFile {
|
||||
}
|
||||
|
||||
pub async fn sync_all(&self) -> Result<(), Error> {
|
||||
if SYNC_MODE.load(std::sync::atomic::Ordering::Relaxed) == SyncMode::UnsafeNoSync as u8 {
|
||||
return Ok(());
|
||||
}
|
||||
self.inner.sync_all().await
|
||||
}
|
||||
|
||||
pub async fn sync_data(&self) -> Result<(), Error> {
|
||||
if SYNC_MODE.load(std::sync::atomic::Ordering::Relaxed) == SyncMode::UnsafeNoSync as u8 {
|
||||
return Ok(());
|
||||
}
|
||||
self.inner.sync_data().await
|
||||
}
|
||||
|
||||
@@ -239,27 +233,6 @@ impl VirtualFile {
|
||||
}
|
||||
}
|
||||
|
||||
/// Indicates whether to enable fsync, fdatasync, or O_SYNC/O_DSYNC when writing
|
||||
/// files. Switching this off is unsafe and only used for testing on machines
|
||||
/// with slow drives.
|
||||
#[repr(u8)]
|
||||
pub enum SyncMode {
|
||||
Sync,
|
||||
UnsafeNoSync,
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for SyncMode {
|
||||
type Error = u8;
|
||||
|
||||
fn try_from(value: u8) -> Result<Self, Self::Error> {
|
||||
Ok(match value {
|
||||
v if v == (SyncMode::Sync as u8) => SyncMode::Sync,
|
||||
v if v == (SyncMode::UnsafeNoSync as u8) => SyncMode::UnsafeNoSync,
|
||||
x => return Err(x),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// A virtual file descriptor. You can use this just like std::fs::File, but internally
|
||||
/// the underlying file is closed if the system is low on file descriptors,
|
||||
@@ -1359,13 +1332,12 @@ impl OpenFiles {
|
||||
/// server startup.
|
||||
///
|
||||
#[cfg(not(test))]
|
||||
pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode, sync_mode: SyncMode) {
|
||||
pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode) {
|
||||
if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
|
||||
panic!("virtual_file::init called twice");
|
||||
}
|
||||
set_io_mode(mode);
|
||||
io_engine::init(engine);
|
||||
SYNC_MODE.store(sync_mode as u8, std::sync::atomic::Ordering::Relaxed);
|
||||
crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
|
||||
}
|
||||
|
||||
@@ -1407,9 +1379,6 @@ pub(crate) fn set_io_mode(mode: IoMode) {
|
||||
pub(crate) fn get_io_mode() -> IoMode {
|
||||
IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap()
|
||||
}
|
||||
|
||||
static SYNC_MODE: AtomicU8 = AtomicU8::new(SyncMode::Sync as u8);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::context::DownloadBehavior;
|
||||
|
||||