diff --git a/.github/actions/allure-report-generate/action.yml b/.github/actions/allure-report-generate/action.yml index 16b6e71498..d1d09223db 100644 --- a/.github/actions/allure-report-generate/action.yml +++ b/.github/actions/allure-report-generate/action.yml @@ -7,6 +7,10 @@ inputs: type: boolean required: false default: false + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' outputs: base-url: @@ -79,6 +83,14 @@ runs: ALLURE_VERSION: 2.27.0 ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777 + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report + # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this - name: Acquire lock shell: bash -euxo pipefail {0} diff --git a/.github/actions/allure-report-store/action.yml b/.github/actions/allure-report-store/action.yml index df4a6712ac..9c376f420a 100644 --- a/.github/actions/allure-report-store/action.yml +++ b/.github/actions/allure-report-store/action.yml @@ -8,6 +8,10 @@ inputs: unique-key: description: 'string to distinguish different results in the same run' required: true + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' runs: using: "composite" @@ -31,6 +35,14 @@ runs: env: REPORT_DIR: ${{ inputs.report-dir }} + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report + - name: Upload test results shell: bash -euxo pipefail {0} run: | diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 037b9aeb1e..275f161019 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -48,6 +48,10 @@ inputs: description: 'benchmark durations JSON' required: false default: '{}' + aws_oicd_role_arn: + description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role' + required: false + default: '' runs: using: "composite" @@ -222,6 +226,13 @@ runs: # (for example if we didn't run the test for non build-and-test workflow) skip-if-does-not-exist: true + - name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test + if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }} + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ inputs.aws_oicd_role_arn }} + role-duration-seconds: 3600 # 1 hour should be more than enough to upload report - name: Upload test results if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-store diff --git a/.github/workflows/_create-release-pr.yml 
b/.github/workflows/_create-release-pr.yml new file mode 100644 index 0000000000..cc6994397f --- /dev/null +++ b/.github/workflows/_create-release-pr.yml @@ -0,0 +1,79 @@ +name: Create Release PR + +on: + workflow_call: + inputs: + component-name: + description: 'Component name' + required: true + type: string + release-branch: + description: 'Release branch' + required: true + type: string + secrets: + ci-access-token: + description: 'CI access token' + required: true + +defaults: + run: + shell: bash -euo pipefail {0} + +jobs: + create-storage-release-branch: + runs-on: ubuntu-22.04 + + permissions: + contents: write # for `git push` + + steps: + - uses: actions/checkout@v4 + with: + ref: main + + - name: Set variables + id: vars + env: + COMPONENT_NAME: ${{ inputs.component-name }} + RELEASE_BRANCH: ${{ inputs.release-branch }} + run: | + today=$(date +'%Y-%m-%d') + echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT} + echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT} + + - name: Configure git + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + - name: Create RC branch + env: + RC_BRANCH: ${{ steps.vars.outputs.rc-branch }} + TITLE: ${{ steps.vars.outputs.title }} + run: | + git checkout -b "${RC_BRANCH}" + + # create an empty commit to distinguish workflow runs + # from other possible releases from the same commit + git commit --allow-empty -m "${TITLE}" + + git push origin "${RC_BRANCH}" + + - name: Create a PR into ${{ inputs.release-branch }} + env: + GH_TOKEN: ${{ secrets.ci-access-token }} + RC_BRANCH: ${{ steps.vars.outputs.rc-branch }} + RELEASE_BRANCH: ${{ inputs.release-branch }} + TITLE: ${{ steps.vars.outputs.title }} + run: | + cat << EOF > body.md + ## ${TITLE} + + **Please merge this Pull Request using 'Create a merge commit' button** + EOF + + gh pr create --title "${TITLE}" \ + --body-file "body.md" \ + --head "${RC_BRANCH}" \ + --base "${RELEASE_BRANCH}" diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 69b8bc5d70..acea859b4d 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -122,6 +122,7 @@ jobs: run_in_parallel: false save_perf_report: ${{ env.SAVE_PERF_REPORT }} pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # Set --sparse-ordering option of pytest-order plugin # to ensure tests are running in order of appears in the file. 
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests @@ -133,6 +134,7 @@ jobs: --ignore test_runner/performance/test_perf_pgvector_queries.py --ignore test_runner/performance/test_logical_replication.py --ignore test_runner/performance/test_physical_replication.py + --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py env: BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -149,12 +151,14 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -210,6 +214,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -226,6 +231,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 5400 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -237,11 +243,13 @@ jobs: uses: ./.github/actions/allure-report-generate with: store-test-results-into-db: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} + # Post both success and failure to the Slack channel - name: Post to a Slack channel - if: ${{ github.event.schedule && failure() }} + if: ${{ github.event.schedule }} uses: slackapi/slack-github-action@v1 with: channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream @@ -444,6 +452,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -458,6 +467,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -472,6 +482,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -488,12 +499,14 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + 
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -545,12 +558,12 @@ jobs: arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g') cd /home/nonroot - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.0-1.pgdg110+1_${arch}.deb" - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb" - wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110+2_${arch}.deb" - dpkg -x libpq5_17.0-1.pgdg110+1_${arch}.deb pg - dpkg -x postgresql-16_16.4-1.pgdg110+2_${arch}.deb pg - dpkg -x postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb pg + wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.1-1.pgdg110+1_${arch}.deb" + wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.5-1.pgdg110+1_${arch}.deb" + wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.5-1.pgdg110+1_${arch}.deb" + dpkg -x libpq5_17.1-1.pgdg110+1_${arch}.deb pg + dpkg -x postgresql-16_16.5-1.pgdg110+1_${arch}.deb pg + dpkg -x postgresql-client-16_16.5-1.pgdg110+1_${arch}.deb pg mkdir -p /tmp/neon/pg_install/v16/bin ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench @@ -598,6 +611,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -612,6 +626,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -621,12 +636,14 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -722,6 +739,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 43200 -k test_clickbench pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -734,12 +752,14 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: 
./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -836,6 +856,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_tpch pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -846,12 +867,14 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> @@ -934,6 +957,7 @@ jobs: save_perf_report: ${{ env.SAVE_PERF_REPORT }} extra_params: -m remote_cluster --timeout 21600 -k test_user_examples pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -943,12 +967,14 @@ jobs: id: create-allure-report if: ${{ !cancelled() }} uses: ./.github/actions/allure-report-generate + with: + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} - name: Post to a Slack channel if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: | Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> diff --git a/.github/workflows/build-build-tools-image.yml b/.github/workflows/build-build-tools-image.yml index 82b065c524..9e7be76901 100644 --- a/.github/workflows/build-build-tools-image.yml +++ b/.github/workflows/build-build-tools-image.yml @@ -2,18 +2,13 @@ name: Build build-tools image on: workflow_call: - inputs: - image-tag: - description: "build-tools image tag" - required: true - type: string outputs: image-tag: description: "build-tools tag" - value: ${{ inputs.image-tag }} + value: ${{ jobs.check-image.outputs.tag }} image: description: "build-tools image" - value: neondatabase/build-tools:${{ inputs.image-tag }} + value: neondatabase/build-tools:${{ jobs.check-image.outputs.tag }} defaults: run: @@ -35,7 +30,36 @@ permissions: {} jobs: check-image: - uses: ./.github/workflows/check-build-tools-image.yml + runs-on: ubuntu-22.04 + outputs: + tag: ${{ steps.get-build-tools-tag.outputs.image-tag }} + found: ${{ steps.check-image.outputs.found }} + + steps: + - uses: actions/checkout@v4 + + - name: Get build-tools image tag for the current commit + id: get-build-tools-tag + 
env: + IMAGE_TAG: | + ${{ hashFiles('build-tools.Dockerfile', + '.github/workflows/build-build-tools-image.yml') }} + run: | + echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT + + - name: Check if such tag found in the registry + id: check-image + env: + IMAGE_TAG: ${{ steps.get-build-tools-tag.outputs.image-tag }} + run: | + if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then + found=true + else + found=false + fi + + echo "found=${found}" | tee -a $GITHUB_OUTPUT + build-image: needs: [ check-image ] @@ -48,20 +72,7 @@ jobs: runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} - env: - IMAGE_TAG: ${{ inputs.image-tag }} - steps: - - name: Check `input.tag` is correct - env: - INPUTS_IMAGE_TAG: ${{ inputs.image-tag }} - CHECK_IMAGE_TAG : ${{ needs.check-image.outputs.image-tag }} - run: | - if [ "${INPUTS_IMAGE_TAG}" != "${CHECK_IMAGE_TAG}" ]; then - echo "'inputs.image-tag' (${INPUTS_IMAGE_TAG}) does not match the tag of the latest build-tools image 'inputs.image-tag' (${CHECK_IMAGE_TAG})" - exit 1 - fi - - uses: actions/checkout@v4 - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193 @@ -92,10 +103,10 @@ jobs: cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.debian-version }}-${{ matrix.arch }} cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0}-{1},mode=max', matrix.debian-version, matrix.arch) || '' }} tags: | - neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.debian-version }}-${{ matrix.arch }} + neondatabase/build-tools:${{ needs.check-image.outputs.tag }}-${{ matrix.debian-version }}-${{ matrix.arch }} merge-images: - needs: [ build-image ] + needs: [ check-image, build-image ] runs-on: ubuntu-22.04 steps: @@ -107,7 +118,7 @@ jobs: - name: Create multi-arch image env: DEFAULT_DEBIAN_VERSION: bullseye - IMAGE_TAG: ${{ inputs.image-tag }} + IMAGE_TAG: ${{ needs.check-image.outputs.tag }} run: | for debian_version in bullseye bookworm; do tags=("-t" "neondatabase/build-tools:${IMAGE_TAG}-${debian_version}") diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 809273d67d..41d0b9b3a3 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -77,15 +77,9 @@ jobs: shell: bash id: build-tag - check-build-tools-image: - needs: [ check-permissions ] - uses: ./.github/workflows/check-build-tools-image.yml - build-build-tools-image: - needs: [ check-build-tools-image ] + needs: [ check-permissions ] uses: ./.github/workflows/build-build-tools-image.yml - with: - image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit check-codestyle-python: diff --git a/.github/workflows/check-build-tools-image.yml b/.github/workflows/check-build-tools-image.yml deleted file mode 100644 index a7a15ad58b..0000000000 --- a/.github/workflows/check-build-tools-image.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Check build-tools image - -on: - workflow_call: - outputs: - image-tag: - description: "build-tools image tag" - value: ${{ jobs.check-image.outputs.tag }} - found: - description: "Whether the image is found in the registry" - value: ${{ jobs.check-image.outputs.found }} - -defaults: - run: - shell: bash -euo pipefail {0} - -# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job. 
-permissions: {} - -jobs: - check-image: - runs-on: ubuntu-22.04 - outputs: - tag: ${{ steps.get-build-tools-tag.outputs.image-tag }} - found: ${{ steps.check-image.outputs.found }} - - steps: - - uses: actions/checkout@v4 - - - name: Get build-tools image tag for the current commit - id: get-build-tools-tag - env: - IMAGE_TAG: | - ${{ hashFiles('build-tools.Dockerfile', - '.github/workflows/check-build-tools-image.yml', - '.github/workflows/build-build-tools-image.yml') }} - run: | - echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT - - - name: Check if such tag found in the registry - id: check-image - env: - IMAGE_TAG: ${{ steps.get-build-tools-tag.outputs.image-tag }} - run: | - if docker manifest inspect neondatabase/build-tools:${IMAGE_TAG}; then - found=true - else - found=false - fi - - echo "found=${found}" | tee -a $GITHUB_OUTPUT diff --git a/.github/workflows/ingest_benchmark.yml b/.github/workflows/ingest_benchmark.yml index d770bb2bb5..1033dc6489 100644 --- a/.github/workflows/ingest_benchmark.yml +++ b/.github/workflows/ingest_benchmark.yml @@ -1,4 +1,4 @@ -name: Benchmarking +name: benchmarking ingest on: # uncomment to run on push for debugging your PR @@ -74,18 +74,16 @@ jobs: compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck api_key: ${{ secrets.NEON_STAGING_API_KEY }} - - name: Initialize Neon project and retrieve current backpressure seconds + - name: Initialize Neon project if: ${{ matrix.target_project == 'new_empty_project' }} env: - NEW_PROJECT_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }} + BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-project-ingest-target.outputs.dsn }} NEW_PROJECT_ID: ${{ steps.create-neon-project-ingest-target.outputs.project_id }} run: | echo "Initializing Neon project with project_id: ${NEW_PROJECT_ID}" export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" - BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV - echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" + echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV - name: Create Neon Branch for large tenant if: ${{ matrix.target_project == 'large_existing_project' }} @@ -95,266 +93,55 @@ jobs: project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} - - name: Initialize Neon project and retrieve current backpressure seconds + - name: Initialize Neon project if: ${{ matrix.target_project == 'large_existing_project' }} env: - NEW_PROJECT_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }} + BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }} NEW_BRANCH_ID: ${{ steps.create-neon-branch-ingest-target.outputs.branch_id }} run: | echo "Initializing Neon branch with branch_id: ${NEW_BRANCH_ID}" export LD_LIBRARY_PATH=${PG_16_LIB_PATH} # Extract the part before the database name - base_connstr="${NEW_PROJECT_CONNSTR%/*}" + base_connstr="${BENCHMARK_INGEST_TARGET_CONNSTR%/*}" # Extract the query parameters (if any) after the database name - query_params="${NEW_PROJECT_CONNSTR#*\?}" 
+ query_params="${BENCHMARK_INGEST_TARGET_CONNSTR#*\?}" # Reconstruct the new connection string - if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then + if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then new_connstr="${base_connstr}/neondb?${query_params}" else new_connstr="${base_connstr}/neondb" fi ${PSQL} "${new_connstr}" -c "drop database ludicrous;" ${PSQL} "${new_connstr}" -c "CREATE DATABASE ludicrous;" - if [ "$query_params" != "$NEW_PROJECT_CONNSTR" ]; then - NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous?${query_params}" + if [ "$query_params" != "$BENCHMARK_INGEST_TARGET_CONNSTR" ]; then + BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous?${query_params}" else - NEW_PROJECT_CONNSTR="${base_connstr}/ludicrous" + BENCHMARK_INGEST_TARGET_CONNSTR="${base_connstr}/ludicrous" fi - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" - BACKPRESSURE_TIME_BEFORE_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_BEFORE_INGEST=${BACKPRESSURE_TIME_BEFORE_INGEST}" >> $GITHUB_ENV - echo "NEW_PROJECT_CONNSTR=${NEW_PROJECT_CONNSTR}" >> $GITHUB_ENV - - - - name: Create pgcopydb filter file - run: | - cat << EOF > /tmp/pgcopydb_filter.txt - [include-only-table] - public.events - public.emails - public.email_transmissions - public.payments - public.editions - public.edition_modules - public.sp_content - public.email_broadcasts - public.user_collections - public.devices - public.user_accounts - public.lessons - public.lesson_users - public.payment_methods - public.orders - public.course_emails - public.modules - public.users - public.module_users - public.courses - public.payment_gateway_keys - public.accounts - public.roles - public.payment_gateways - public.management - public.event_names - EOF + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;" + echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV - - name: Invoke pgcopydb + - name: Invoke pgcopydb + uses: ./.github/actions/run-python-test-set + with: + build_type: remote + test_selection: performance/test_perf_ingest_using_pgcopydb.py + run_in_parallel: false + extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb + pg_version: v16 + save_perf_report: true + aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: - BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }} - run: | - export LD_LIBRARY_PATH=${PGCOPYDB_LIB_PATH}:${PG_16_LIB_PATH} - export PGCOPYDB_SOURCE_PGURI="${BENCHMARK_INGEST_SOURCE_CONNSTR}" - export PGCOPYDB_TARGET_PGURI="${NEW_PROJECT_CONNSTR}" - export PGOPTIONS="-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7" - ${PG_CONFIG} --bindir - ${PGCOPYDB} --version - ${PGCOPYDB} clone --skip-vacuum --no-owner --no-acl --skip-db-properties --table-jobs 4 \ - --index-jobs 4 --restore-jobs 4 --split-tables-larger-than 10GB --skip-extensions \ - --use-copy-binary --filters /tmp/pgcopydb_filter.txt 2>&1 | tee /tmp/pgcopydb_${{ matrix.target_project }}.log + BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }} + TARGET_PROJECT_TYPE: ${{ matrix.target_project }} + # we report PLATFORM in zenbenchmark NeonBenchmarker perf database and want to distinguish between new project and large tenant + PLATFORM: "${{ matrix.target_project }}-us-east-2-staging" 
+ PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" - # create dummy pgcopydb log to test parsing - # - name: create dummy log for parser test - # run: | - # cat << EOF > /tmp/pgcopydb_${{ matrix.target_project }}.log - # 2024-11-04 18:00:53.433 500861 INFO main.c:136 Running pgcopydb version 0.17.10.g8361a93 from "/usr/lib/postgresql/17/bin/pgcopydb" - # 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1225 [SOURCE] Copying database from "postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60" - # 2024-11-04 18:00:53.434 500861 INFO cli_common.c:1226 [TARGET] Copying database into "postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60" - # 2024-11-04 18:00:53.442 500861 INFO copydb.c:105 Using work dir "/tmp/pgcopydb" - # 2024-11-04 18:00:53.541 500861 INFO snapshot.c:107 Exported snapshot "00000008-00000033-1" from the source database - # 2024-11-04 18:00:53.556 500865 INFO cli_clone_follow.c:543 STEP 1: fetch source database tables, indexes, and sequences - # 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:716 Splitting source candidate tables larger than 10 GB - # 2024-11-04 18:00:54.570 500865 INFO copydb_schema.c:829 Table public.events is 96 GB large which is larger than --split-tables-larger-than 10 GB, and does not have a unique column of type integer: splitting by CTID - # 2024-11-04 18:01:05.538 500865 INFO copydb_schema.c:905 Table public.events is 96 GB large, 10 COPY processes will be used, partitioning on ctid. - # 2024-11-04 18:01:05.564 500865 INFO copydb_schema.c:905 Table public.email_transmissions is 27 GB large, 4 COPY processes will be used, partitioning on id. - # 2024-11-04 18:01:05.584 500865 INFO copydb_schema.c:905 Table public.lessons is 25 GB large, 4 COPY processes will be used, partitioning on id. - # 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:905 Table public.lesson_users is 16 GB large, 3 COPY processes will be used, partitioning on id. 
- # 2024-11-04 18:01:05.605 500865 INFO copydb_schema.c:761 Fetched information for 26 tables (including 4 tables split in 21 partitions total), with an estimated total of 907 million tuples and 175 GB on-disk - # 2024-11-04 18:01:05.687 500865 INFO copydb_schema.c:968 Fetched information for 57 indexes (supporting 25 constraints) - # 2024-11-04 18:01:05.753 500865 INFO sequences.c:78 Fetching information for 24 sequences - # 2024-11-04 18:01:05.903 500865 INFO copydb_schema.c:1122 Fetched information for 4 extensions - # 2024-11-04 18:01:06.178 500865 INFO copydb_schema.c:1538 Found 0 indexes (supporting 0 constraints) in the target database - # 2024-11-04 18:01:06.184 500865 INFO cli_clone_follow.c:584 STEP 2: dump the source database schema (pre/post data) - # 2024-11-04 18:01:06.186 500865 INFO pgcmd.c:468 /usr/lib/postgresql/16/bin/pg_dump -Fc --snapshot 00000008-00000033-1 --section=pre-data --section=post-data --file /tmp/pgcopydb/schema/schema.dump 'postgres://neondb_owner@ep-bitter-shape-w2c1ir0a.us-east-2.aws.neon.build/neondb?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' - # 2024-11-04 18:01:06.952 500865 INFO cli_clone_follow.c:592 STEP 3: restore the pre-data section to the target database - # 2024-11-04 18:01:07.004 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section pre-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/pre-filtered.list /tmp/pgcopydb/schema/schema.dump - # 2024-11-04 18:01:07.438 500874 INFO table-data.c:656 STEP 4: starting 4 table-data COPY processes - # 2024-11-04 18:01:07.451 500877 INFO vacuum.c:139 STEP 8: skipping VACUUM jobs per --skip-vacuum - # 2024-11-04 18:01:07.457 500875 INFO indexes.c:182 STEP 6: starting 4 CREATE INDEX processes - # 2024-11-04 18:01:07.457 500875 INFO indexes.c:183 STEP 7: constraints are built by the CREATE INDEX processes - # 2024-11-04 18:01:07.507 500865 INFO blobs.c:74 Skipping large objects: none found. 
- # 2024-11-04 18:01:07.509 500865 INFO sequences.c:194 STEP 9: reset sequences values - # 2024-11-04 18:01:07.510 500886 INFO sequences.c:290 Set sequences values on the target database - # 2024-11-04 20:49:00.587 500865 INFO cli_clone_follow.c:608 STEP 10: restore the post-data section to the target database - # 2024-11-04 20:49:00.600 500865 INFO pgcmd.c:1001 /usr/lib/postgresql/16/bin/pg_restore --dbname 'postgres://neondb_owner@ep-icy-union-w25qd5pj.us-east-2.aws.neon.build/ludicrous?sslmode=require&keepalives=1&keepalives_idle=10&keepalives_interval=10&keepalives_count=60' --section post-data --jobs 4 --no-owner --no-acl --use-list /tmp/pgcopydb/schema/post-filtered.list /tmp/pgcopydb/schema/schema.dump - # 2024-11-05 10:50:58.508 500865 INFO cli_clone_follow.c:639 All step are now done, 16h49m elapsed - # 2024-11-05 10:50:58.508 500865 INFO summary.c:3155 Printing summary for 26 tables and 57 indexes - - # OID | Schema | Name | Parts | copy duration | transmitted bytes | indexes | create index duration - # ------+--------+----------------------+-------+---------------+-------------------+---------+---------------------- - # 24654 | public | events | 10 | 1d11h | 878 GB | 1 | 1h41m - # 24623 | public | email_transmissions | 4 | 4h46m | 99 GB | 3 | 2h04m - # 24665 | public | lessons | 4 | 4h42m | 161 GB | 4 | 1m11s - # 24661 | public | lesson_users | 3 | 2h46m | 49 GB | 3 | 39m35s - # 24631 | public | emails | 1 | 34m07s | 10 GB | 2 | 17s - # 24739 | public | payments | 1 | 5m47s | 1848 MB | 4 | 4m40s - # 24681 | public | module_users | 1 | 4m57s | 1610 MB | 3 | 1m50s - # 24694 | public | orders | 1 | 2m50s | 835 MB | 3 | 1m05s - # 24597 | public | devices | 1 | 1m45s | 498 MB | 2 | 40s - # 24723 | public | payment_methods | 1 | 1m24s | 548 MB | 2 | 31s - # 24765 | public | user_collections | 1 | 2m17s | 1005 MB | 2 | 968ms - # 24774 | public | users | 1 | 52s | 291 MB | 4 | 27s - # 24760 | public | user_accounts | 1 | 16s | 172 MB | 3 | 16s - # 24606 | public | edition_modules | 1 | 8s983 | 46 MB | 3 | 4s749 - # 24583 | public | course_emails | 1 | 8s526 | 26 MB | 2 | 996ms - # 24685 | public | modules | 1 | 1s592 | 21 MB | 3 | 1s696 - # 24610 | public | editions | 1 | 2s199 | 7483 kB | 2 | 1s032 - # 24755 | public | sp_content | 1 | 1s555 | 4177 kB | 0 | 0ms - # 24619 | public | email_broadcasts | 1 | 744ms | 2645 kB | 2 | 677ms - # 24590 | public | courses | 1 | 387ms | 1540 kB | 2 | 367ms - # 24704 | public | payment_gateway_keys | 1 | 1s972 | 164 kB | 2 | 27ms - # 24576 | public | accounts | 1 | 58ms | 24 kB | 1 | 14ms - # 24647 | public | event_names | 1 | 32ms | 397 B | 1 | 8ms - # 24716 | public | payment_gateways | 1 | 1s675 | 117 B | 1 | 11ms - # 24748 | public | roles | 1 | 71ms | 173 B | 1 | 8ms - # 24676 | public | management | 1 | 33ms | 40 B | 1 | 19ms - - - # Step Connection Duration Transfer Concurrency - # -------------------------------------------------- ---------- ---------- ---------- ------------ - # Catalog Queries (table ordering, filtering, etc) source 12s 1 - # Dump Schema source 765ms 1 - # Prepare Schema target 466ms 1 - # COPY, INDEX, CONSTRAINTS, VACUUM (wall clock) both 2h47m 12 - # COPY (cumulative) both 7h46m 1225 GB 4 - # CREATE INDEX (cumulative) target 4h36m 4 - # CONSTRAINTS (cumulative) target 8s493 4 - # VACUUM (cumulative) target 0ms 4 - # Reset Sequences both 60ms 1 - # Large Objects (cumulative) (null) 0ms 0 - # Finalize Schema both 14h01m 4 - # -------------------------------------------------- ---------- ---------- ---------- ------------ - 
# Total Wall Clock Duration both 16h49m 20 - - - # EOF - - - - name: show tables sizes and retrieve current backpressure seconds + - name: show tables sizes after ingest run: | export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - ${PSQL} "${NEW_PROJECT_CONNSTR}" -c "\dt+" - BACKPRESSURE_TIME_AFTER_INGEST=$(${PSQL} "${NEW_PROJECT_CONNSTR}" -t -c "select backpressure_throttling_time()/1000000;") - echo "BACKPRESSURE_TIME_AFTER_INGEST=${BACKPRESSURE_TIME_AFTER_INGEST}" >> $GITHUB_ENV - - - name: Parse pgcopydb log and report performance metrics - env: - PERF_TEST_RESULT_CONNSTR: ${{ secrets.PERF_TEST_RESULT_CONNSTR }} - run: | - export LD_LIBRARY_PATH=${PG_16_LIB_PATH} - - # Define the log file path - LOG_FILE="/tmp/pgcopydb_${{ matrix.target_project }}.log" - - # Get the current git commit hash - git config --global --add safe.directory /__w/neon/neon - COMMIT_HASH=$(git rev-parse --short HEAD) - - # Define the platform and test suite - PLATFORM="pg16-${{ matrix.target_project }}-us-east-2-staging" - SUIT="pgcopydb_ingest_bench" - - # Function to convert time (e.g., "2h47m", "4h36m", "118ms", "8s493") to seconds - convert_to_seconds() { - local duration=$1 - local total_seconds=0 - - # Check for hours (h) - if [[ "$duration" =~ ([0-9]+)h ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 3600)) - fi - - # Check for seconds (s) - if [[ "$duration" =~ ([0-9]+)s ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0})) - fi - - # Check for milliseconds (ms) (if applicable) - if [[ "$duration" =~ ([0-9]+)ms ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} / 1000)) - duration=${duration/${BASH_REMATCH[0]}/} # need to remove it to avoid double counting with m - fi - - # Check for minutes (m) - must be checked after ms because m is contained in ms - if [[ "$duration" =~ ([0-9]+)m ]]; then - total_seconds=$((total_seconds + ${BASH_REMATCH[1]#0} * 60)) - fi - - echo $total_seconds - } - - # Calculate the backpressure difference in seconds - BACKPRESSURE_TIME_DIFF=$(awk "BEGIN {print $BACKPRESSURE_TIME_AFTER_INGEST - $BACKPRESSURE_TIME_BEFORE_INGEST}") - - # Insert the backpressure time difference into the performance database - if [ -n "$BACKPRESSURE_TIME_DIFF" ]; then - PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \" - INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp) - VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', 'backpressure_time', ${BACKPRESSURE_TIME_DIFF}, 'seconds', 'lower_is_better', now()); - \"" - echo "Inserting backpressure time difference: ${BACKPRESSURE_TIME_DIFF} seconds" - eval $PSQL_CMD - fi - - # Extract and process log lines - while IFS= read -r line; do - METRIC_NAME="" - # Match each desired line and extract the relevant information - if [[ "$line" =~ COPY,\ INDEX,\ CONSTRAINTS,\ VACUUM.* ]]; then - METRIC_NAME="COPY, INDEX, CONSTRAINTS, VACUUM (wall clock)" - elif [[ "$line" =~ COPY\ \(cumulative\).* ]]; then - METRIC_NAME="COPY (cumulative)" - elif [[ "$line" =~ CREATE\ INDEX\ \(cumulative\).* ]]; then - METRIC_NAME="CREATE INDEX (cumulative)" - elif [[ "$line" =~ CONSTRAINTS\ \(cumulative\).* ]]; then - METRIC_NAME="CONSTRAINTS (cumulative)" - elif [[ "$line" =~ Finalize\ Schema.* ]]; then - METRIC_NAME="Finalize Schema" - elif [[ "$line" =~ Total\ Wall\ Clock\ Duration.* ]]; then - METRIC_NAME="Total Wall Clock Duration" - fi - - # If a metric was matched, insert it into the performance database - if [ -n "$METRIC_NAME" ]; then - 
DURATION=$(echo "$line" | grep -oP '\d+h\d+m|\d+s|\d+ms|\d{1,2}h\d{1,2}m|\d+\.\d+s' | head -n 1) - METRIC_VALUE=$(convert_to_seconds "$DURATION") - PSQL_CMD="${PSQL} \"${PERF_TEST_RESULT_CONNSTR}\" -c \" - INSERT INTO public.perf_test_results (suit, revision, platform, metric_name, metric_value, metric_unit, metric_report_type, recorded_at_timestamp) - VALUES ('${SUIT}', '${COMMIT_HASH}', '${PLATFORM}', '${METRIC_NAME}', ${METRIC_VALUE}, 'seconds', 'lower_is_better', now()); - \"" - echo "Inserting ${METRIC_NAME} with value ${METRIC_VALUE} seconds" - eval $PSQL_CMD - fi - done < "$LOG_FILE" + ${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+" - name: Delete Neon Project if: ${{ always() && matrix.target_project == 'new_empty_project' }} diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index 70a324d76f..8600b9fe2d 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -26,15 +26,9 @@ jobs: with: github-event-name: ${{ github.event_name}} - check-build-tools-image: - needs: [ check-permissions ] - uses: ./.github/workflows/check-build-tools-image.yml - build-build-tools-image: - needs: [ check-build-tools-image ] + needs: [ check-permissions ] uses: ./.github/workflows/build-build-tools-image.yml - with: - image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit run-macos-build: diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 615937b5a1..1cce348ae2 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -72,7 +72,7 @@ jobs: echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV fi - - name: Start Bench with run_id + - name: Start Bench with run_id run: | curl -k -X 'POST' \ "${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \ @@ -116,7 +116,7 @@ jobs: -H 'accept: application/gzip' \ -H "Authorization: Bearer $API_KEY" \ --output "test_log_${GITHUB_RUN_ID}.gz" - + - name: Unzip Test Log and Print it into this job's log if: always() && steps.poll_step.outputs.too_many_runs != 'true' run: | @@ -134,13 +134,13 @@ jobs: if: ${{ github.event.schedule && failure() }} uses: slackapi/slack-github-action@v1 with: - channel-id: "C033QLM5P7D" # dev-staging-stream + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" env: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - name: Cleanup Test Resources - if: always() + if: always() run: | curl -k -X 'POST' \ "${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \ diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml index df40b5beda..4f5495cbe2 100644 --- a/.github/workflows/pg-clients.yml +++ b/.github/workflows/pg-clients.yml @@ -39,15 +39,9 @@ jobs: with: github-event-name: ${{ github.event_name }} - check-build-tools-image: - needs: [ check-permissions ] - uses: ./.github/workflows/check-build-tools-image.yml - build-build-tools-image: - needs: [ check-build-tools-image ] + needs: [ check-permissions ] uses: ./.github/workflows/build-build-tools-image.yml - with: - image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit test-logical-replication: diff --git a/.github/workflows/pre-merge-checks.yml b/.github/workflows/pre-merge-checks.yml index 137faa7abc..e1cec6d33d 100644 --- 
a/.github/workflows/pre-merge-checks.yml +++ b/.github/workflows/pre-merge-checks.yml @@ -34,16 +34,10 @@ jobs: run: | echo "${PYTHON_CHANGED_FILES}" - check-build-tools-image: + build-build-tools-image: if: needs.get-changed-files.outputs.python-changed == 'true' needs: [ get-changed-files ] - uses: ./.github/workflows/check-build-tools-image.yml - - build-build-tools-image: - needs: [ check-build-tools-image ] uses: ./.github/workflows/build-build-tools-image.yml - with: - image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit check-codestyle-python: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 56ef6f4bbb..11f010b6d4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -26,82 +26,26 @@ defaults: jobs: create-storage-release-branch: if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }} - runs-on: ubuntu-22.04 permissions: - contents: write # for `git push` + contents: write - steps: - - name: Check out code - uses: actions/checkout@v4 - with: - ref: main - - - name: Set environment variables - run: | - echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - echo "RELEASE_BRANCH=rc/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - - - name: Create release branch - run: git checkout -b $RELEASE_BRANCH - - - name: Push new branch - run: git push origin $RELEASE_BRANCH - - - name: Create pull request into release - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - run: | - TITLE="Storage & Compute release ${RELEASE_DATE}" - - cat << EOF > body.md - ## ${TITLE} - - **Please merge this Pull Request using 'Create a merge commit' button** - EOF - - gh pr create --title "${TITLE}" \ - --body-file "body.md" \ - --head "${RELEASE_BRANCH}" \ - --base "release" + uses: ./.github/workflows/_create-release-pr.yml + with: + component-name: 'Storage & Compute' + release-branch: 'release' + secrets: + ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} create-proxy-release-branch: if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }} - runs-on: ubuntu-22.04 permissions: - contents: write # for `git push` + contents: write - steps: - - name: Check out code - uses: actions/checkout@v4 - with: - ref: main - - - name: Set environment variables - run: | - echo "RELEASE_DATE=$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - echo "RELEASE_BRANCH=rc/proxy/$(date +'%Y-%m-%d')" | tee -a $GITHUB_ENV - - - name: Create release branch - run: git checkout -b $RELEASE_BRANCH - - - name: Push new branch - run: git push origin $RELEASE_BRANCH - - - name: Create pull request into release - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - run: | - TITLE="Proxy release ${RELEASE_DATE}" - - cat << EOF > body.md - ## ${TITLE} - - **Please merge this Pull Request using 'Create a merge commit' button** - EOF - - gh pr create --title "${TITLE}" \ - --body-file "body.md" \ - --head "${RELEASE_BRANCH}" \ - --base "release-proxy" + uses: ./.github/workflows/_create-release-pr.yml + with: + component-name: 'Proxy' + release-branch: 'release-proxy' + secrets: + ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} diff --git a/.github/workflows/report-workflow-stats.yml b/.github/workflows/report-workflow-stats.yml index 0d135a257c..15e446bcd7 100644 --- a/.github/workflows/report-workflow-stats.yml +++ b/.github/workflows/report-workflow-stats.yml @@ -9,7 +9,6 @@ on: - Build and Test Locally - Build build-tools image - Check Permissions - 
- Check build-tools image - Check neon with extra platform builds - Cloud Regression Test - Create Release Branch diff --git a/CODEOWNERS b/CODEOWNERS index f8ed4be816..21b0e7c51f 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,6 +1,5 @@ +/.github/ @neondatabase/developer-productivity /compute_tools/ @neondatabase/control-plane @neondatabase/compute -/storage_controller @neondatabase/storage -/storage_scrubber @neondatabase/storage /libs/pageserver_api/ @neondatabase/storage /libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage /libs/remote_storage/ @neondatabase/storage @@ -11,4 +10,6 @@ /pgxn/neon/ @neondatabase/compute @neondatabase/storage /proxy/ @neondatabase/proxy /safekeeper/ @neondatabase/storage +/storage_controller @neondatabase/storage +/storage_scrubber @neondatabase/storage /vendor/ @neondatabase/compute diff --git a/Cargo.lock b/Cargo.lock index f6e3f9ddb1..c7af140f7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3578,7 +3578,6 @@ dependencies = [ "thiserror", "tokio", "tokio-util", - "toml_edit", "utils", "workspace_hack", ] @@ -3642,6 +3641,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_with", + "smallvec", "storage_broker", "strum", "strum_macros", @@ -5663,9 +5663,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.1" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "smol_str" @@ -6074,9 +6074,9 @@ dependencies = [ [[package]] name = "tikv-jemalloc-ctl" -version = "0.5.4" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "619bfed27d807b54f7f776b9430d4f8060e66ee138a28632ca898584d462c31c" +checksum = "f21f216790c8df74ce3ab25b534e0718da5a1916719771d3fec23315c99e468b" dependencies = [ "libc", "paste", @@ -6085,9 +6085,9 @@ dependencies = [ [[package]] name = "tikv-jemalloc-sys" -version = "0.5.4+5.3.0-patched" +version = "0.6.0+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1" +checksum = "cd3c60906412afa9c2b5b5a48ca6a5abe5736aec9eb48ad05037a677e52e4e2d" dependencies = [ "cc", "libc", @@ -6095,9 +6095,9 @@ dependencies = [ [[package]] name = "tikv-jemallocator" -version = "0.5.4" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca" +checksum = "4cec5ff18518d81584f477e9bfdf957f5bb0979b0bac3af4ca30b5b3ae2d2865" dependencies = [ "libc", "tikv-jemalloc-sys", diff --git a/Cargo.toml b/Cargo.toml index 706d742f1b..dbda930535 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -168,8 +168,8 @@ sync_wrapper = "0.1.2" tar = "0.4" test-context = "0.3" thiserror = "1.0" -tikv-jemallocator = "0.5" -tikv-jemalloc-ctl = "0.5" +tikv-jemallocator = { version = "0.6", features = ["stats"] } +tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] } tokio = { version = "1.17", features = ["macros"] } tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" } tokio-io-timeout = "1.2.0" diff --git a/compute/etc/neon_collector.jsonnet b/compute/etc/neon_collector.jsonnet index c6fa645b41..75d69c7b68 100644 --- a/compute/etc/neon_collector.jsonnet +++ b/compute/etc/neon_collector.jsonnet @@ -6,6 +6,7 @@ 
import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet', import 'sql_exporter/compute_current_lsn.libsonnet', import 'sql_exporter/compute_logical_snapshot_files.libsonnet', + import 'sql_exporter/compute_max_connections.libsonnet', import 'sql_exporter/compute_receive_lsn.libsonnet', import 'sql_exporter/compute_subscriptions_count.libsonnet', import 'sql_exporter/connection_counts.libsonnet', diff --git a/compute/etc/sql_exporter/compute_max_connections.libsonnet b/compute/etc/sql_exporter/compute_max_connections.libsonnet new file mode 100644 index 0000000000..69cfa1f19c --- /dev/null +++ b/compute/etc/sql_exporter/compute_max_connections.libsonnet @@ -0,0 +1,10 @@ +{ + metric_name: 'compute_max_connections', + type: 'gauge', + help: 'Max connections allowed for Postgres', + key_labels: null, + values: [ + 'max_connections', + ], + query: importstr 'sql_exporter/compute_max_connections.sql', +} diff --git a/compute/etc/sql_exporter/compute_max_connections.sql b/compute/etc/sql_exporter/compute_max_connections.sql new file mode 100644 index 0000000000..99a49483a6 --- /dev/null +++ b/compute/etc/sql_exporter/compute_max_connections.sql @@ -0,0 +1 @@ +SELECT current_setting('max_connections') as max_connections; diff --git a/compute/patches/cloud_regress_pg16.patch b/compute/patches/cloud_regress_pg16.patch index d15d0cffeb..a4b93d0260 100644 --- a/compute/patches/cloud_regress_pg16.patch +++ b/compute/patches/cloud_regress_pg16.patch @@ -147,7 +147,7 @@ index 542c2e098c..0062d3024f 100644 ALTER TABLE ptnowner1 OWNER TO regress_ptnowner; ALTER TABLE ptnowner OWNER TO regress_ptnowner; diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out -index 97bbe53b64..eac3d42a79 100644 +index 3f9a8f539c..0a51b52940 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1016,7 +1016,7 @@ select * from collate_test1 where b ilike 'ABC'; @@ -309,7 +309,7 @@ index b48365ec98..a6ef910055 100644 -- the wrong partition. This test is *not* guaranteed to trigger that bug, but -- does so when shared_buffers is small enough. To test if we encountered the diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out -index faf1a4d1b0..a44c97db52 100644 +index 9a74820ee8..22400a5551 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -553,8 +553,8 @@ select * from check_con_tbl; @@ -573,7 +573,7 @@ index 93302a07ef..1a73f083ac 100644 -- that does not match with what's expected. -- This checks all the object types that include schema qualifications. 
diff --git a/src/test/regress/expected/create_view.out b/src/test/regress/expected/create_view.out -index f3f8c7b5a2..3e3e54ff4c 100644 +index f551624afb..57f1e432d4 100644 --- a/src/test/regress/expected/create_view.out +++ b/src/test/regress/expected/create_view.out @@ -18,7 +18,8 @@ CREATE TABLE real_city ( @@ -700,12 +700,12 @@ index 6ed50fdcfa..caa00a345d 100644 COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless'; CREATE FOREIGN DATA WRAPPER postgresql VALIDATOR postgresql_fdw_validator; diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out -index 12e523c737..8872e23935 100644 +index 6b8c2f2414..8e13b7fa46 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out -@@ -1968,7 +1968,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2 - FOR VALUES IN (1600); - -- leave these tables around intentionally +@@ -1985,7 +1985,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES + ERROR: cannot ALTER TABLE "fk_partitioned_pk_61" because it is being used by active queries in this session + DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6; -- test the case when the referenced table is owned by a different user -create role regress_other_partitioned_fk_owner; +create role regress_other_partitioned_fk_owner PASSWORD NEON_PASSWORD_PLACEHOLDER; @@ -713,7 +713,7 @@ index 12e523c737..8872e23935 100644 set role regress_other_partitioned_fk_owner; create table other_partitioned_fk(a int, b int) partition by list (a); diff --git a/src/test/regress/expected/generated.out b/src/test/regress/expected/generated.out -index 0f623f7119..b48588a54e 100644 +index 5881420388..4ae21aa43c 100644 --- a/src/test/regress/expected/generated.out +++ b/src/test/regress/expected/generated.out @@ -534,7 +534,7 @@ CREATE TABLE gtest10a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) STOR @@ -762,7 +762,7 @@ index a2036a1597..805d73b9d2 100644 -- fields, leading to long bucket chains and lots of table expansion. 
-- this is therefore a stress test of the bucket overflow code (unlike diff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out -index cc7772349f..98a08eb48d 100644 +index 1b74958de9..078187b542 100644 --- a/src/test/regress/expected/identity.out +++ b/src/test/regress/expected/identity.out @@ -520,7 +520,7 @@ ALTER TABLE itest7 ALTER COLUMN a SET GENERATED BY DEFAULT; @@ -775,10 +775,10 @@ index cc7772349f..98a08eb48d 100644 GRANT SELECT, INSERT ON itest8 TO regress_identity_user1; SET ROLE regress_identity_user1; diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out -index 4943429e9b..0257f22b15 100644 +index 8f831c95c3..ec681b52af 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out -@@ -2606,7 +2606,7 @@ create index on permtest_parent (left(c, 3)); +@@ -2636,7 +2636,7 @@ create index on permtest_parent (left(c, 3)); insert into permtest_parent select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i; analyze permtest_parent; @@ -1133,7 +1133,7 @@ index 8475231735..1afae5395f 100644 SELECT rolname, rolpassword FROM pg_authid diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out -index fbb0489a4f..2905194e2c 100644 +index 5b9dba7b32..cc408dad42 100644 --- a/src/test/regress/expected/privileges.out +++ b/src/test/regress/expected/privileges.out @@ -20,19 +20,19 @@ SELECT lo_unlink(oid) FROM pg_largeobject_metadata WHERE oid >= 1000 AND oid < 3 @@ -1185,7 +1185,7 @@ index fbb0489a4f..2905194e2c 100644 GRANT pg_read_all_data TO regress_priv_user6; GRANT pg_write_all_data TO regress_priv_user7; GRANT pg_read_all_settings TO regress_priv_user8 WITH ADMIN OPTION; -@@ -145,8 +145,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8; +@@ -212,8 +212,8 @@ REVOKE pg_read_all_settings FROM regress_priv_user8; DROP USER regress_priv_user10; DROP USER regress_priv_user9; DROP USER regress_priv_user8; @@ -1196,7 +1196,7 @@ index fbb0489a4f..2905194e2c 100644 ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4; GRANT regress_priv_group2 TO regress_priv_user2 GRANTED BY regress_priv_user1; SET SESSION AUTHORIZATION regress_priv_user1; -@@ -172,12 +172,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre +@@ -239,12 +239,16 @@ GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY regre ERROR: permission denied to grant privileges as role "regress_priv_role" DETAIL: The grantor must have the ADMIN option on role "regress_priv_role". 
GRANT regress_priv_role TO regress_priv_user1 WITH ADMIN OPTION GRANTED BY CURRENT_ROLE; @@ -1213,7 +1213,7 @@ index fbb0489a4f..2905194e2c 100644 DROP ROLE regress_priv_role; SET SESSION AUTHORIZATION regress_priv_user1; SELECT session_user, current_user; -@@ -1709,7 +1713,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP +@@ -1776,7 +1780,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP -- security-restricted operations \c - @@ -1222,7 +1222,7 @@ index fbb0489a4f..2905194e2c 100644 -- Check that index expressions and predicates are run as the table's owner -- A dummy index function checking current_user CREATE FUNCTION sro_ifun(int) RETURNS int AS $$ -@@ -2601,8 +2605,8 @@ drop cascades to function testns.priv_testagg(integer) +@@ -2668,8 +2672,8 @@ drop cascades to function testns.priv_testagg(integer) drop cascades to function testns.priv_testproc(integer) -- Change owner of the schema & and rename of new schema owner \c - @@ -1233,7 +1233,7 @@ index fbb0489a4f..2905194e2c 100644 SET SESSION ROLE regress_schemauser1; CREATE SCHEMA testns; SELECT nspname, rolname FROM pg_namespace, pg_roles WHERE pg_namespace.nspname = 'testns' AND pg_namespace.nspowner = pg_roles.oid; -@@ -2725,7 +2729,7 @@ DROP USER regress_priv_user7; +@@ -2792,7 +2796,7 @@ DROP USER regress_priv_user7; DROP USER regress_priv_user8; -- does not exist ERROR: role "regress_priv_user8" does not exist -- permissions with LOCK TABLE @@ -1242,7 +1242,7 @@ index fbb0489a4f..2905194e2c 100644 CREATE TABLE lock_table (a int); -- LOCK TABLE and SELECT permission GRANT SELECT ON lock_table TO regress_locktable_user; -@@ -2807,7 +2811,7 @@ DROP USER regress_locktable_user; +@@ -2874,7 +2878,7 @@ DROP USER regress_locktable_user; -- pg_backend_memory_contexts. 
-- switch to superuser \c - @@ -1251,7 +1251,7 @@ index fbb0489a4f..2905194e2c 100644 SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no has_table_privilege --------------------- -@@ -2851,10 +2855,10 @@ RESET ROLE; +@@ -2918,10 +2922,10 @@ RESET ROLE; -- clean up DROP ROLE regress_readallstats; -- test role grantor machinery @@ -1266,7 +1266,7 @@ index fbb0489a4f..2905194e2c 100644 GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE; GRANT regress_group_direct_manager TO regress_group_indirect_manager; SET SESSION AUTHORIZATION regress_group_direct_manager; -@@ -2883,9 +2887,9 @@ DROP ROLE regress_group_direct_manager; +@@ -2950,9 +2954,9 @@ DROP ROLE regress_group_direct_manager; DROP ROLE regress_group_indirect_manager; DROP ROLE regress_group_member; -- test SET and INHERIT options with object ownership changes @@ -1813,7 +1813,7 @@ index 5e6969b173..2c4d52237f 100644 -- clean up roles diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out -index 97ca9bf72c..b2a7a6f710 100644 +index 218c0c2863..f7af0cfb12 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -14,13 +14,13 @@ DROP ROLE IF EXISTS regress_rls_group2; @@ -1917,6 +1917,19 @@ index b79fe9a1c0..e29fab88ab 100644 ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user REVOKE INSERT ON TABLES FROM regress_selinto_user; GRANT ALL ON SCHEMA selinto_schema TO public; +diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out +index afc6ab08c2..dfcd891af3 100644 +--- a/src/test/regress/expected/select_parallel.out ++++ b/src/test/regress/expected/select_parallel.out +@@ -1220,7 +1220,7 @@ SELECT 1 FROM tenk1_vw_sec + + rollback; + -- test that function option SET ROLE works in parallel workers. +-create role regress_parallel_worker; ++create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER; + create function set_and_report_role() returns text as + $$ select current_setting('role') $$ language sql parallel safe + set role = regress_parallel_worker; diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 1aeed8452b..7d9427d070 100644 --- a/src/test/regress/expected/select_views.out @@ -2369,7 +2382,7 @@ index 6cb9c926c0..5e689e4062 100644 ALTER TABLE ptnowner1 OWNER TO regress_ptnowner; ALTER TABLE ptnowner OWNER TO regress_ptnowner; diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql -index 3db9e25913..c66d5aa2c2 100644 +index 8aa902d5ab..24bb823b86 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -353,7 +353,7 @@ reset enable_seqscan; @@ -2532,7 +2545,7 @@ index 43d2e906dd..6c993d70f0 100644 -- An earlier bug (see commit b1ecb9b3fcf) could end up using a buffer from -- the wrong partition. This test is *not* guaranteed to trigger that bug, but diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql -index d759635068..d58e50dcc5 100644 +index cf3828c16e..cf3ca38175 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -365,8 +365,8 @@ copy check_con_tbl from stdin; @@ -2774,7 +2787,7 @@ index 1b7064247a..be5b662ce1 100644 -- Cases where schema creation fails as objects are qualified with a schema -- that does not match with what's expected. 
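Most of the regression-test changes above follow one mechanical pattern: statements that create roles gain PASSWORD NEON_PASSWORD_PLACEHOLDER, presumably so the test harness can swap in a real password before running the suite against a compute that requires password authentication. A minimal sketch of such a substitution step, in Rust; the helper name and quoting rules are assumptions for illustration, not taken from this diff:

// Hypothetical pre-processing step for the .sql/.out files; not part of the diff above.
fn substitute_password_placeholder(sql: &str, password: &str) -> String {
    // Double embedded single quotes so the substituted value stays a valid SQL literal.
    let quoted = format!("'{}'", password.replace('\'', "''"));
    sql.replace("NEON_PASSWORD_PLACEHOLDER", &quoted)
}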
diff --git a/src/test/regress/sql/create_view.sql b/src/test/regress/sql/create_view.sql -index 3a78be1b0c..617d2dc8d6 100644 +index ae6841308b..47bc792e30 100644 --- a/src/test/regress/sql/create_view.sql +++ b/src/test/regress/sql/create_view.sql @@ -23,7 +23,8 @@ CREATE TABLE real_city ( @@ -2901,11 +2914,11 @@ index aa147b14a9..370e0dd570 100644 CREATE FOREIGN DATA WRAPPER dummy; COMMENT ON FOREIGN DATA WRAPPER dummy IS 'useless'; diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql -index 22e177f89b..7138d5e1d4 100644 +index 45c7a534cb..32dd26b8cd 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql -@@ -1418,7 +1418,7 @@ ALTER TABLE fk_partitioned_fk ATTACH PARTITION fk_partitioned_fk_2 - -- leave these tables around intentionally +@@ -1435,7 +1435,7 @@ ALTER TABLE fk_partitioned_fk_6 ATTACH PARTITION fk_partitioned_pk_6 FOR VALUES + DROP TABLE fk_partitioned_pk_6, fk_partitioned_fk_6; -- test the case when the referenced table is owned by a different user -create role regress_other_partitioned_fk_owner; @@ -2963,7 +2976,7 @@ index 527024f710..de49c0b85f 100644 -- the data in this file has a lot of duplicates in the index key -- fields, leading to long bucket chains and lots of table expansion. diff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql -index 91d2e443b4..241c93f373 100644 +index 7537258a75..9041e35e34 100644 --- a/src/test/regress/sql/identity.sql +++ b/src/test/regress/sql/identity.sql @@ -287,7 +287,7 @@ ALTER TABLE itest7 ALTER COLUMN a RESTART; @@ -2976,10 +2989,10 @@ index 91d2e443b4..241c93f373 100644 GRANT SELECT, INSERT ON itest8 TO regress_identity_user1; SET ROLE regress_identity_user1; diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql -index fe699c54d5..bdd5993f45 100644 +index b5b554a125..109889ad24 100644 --- a/src/test/regress/sql/inherit.sql +++ b/src/test/regress/sql/inherit.sql -@@ -950,7 +950,7 @@ create index on permtest_parent (left(c, 3)); +@@ -958,7 +958,7 @@ create index on permtest_parent (left(c, 3)); insert into permtest_parent select 1, 'a', left(fipshash(i::text), 5) from generate_series(0, 100) i; analyze permtest_parent; @@ -3218,7 +3231,7 @@ index 53e86b0b6c..f07cf1ec54 100644 CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023'; diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql -index 3f68cafcd1..004b26831d 100644 +index 249df17a58..b258e7f26a 100644 --- a/src/test/regress/sql/privileges.sql +++ b/src/test/regress/sql/privileges.sql @@ -24,18 +24,18 @@ RESET client_min_messages; @@ -3269,7 +3282,7 @@ index 3f68cafcd1..004b26831d 100644 GRANT pg_read_all_data TO regress_priv_user6; GRANT pg_write_all_data TO regress_priv_user7; -@@ -130,8 +130,8 @@ DROP USER regress_priv_user10; +@@ -163,8 +163,8 @@ DROP USER regress_priv_user10; DROP USER regress_priv_user9; DROP USER regress_priv_user8; @@ -3280,7 +3293,7 @@ index 3f68cafcd1..004b26831d 100644 ALTER GROUP regress_priv_group1 ADD USER regress_priv_user4; -@@ -1124,7 +1124,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP +@@ -1157,7 +1157,7 @@ SELECT has_table_privilege('regress_priv_user1', 'atest4', 'SELECT WITH GRANT OP -- security-restricted operations \c - @@ -3289,7 +3302,7 @@ index 3f68cafcd1..004b26831d 100644 -- Check that index expressions and predicates are run as the table's owner -@@ -1620,8 +1620,8 @@ DROP SCHEMA testns CASCADE; +@@ -1653,8 +1653,8 
@@ DROP SCHEMA testns CASCADE; -- Change owner of the schema & and rename of new schema owner \c - @@ -3300,7 +3313,7 @@ index 3f68cafcd1..004b26831d 100644 SET SESSION ROLE regress_schemauser1; CREATE SCHEMA testns; -@@ -1715,7 +1715,7 @@ DROP USER regress_priv_user8; -- does not exist +@@ -1748,7 +1748,7 @@ DROP USER regress_priv_user8; -- does not exist -- permissions with LOCK TABLE @@ -3309,7 +3322,7 @@ index 3f68cafcd1..004b26831d 100644 CREATE TABLE lock_table (a int); -- LOCK TABLE and SELECT permission -@@ -1803,7 +1803,7 @@ DROP USER regress_locktable_user; +@@ -1836,7 +1836,7 @@ DROP USER regress_locktable_user; -- switch to superuser \c - @@ -3318,7 +3331,7 @@ index 3f68cafcd1..004b26831d 100644 SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no -@@ -1823,10 +1823,10 @@ RESET ROLE; +@@ -1856,10 +1856,10 @@ RESET ROLE; DROP ROLE regress_readallstats; -- test role grantor machinery @@ -3333,7 +3346,7 @@ index 3f68cafcd1..004b26831d 100644 GRANT regress_group TO regress_group_direct_manager WITH INHERIT FALSE, ADMIN TRUE; GRANT regress_group_direct_manager TO regress_group_indirect_manager; -@@ -1848,9 +1848,9 @@ DROP ROLE regress_group_indirect_manager; +@@ -1881,9 +1881,9 @@ DROP ROLE regress_group_indirect_manager; DROP ROLE regress_group_member; -- test SET and INHERIT options with object ownership changes @@ -3625,7 +3638,7 @@ index c961b2d730..0859b89c4f 100644 -- clean up roles DROP ROLE regress_test_def_superuser; diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql -index dec7340538..cdbc03a5cc 100644 +index d3bfd53e23..919ce1d0c6 100644 --- a/src/test/regress/sql/rowsecurity.sql +++ b/src/test/regress/sql/rowsecurity.sql @@ -20,13 +20,13 @@ DROP SCHEMA IF EXISTS regress_rls_schema CASCADE; @@ -3701,6 +3714,19 @@ index 689c448cc2..223ceb1d75 100644 ALTER DEFAULT PRIVILEGES FOR ROLE regress_selinto_user REVOKE INSERT ON TABLES FROM regress_selinto_user; GRANT ALL ON SCHEMA selinto_schema TO public; +diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql +index 33d78e16dc..cb193c9b27 100644 +--- a/src/test/regress/sql/select_parallel.sql ++++ b/src/test/regress/sql/select_parallel.sql +@@ -464,7 +464,7 @@ SELECT 1 FROM tenk1_vw_sec + rollback; + + -- test that function option SET ROLE works in parallel workers. 
+-create role regress_parallel_worker; ++create role regress_parallel_worker PASSWORD NEON_PASSWORD_PLACEHOLDER; + + create function set_and_report_role() returns text as + $$ select current_setting('role') $$ language sql parallel safe diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql index e742f13699..7bd0255df8 100644 --- a/src/test/regress/sql/select_views.sql diff --git a/compute_tools/src/catalog.rs b/compute_tools/src/catalog.rs index 4fefa831e0..2f6f82dd39 100644 --- a/compute_tools/src/catalog.rs +++ b/compute_tools/src/catalog.rs @@ -1,38 +1,40 @@ -use compute_api::{ - responses::CatalogObjects, - spec::{Database, Role}, -}; +use compute_api::responses::CatalogObjects; use futures::Stream; -use postgres::{Client, NoTls}; +use postgres::NoTls; use std::{path::Path, process::Stdio, result::Result, sync::Arc}; use tokio::{ io::{AsyncBufReadExt, BufReader}, process::Command, - task, + spawn, }; +use tokio_postgres::connect; use tokio_stream::{self as stream, StreamExt}; use tokio_util::codec::{BytesCodec, FramedRead}; use tracing::warn; -use crate::{ - compute::ComputeNode, - pg_helpers::{get_existing_dbs, get_existing_roles}, -}; +use crate::compute::ComputeNode; +use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async}; pub async fn get_dbs_and_roles(compute: &Arc) -> anyhow::Result { let connstr = compute.connstr.clone(); - task::spawn_blocking(move || { - let mut client = Client::connect(connstr.as_str(), NoTls)?; - let roles: Vec; - { - let mut xact = client.transaction()?; - roles = get_existing_roles(&mut xact)?; - } - let databases: Vec = get_existing_dbs(&mut client)?.values().cloned().collect(); - Ok(CatalogObjects { roles, databases }) - }) - .await? + let (client, connection): (tokio_postgres::Client, _) = + connect(connstr.as_str(), NoTls).await?; + + spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + let roles = get_existing_roles_async(&client).await?; + + let databases = get_existing_dbs_async(&client) + .await? + .into_values() + .collect(); + + Ok(CatalogObjects { roles, databases }) } #[derive(Debug, thiserror::Error)] diff --git a/compute_tools/src/checker.rs b/compute_tools/src/checker.rs index d76eaad0a0..cec2b1bed8 100644 --- a/compute_tools/src/checker.rs +++ b/compute_tools/src/checker.rs @@ -1,37 +1,9 @@ use anyhow::{anyhow, Ok, Result}; -use postgres::Client; use tokio_postgres::NoTls; use tracing::{error, instrument, warn}; use crate::compute::ComputeNode; -/// Create a special service table for availability checks -/// only if it does not exist already. -pub fn create_availability_check_data(client: &mut Client) -> Result<()> { - let query = " - DO $$ - BEGIN - IF NOT EXISTS( - SELECT 1 - FROM pg_catalog.pg_tables - WHERE tablename = 'health_check' - ) - THEN - CREATE TABLE health_check ( - id serial primary key, - updated_at timestamptz default now() - ); - INSERT INTO health_check VALUES (1, now()) - ON CONFLICT (id) DO UPDATE - SET updated_at = now(); - END IF; - END - $$;"; - client.execute(query, &[])?; - - Ok(()) -} - /// Update timestamp in a row in a special service table to check /// that we can actually write some data in this particular timeline. 
#[instrument(skip_all)] diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 0a8cb14058..4f67425ba8 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1,20 +1,21 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::env; use std::fs; +use std::iter::once; use std::os::unix::fs::{symlink, PermissionsExt}; use std::path::Path; use std::process::{Command, Stdio}; use std::str::FromStr; use std::sync::atomic::AtomicU32; use std::sync::atomic::Ordering; -use std::sync::{Condvar, Mutex, RwLock}; +use std::sync::{Arc, Condvar, Mutex, RwLock}; use std::thread; use std::time::Duration; use std::time::Instant; use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; -use compute_api::spec::PgIdent; +use compute_api::spec::{PgIdent, Role}; use futures::future::join_all; use futures::stream::FuturesUnordered; use futures::StreamExt; @@ -31,15 +32,23 @@ use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion}; use utils::measured_stream::MeasuredReader; use nix::sys::signal::{kill, Signal}; - use remote_storage::{DownloadError, RemotePath}; +use tokio::spawn; +use url::Url; -use crate::checker::create_availability_check_data; use crate::installed_extensions::get_installed_extensions_sync; use crate::local_proxy; -use crate::logger::inlinify; use crate::pg_helpers::*; use crate::spec::*; +use crate::spec_apply::ApplySpecPhase::{ + CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSuperUser, + DropInvalidDatabases, DropRoles, HandleNeonExtension, HandleOtherExtensions, + RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase, +}; +use crate::spec_apply::PerDatabasePhase::{ + ChangeSchemaPerms, DeleteDBRoleReferences, HandleAnonExtension, +}; +use crate::spec_apply::{apply_operations, MutableApplyContext, DB}; use crate::sync_sk::{check_if_synced, ping_safekeeper}; use crate::{config, extension_server}; @@ -224,10 +233,7 @@ fn maybe_cgexec(cmd: &str) -> Command { } } -/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser -/// that we give to customers -#[instrument(skip_all)] -fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> { +pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String { let roles = spec .cluster .roles @@ -296,11 +302,8 @@ fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> $$;"#, roles_decl, database_decl, ); - info!("Neon superuser created: {}", inlinify(&query)); - client - .simple_query(&query) - .map_err(|e| anyhow::anyhow!(e).context(query))?; - Ok(()) + + query } impl ComputeNode { @@ -813,21 +816,14 @@ impl ComputeNode { Ok(()) } - /// Do initial configuration of the already started Postgres. - #[instrument(skip_all)] - pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> { - // If connection fails, - // it may be the old node with `zenith_admin` superuser. - // - // In this case we need to connect with old `zenith_admin` name - // and create new user. We cannot simply rename connected user, - // but we can create a new one and grant it all privileges. 
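// A minimal sketch (not part of the change itself) of the tokio_postgres pattern this
// file is switching to: the blocking postgres::Client is replaced by an async client,
// and connect() returns a (client, connection) pair whose connection future must be
// driven -- here by spawning it -- before the client can run any queries. The same
// pattern appears in get_dbs_and_roles (catalog.rs above) and in get_maintenance_client below.
async fn connect_sketch(connstr: &str) -> anyhow::Result<tokio_postgres::Client> {
    let (client, connection) = tokio_postgres::connect(connstr, tokio_postgres::NoTls).await?;
    tokio::spawn(async move {
        // The connection future resolves when the session closes; log any error it hits.
        if let Err(e) = connection.await {
            tracing::error!("connection error: {}", e);
        }
    });
    Ok(client)
}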
- let mut connstr = self.connstr.clone(); + async fn get_maintenance_client(url: &Url) -> Result { + let mut connstr = url.clone(); + connstr .query_pairs_mut() .append_pair("application_name", "apply_config"); - let mut client = match Client::connect(connstr.as_str(), NoTls) { + let (client, conn) = match tokio_postgres::connect(connstr.as_str(), NoTls).await { Err(e) => match e.code() { Some(&SqlState::INVALID_PASSWORD) | Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => { @@ -845,8 +841,8 @@ impl ComputeNode { let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls) .context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?; - // Disable forwarding so that users don't get a cloud_admin role + // Disable forwarding so that users don't get a cloud_admin role let mut func = || { client.simple_query("SET neon.forward_ddl = false")?; client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?; @@ -858,49 +854,309 @@ impl ComputeNode { drop(client); // reconnect with connstring with expected name - Client::connect(connstr.as_str(), NoTls)? + tokio_postgres::connect(connstr.as_str(), NoTls).await? } _ => return Err(e.into()), }, - Ok(client) => client, + Ok((client, conn)) => (client, conn), }; - // Disable DDL forwarding because control plane already knows about these roles/databases. + spawn(async move { + if let Err(e) = conn.await { + error!("maintenance client connection error: {}", e); + } + }); + + // Disable DDL forwarding because control plane already knows about the roles/databases + // we're about to modify. client .simple_query("SET neon.forward_ddl = false") + .await .context("apply_config SET neon.forward_ddl = false")?; - // Proceed with post-startup configuration. Note, that order of operations is important. - let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; - create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?; - cleanup_instance(&mut client).context("apply_config cleanup_instance")?; - handle_roles(spec, &mut client).context("apply_config handle_roles")?; - handle_databases(spec, &mut client).context("apply_config handle_databases")?; - handle_role_deletions(spec, connstr.as_str(), &mut client) - .context("apply_config handle_role_deletions")?; - handle_grants( - spec, - &mut client, - connstr.as_str(), - self.has_feature(ComputeFeature::AnonExtension), - ) - .context("apply_config handle_grants")?; - handle_extensions(spec, &mut client).context("apply_config handle_extensions")?; - handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?; - create_availability_check_data(&mut client) - .context("apply_config create_availability_check_data")?; + Ok(client) + } - // 'Close' connection - drop(client); + /// Apply the spec to the running PostgreSQL instance. + /// The caller can decide to run with multiple clients in parallel, or + /// single mode. Either way, the commands executed will be the same, and + /// only commands run in different databases are parallelized. 
+ #[instrument(skip_all)] + pub fn apply_spec_sql( + &self, + spec: Arc, + url: Arc, + concurrency: usize, + ) -> Result<()> { + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build()?; - if let Some(ref local_proxy) = spec.local_proxy_config { + info!("Applying config with max {} concurrency", concurrency); + debug!("Config: {:?}", spec); + + rt.block_on(async { + // Proceed with post-startup configuration. Note, that order of operations is important. + let client = Self::get_maintenance_client(&url).await?; + let spec = spec.clone(); + + let databases = get_existing_dbs_async(&client).await?; + let roles = get_existing_roles_async(&client) + .await? + .into_iter() + .map(|role| (role.name.clone(), role)) + .collect::>(); + + let jwks_roles = Arc::new( + spec.as_ref() + .local_proxy_config + .iter() + .flat_map(|it| &it.jwks) + .flatten() + .flat_map(|setting| &setting.role_names) + .cloned() + .collect::>(), + ); + + let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext { + roles, + dbs: databases, + })); + + for phase in [ + CreateSuperUser, + DropInvalidDatabases, + RenameRoles, + CreateAndAlterRoles, + RenameAndDeleteDatabases, + CreateAndAlterDatabases, + ] { + debug!("Applying phase {:?}", &phase); + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + phase, + || async { Ok(&client) }, + ) + .await?; + } + + let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); + + let db_processes = spec + .cluster + .databases + .iter() + .map(|db| DB::new(db.clone())) + // include + .chain(once(DB::SystemDB)) + .map(|db| { + let spec = spec.clone(); + let ctx = ctx.clone(); + let jwks_roles = jwks_roles.clone(); + let mut url = url.as_ref().clone(); + let concurrency_token = concurrency_token.clone(); + let db = db.clone(); + + debug!("Applying per-database phases for Database {:?}", &db); + + match &db { + DB::SystemDB => {} + DB::UserDB(db) => { + url.set_path(db.name.as_str()); + } + } + + let url = Arc::new(url); + let fut = Self::apply_spec_sql_db( + spec.clone(), + url, + ctx.clone(), + jwks_roles.clone(), + concurrency_token.clone(), + db, + ); + + Ok(spawn(fut)) + }) + .collect::>>(); + + for process in db_processes.into_iter() { + let handle = process?; + handle.await??; + } + + for phase in vec![ + HandleOtherExtensions, + HandleNeonExtension, + CreateAvailabilityCheck, + DropRoles, + ] { + debug!("Applying phase {:?}", &phase); + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + phase, + || async { Ok(&client) }, + ) + .await?; + } + + Ok::<(), anyhow::Error>(()) + })?; + + Ok(()) + } + + /// Apply SQL migrations of the RunInEachDatabase phase. + /// + /// May opt to not connect to databases that don't have any scheduled + /// operations. The function is concurrency-controlled with the provided + /// semaphore. The caller has to make sure the semaphore isn't exhausted. + async fn apply_spec_sql_db( + spec: Arc, + url: Arc, + ctx: Arc>, + jwks_roles: Arc>, + concurrency_token: Arc, + db: DB, + ) -> Result<()> { + let _permit = concurrency_token.acquire().await?; + + let mut client_conn = None; + + for subphase in [ + DeleteDBRoleReferences, + ChangeSchemaPerms, + HandleAnonExtension, + ] { + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + RunInEachDatabase { + db: db.clone(), + subphase, + }, + // Only connect if apply_operation actually wants a connection. 
+ // It's quite possible this database doesn't need any queries, + // so by not connecting we save time and effort connecting to + // that database. + || async { + if client_conn.is_none() { + let db_client = Self::get_maintenance_client(&url).await?; + client_conn.replace(db_client); + } + let client = client_conn.as_ref().unwrap(); + Ok(client) + }, + ) + .await?; + } + + drop(client_conn); + + Ok::<(), anyhow::Error>(()) + } + + /// Do initial configuration of the already started Postgres. + #[instrument(skip_all)] + pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> { + // If connection fails, + // it may be the old node with `zenith_admin` superuser. + // + // In this case we need to connect with old `zenith_admin` name + // and create new user. We cannot simply rename connected user, + // but we can create a new one and grant it all privileges. + let mut url = self.connstr.clone(); + url.query_pairs_mut() + .append_pair("application_name", "apply_config"); + + let url = Arc::new(url); + let spec = Arc::new( + compute_state + .pspec + .as_ref() + .expect("spec must be set") + .spec + .clone(), + ); + + // Choose how many concurrent connections to use for applying the spec changes. + // If the cluster is not currently Running we don't have to deal with user connections, + // and can thus use all `max_connections` connection slots. However, that's generally not + // very efficient, so we generally still limit it to a smaller number. + let max_concurrent_connections = if compute_state.status != ComputeStatus::Running { + // If the settings contain 'max_connections', use that as template + if let Some(config) = spec.cluster.settings.find("max_connections") { + config.parse::().ok() + } else { + // Otherwise, try to find the setting in the postgresql_conf string + spec.cluster + .postgresql_conf + .iter() + .flat_map(|conf| conf.split("\n")) + .filter_map(|line| { + if !line.contains("max_connections") { + return None; + } + + let (key, value) = line.split_once("=")?; + let key = key + .trim_start_matches(char::is_whitespace) + .trim_end_matches(char::is_whitespace); + + let value = value + .trim_start_matches(char::is_whitespace) + .trim_end_matches(char::is_whitespace); + + if key != "max_connections" { + return None; + } + + value.parse::().ok() + }) + .next() + } + // If max_connections is present, use at most 1/3rd of that. + // When max_connections is lower than 30, try to use at least 10 connections, but + // never more than max_connections. + .map(|limit| match limit { + 0..10 => limit, + 10..30 => 10, + 30.. => limit / 3, + }) + // If we didn't find max_connections, default to 10 concurrent connections. + .unwrap_or(10) + } else { + // state == Running + // Because the cluster is already in the Running state, we should assume users are + // already connected to the cluster, and high concurrency could negatively + // impact user connectivity. Therefore, we can limit concurrency to the number of + // reserved superuser connections, which users wouldn't be able to use anyway. + spec.cluster + .settings + .find("superuser_reserved_connections") + .iter() + .filter_map(|val| val.parse::().ok()) + .map(|val| if val > 1 { val - 1 } else { 1 }) + .last() + .unwrap_or(3) + }; + + // Merge-apply spec & changes to PostgreSQL state. 
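// A minimal sketch (not part of the change itself) restating the mapping above: when
// the compute is not yet Running, the concurrency limit is derived from max_connections --
// use every slot below 10, exactly 10 up to 30, and roughly a third from 30 upwards --
// defaulting to 10 when the setting cannot be found.
fn concurrency_from_max_connections(max_connections: Option<usize>) -> usize {
    match max_connections {
        Some(limit) if limit < 10 => limit, // fewer than 10 slots: use them all
        Some(limit) if limit < 30 => 10,    // 10..30 slots: cap at 10
        Some(limit) => limit / 3,           // 30 and above: roughly a third
        None => 10,                         // max_connections not found: default to 10
    }
}
// For example: Some(8) -> 8, Some(20) -> 10, Some(100) -> 33, None -> 10. The Running
// branch above instead derives the limit from superuser_reserved_connections (minus one,
// at least 1, defaulting to 3), so user connections are not crowded out.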
+ self.apply_spec_sql(spec.clone(), url.clone(), max_concurrent_connections)?; + + if let Some(ref local_proxy) = &spec.clone().local_proxy_config { info!("configuring local_proxy"); local_proxy::configure(local_proxy).context("apply_config local_proxy")?; } // Run migrations separately to not hold up cold starts thread::spawn(move || { - let mut connstr = connstr.clone(); + let mut connstr = url.as_ref().clone(); connstr .query_pairs_mut() .append_pair("application_name", "migrations"); @@ -908,7 +1164,8 @@ impl ComputeNode { let mut client = Client::connect(connstr.as_str(), NoTls)?; handle_migrations(&mut client).context("apply_config handle_migrations") }); - Ok(()) + + Ok::<(), anyhow::Error>(()) } // Wrapped this around `pg_ctl reload`, but right now we don't use @@ -971,32 +1228,16 @@ impl ComputeNode { config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || { self.pg_reload_conf()?; - let mut client = Client::connect(self.connstr.as_str(), NoTls)?; - - // Proceed with post-startup configuration. Note, that order of operations is important. - // Disable DDL forwarding because control plane already knows about these roles/databases. if spec.mode == ComputeMode::Primary { - client.simple_query("SET neon.forward_ddl = false")?; - cleanup_instance(&mut client)?; - handle_roles(&spec, &mut client)?; - handle_databases(&spec, &mut client)?; - handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?; - handle_grants( - &spec, - &mut client, - self.connstr.as_str(), - self.has_feature(ComputeFeature::AnonExtension), - )?; - handle_extensions(&spec, &mut client)?; - handle_extension_neon(&mut client)?; - // We can skip handle_migrations here because a new migration can only appear - // if we have a new version of the compute_ctl binary, which can only happen - // if compute got restarted, in which case we'll end up inside of apply_config - // instead of reconfigure. - } + let mut url = self.connstr.clone(); + url.query_pairs_mut() + .append_pair("application_name", "apply_config"); + let url = Arc::new(url); - // 'Close' connection - drop(client); + let spec = Arc::new(spec.clone()); + + self.apply_spec_sql(spec, url, 1)?; + } Ok(()) })?; diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index d27ae58fa2..ee4cf2dfa5 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -23,5 +23,6 @@ pub mod monitor; pub mod params; pub mod pg_helpers; pub mod spec; +mod spec_apply; pub mod swap; pub mod sync_sk; diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index b2dc265864..4a1e5ee0e8 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -10,9 +10,9 @@ use std::thread::JoinHandle; use std::time::{Duration, Instant}; use anyhow::{bail, Result}; +use futures::StreamExt; use ini::Ini; use notify::{RecursiveMode, Watcher}; -use postgres::{Client, Transaction}; use tokio::io::AsyncBufReadExt; use tokio::time::timeout; use tokio_postgres::NoTls; @@ -197,27 +197,34 @@ impl Escaping for PgIdent { } /// Build a list of existing Postgres roles -pub fn get_existing_roles(xact: &mut Transaction<'_>) -> Result> { - let postgres_roles = xact - .query("SELECT rolname, rolpassword FROM pg_catalog.pg_authid", &[])? - .iter() +pub async fn get_existing_roles_async(client: &tokio_postgres::Client) -> Result> { + let postgres_roles = client + .query_raw::( + "SELECT rolname, rolpassword FROM pg_catalog.pg_authid", + &[], + ) + .await? 
+ .filter_map(|row| async { row.ok() }) .map(|row| Role { name: row.get("rolname"), encrypted_password: row.get("rolpassword"), options: None, }) - .collect(); + .collect() + .await; Ok(postgres_roles) } /// Build a list of existing Postgres databases -pub fn get_existing_dbs(client: &mut Client) -> Result> { +pub async fn get_existing_dbs_async( + client: &tokio_postgres::Client, +) -> Result> { // `pg_database.datconnlimit = -2` means that the database is in the // invalid state. See: // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 - let postgres_dbs: Vec = client - .query( + let rowstream = client + .query_raw::( "SELECT datname AS name, datdba::regrole::text AS owner, @@ -226,8 +233,11 @@ pub fn get_existing_dbs(client: &mut Client) -> Result FROM pg_catalog.pg_database;", &[], - )? - .iter() + ) + .await?; + + let dbs_map = rowstream + .filter_map(|r| async { r.ok() }) .map(|row| Database { name: row.get("name"), owner: row.get("owner"), @@ -235,12 +245,9 @@ pub fn get_existing_dbs(client: &mut Client) -> Result invalid: row.get("invalid"), options: None, }) - .collect(); - - let dbs_map = postgres_dbs - .iter() .map(|db| (db.name.clone(), db.clone())) - .collect::>(); + .collect::>() + .await; Ok(dbs_map) } diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 73f3d1006a..c7d2deb090 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,22 +1,17 @@ -use std::collections::HashSet; +use anyhow::{anyhow, bail, Result}; +use postgres::Client; +use reqwest::StatusCode; use std::fs::File; use std::path::Path; -use std::str::FromStr; - -use anyhow::{anyhow, bail, Context, Result}; -use postgres::config::Config; -use postgres::{Client, NoTls}; -use reqwest::StatusCode; -use tracing::{error, info, info_span, instrument, span_enabled, warn, Level}; +use tracing::{error, info, instrument, warn}; use crate::config; -use crate::logger::inlinify; use crate::migration::MigrationRunner; use crate::params::PG_HBA_ALL_MD5; use crate::pg_helpers::*; use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse}; -use compute_api::spec::{ComputeSpec, PgIdent, Role}; +use compute_api::spec::ComputeSpec; // Do control plane request and return response if any. In case of error it // returns a bool flag indicating whether it makes sense to retry the request @@ -151,625 +146,6 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { Ok(()) } -/// Compute could be unexpectedly shut down, for example, during the -/// database dropping. This leaves the database in the invalid state, -/// which prevents new db creation with the same name. This function -/// will clean it up before proceeding with catalog updates. All -/// possible future cleanup operations may go here too. -#[instrument(skip_all)] -pub fn cleanup_instance(client: &mut Client) -> Result<()> { - let existing_dbs = get_existing_dbs(client)?; - - for (_, db) in existing_dbs { - if db.invalid { - // After recent commit in Postgres, interrupted DROP DATABASE - // leaves the database in the invalid state. According to the - // commit message, the only option for user is to drop it again. - // See: - // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 - // - // Postgres Neon extension is done the way, that db is de-registered - // in the control plane metadata only after it is dropped. So there is - // a chance that it still thinks that db should exist. 
This means - // that it will be re-created by `handle_databases()`. Yet, it's fine - // as user can just repeat drop (in vanilla Postgres they would need - // to do the same, btw). - let query = format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote()); - info!("dropping invalid database {}", db.name); - client.execute(query.as_str(), &[])?; - } - } - - Ok(()) -} - -/// Given a cluster spec json and open transaction it handles roles creation, -/// deletion and update. -#[instrument(skip_all)] -pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - let mut xact = client.transaction()?; - let existing_roles: Vec = get_existing_roles(&mut xact)?; - - let mut jwks_roles = HashSet::new(); - if let Some(local_proxy) = &spec.local_proxy_config { - for jwks_setting in local_proxy.jwks.iter().flatten() { - for role_name in &jwks_setting.role_names { - jwks_roles.insert(role_name.clone()); - } - } - } - - // Print a list of existing Postgres roles (only in debug mode) - if span_enabled!(Level::INFO) { - let mut vec = Vec::new(); - for r in &existing_roles { - vec.push(format!( - "{}:{}", - r.name, - if r.encrypted_password.is_some() { - "[FILTERED]" - } else { - "(null)" - } - )); - } - - info!("postgres roles (total {}): {:?}", vec.len(), vec); - } - - // Process delta operations first - if let Some(ops) = &spec.delta_operations { - info!("processing role renames"); - for op in ops { - match op.action.as_ref() { - "delete_role" => { - // no-op now, roles will be deleted at the end of configuration - } - // Renaming role drops its password, since role name is - // used as a salt there. It is important that this role - // is recorded with a new `name` in the `roles` list. - // Follow up roles update will set the new password. - "rename_role" => { - let new_name = op.new_name.as_ref().unwrap(); - - // XXX: with a limited number of roles it is fine, but consider making it a HashMap - if existing_roles.iter().any(|r| r.name == op.name) { - let query: String = format!( - "ALTER ROLE {} RENAME TO {}", - op.name.pg_quote(), - new_name.pg_quote() - ); - - warn!("renaming role '{}' to '{}'", op.name, new_name); - xact.execute(query.as_str(), &[])?; - } - } - _ => {} - } - } - } - - // Refresh Postgres roles info to handle possible roles renaming - let existing_roles: Vec = get_existing_roles(&mut xact)?; - - info!( - "handling cluster spec roles (total {})", - spec.cluster.roles.len() - ); - for role in &spec.cluster.roles { - let name = &role.name; - // XXX: with a limited number of roles it is fine, but consider making it a HashMap - let pg_role = existing_roles.iter().find(|r| r.name == *name); - - enum RoleAction { - None, - Update, - Create, - } - let action = if let Some(r) = pg_role { - if (r.encrypted_password.is_none() && role.encrypted_password.is_some()) - || (r.encrypted_password.is_some() && role.encrypted_password.is_none()) - { - RoleAction::Update - } else if let Some(pg_pwd) = &r.encrypted_password { - // Check whether password changed or not (trim 'md5' prefix first if any) - // - // This is a backward compatibility hack, which comes from the times when we were using - // md5 for everyone and hashes were stored in the console db without md5 prefix. So when - // role comes from the control-plane (json spec) `Role.encrypted_password` doesn't have md5 prefix, - // but when role comes from Postgres (`get_existing_roles` / `existing_roles`) it has this prefix. 
- // Here is the only place so far where we compare hashes, so it seems to be the best candidate - // to place this compatibility layer. - let pg_pwd = if let Some(stripped) = pg_pwd.strip_prefix("md5") { - stripped - } else { - pg_pwd - }; - if pg_pwd != *role.encrypted_password.as_ref().unwrap() { - RoleAction::Update - } else { - RoleAction::None - } - } else { - RoleAction::None - } - } else { - RoleAction::Create - }; - - match action { - RoleAction::None => {} - RoleAction::Update => { - // This can be run on /every/ role! Not just ones created through the console. - // This means that if you add some funny ALTER here that adds a permission, - // this will get run even on user-created roles! This will result in different - // behavior before and after a spec gets reapplied. The below ALTER as it stands - // now only grants LOGIN and changes the password. Please do not allow this branch - // to do anything silly. - let mut query: String = format!("ALTER ROLE {} ", name.pg_quote()); - query.push_str(&role.to_pg_options()); - xact.execute(query.as_str(), &[])?; - } - RoleAction::Create => { - // This branch only runs when roles are created through the console, so it is - // safe to add more permissions here. BYPASSRLS and REPLICATION are inherited - // from neon_superuser. - let mut query: String = format!( - "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser", - name.pg_quote() - ); - if jwks_roles.contains(name.as_str()) { - query = format!("CREATE ROLE {}", name.pg_quote()); - } - info!("running role create query: '{}'", &query); - query.push_str(&role.to_pg_options()); - xact.execute(query.as_str(), &[])?; - } - } - - if span_enabled!(Level::INFO) { - let pwd = if role.encrypted_password.is_some() { - "[FILTERED]" - } else { - "(null)" - }; - let action_str = match action { - RoleAction::None => "", - RoleAction::Create => " -> create", - RoleAction::Update => " -> update", - }; - info!(" - {}:{}{}", name, pwd, action_str); - } - } - - xact.commit()?; - - Ok(()) -} - -/// Reassign all dependent objects and delete requested roles. -#[instrument(skip_all)] -pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> { - if let Some(ops) = &spec.delta_operations { - // First, reassign all dependent objects to db owners. - info!("reassigning dependent objects of to-be-deleted roles"); - - // Fetch existing roles. We could've exported and used `existing_roles` from - // `handle_roles()`, but we only make this list there before creating new roles. - // Which is probably fine as we never create to-be-deleted roles, but that'd - // just look a bit untidy. Anyway, the entire `pg_roles` should be in shared - // buffers already, so this shouldn't be a big deal. - let mut xact = client.transaction()?; - let existing_roles: Vec = get_existing_roles(&mut xact)?; - xact.commit()?; - - for op in ops { - // Check that role is still present in Postgres, as this could be a - // restart with the same spec after role deletion. - if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) { - reassign_owned_objects(spec, connstr, &op.name)?; - } - } - - // Second, proceed with role deletions. 
- info!("processing role deletions"); - let mut xact = client.transaction()?; - for op in ops { - // We do not check either role exists or not, - // Postgres will take care of it for us - if op.action == "delete_role" { - let query: String = format!("DROP ROLE IF EXISTS {}", &op.name.pg_quote()); - - warn!("deleting role '{}'", &op.name); - xact.execute(query.as_str(), &[])?; - } - } - xact.commit()?; - } - - Ok(()) -} - -fn reassign_owned_objects_in_one_db( - conf: Config, - role_name: &PgIdent, - db_owner: &PgIdent, -) -> Result<()> { - let mut client = conf.connect(NoTls)?; - - // This will reassign all dependent objects to the db owner - let reassign_query = format!( - "REASSIGN OWNED BY {} TO {}", - role_name.pg_quote(), - db_owner.pg_quote() - ); - info!( - "reassigning objects owned by '{}' in db '{}' to '{}'", - role_name, - conf.get_dbname().unwrap_or(""), - db_owner - ); - client.simple_query(&reassign_query)?; - - // This now will only drop privileges of the role - let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote()); - client.simple_query(&drop_query)?; - Ok(()) -} - -// Reassign all owned objects in all databases to the owner of the database. -fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> { - for db in &spec.cluster.databases { - if db.owner != *role_name { - let mut conf = Config::from_str(connstr)?; - conf.dbname(&db.name); - reassign_owned_objects_in_one_db(conf, role_name, &db.owner)?; - } - } - - // Also handle case when there are no databases in the spec. - // In this case we need to reassign objects in the default database. - let conf = Config::from_str(connstr)?; - let db_owner = PgIdent::from_str("cloud_admin")?; - reassign_owned_objects_in_one_db(conf, role_name, &db_owner)?; - - Ok(()) -} - -/// It follows mostly the same logic as `handle_roles()` excepting that we -/// does not use an explicit transactions block, since major database operations -/// like `CREATE DATABASE` and `DROP DATABASE` do not support it. Statement-level -/// atomicity should be enough here due to the order of operations and various checks, -/// which together provide us idempotency. -#[instrument(skip_all)] -pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - let existing_dbs = get_existing_dbs(client)?; - - // Print a list of existing Postgres databases (only in debug mode) - if span_enabled!(Level::INFO) { - let mut vec = Vec::new(); - for (dbname, db) in &existing_dbs { - vec.push(format!("{}:{}", dbname, db.owner)); - } - info!("postgres databases (total {}): {:?}", vec.len(), vec); - } - - // Process delta operations first - if let Some(ops) = &spec.delta_operations { - info!("processing delta operations on databases"); - for op in ops { - match op.action.as_ref() { - // We do not check either DB exists or not, - // Postgres will take care of it for us - "delete_db" => { - // In Postgres we can't drop a database if it is a template. - // So we need to unset the template flag first, but it could - // be a retry, so we could've already dropped the database. - // Check that database exists first to make it idempotent. - let unset_template_query: String = format!( - " - DO $$ - BEGIN - IF EXISTS( - SELECT 1 - FROM pg_catalog.pg_database - WHERE datname = {} - ) - THEN - ALTER DATABASE {} is_template false; - END IF; - END - $$;", - escape_literal(&op.name), - &op.name.pg_quote() - ); - // Use FORCE to drop database even if there are active connections. 
- // We run this from `cloud_admin`, so it should have enough privileges. - // NB: there could be other db states, which prevent us from dropping - // the database. For example, if db is used by any active subscription - // or replication slot. - // TODO: deal with it once we allow logical replication. Proper fix should - // involve returning an error code to the control plane, so it could - // figure out that this is a non-retryable error, return it to the user - // and fail operation permanently. - let drop_db_query: String = format!( - "DROP DATABASE IF EXISTS {} WITH (FORCE)", - &op.name.pg_quote() - ); - - warn!("deleting database '{}'", &op.name); - client.execute(unset_template_query.as_str(), &[])?; - client.execute(drop_db_query.as_str(), &[])?; - } - "rename_db" => { - let new_name = op.new_name.as_ref().unwrap(); - - if existing_dbs.contains_key(&op.name) { - let query: String = format!( - "ALTER DATABASE {} RENAME TO {}", - op.name.pg_quote(), - new_name.pg_quote() - ); - - warn!("renaming database '{}' to '{}'", op.name, new_name); - client.execute(query.as_str(), &[])?; - } - } - _ => {} - } - } - } - - // Refresh Postgres databases info to handle possible renames - let existing_dbs = get_existing_dbs(client)?; - - info!( - "handling cluster spec databases (total {})", - spec.cluster.databases.len() - ); - for db in &spec.cluster.databases { - let name = &db.name; - let pg_db = existing_dbs.get(name); - - enum DatabaseAction { - None, - Update, - Create, - } - let action = if let Some(r) = pg_db { - // XXX: db owner name is returned as quoted string from Postgres, - // when quoting is needed. - let new_owner = if r.owner.starts_with('"') { - db.owner.pg_quote() - } else { - db.owner.clone() - }; - - if new_owner != r.owner { - // Update the owner - DatabaseAction::Update - } else { - DatabaseAction::None - } - } else { - DatabaseAction::Create - }; - - match action { - DatabaseAction::None => {} - DatabaseAction::Update => { - let query: String = format!( - "ALTER DATABASE {} OWNER TO {}", - name.pg_quote(), - db.owner.pg_quote() - ); - let _guard = info_span!("executing", query).entered(); - client.execute(query.as_str(), &[])?; - } - DatabaseAction::Create => { - let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote()); - query.push_str(&db.to_pg_options()); - let _guard = info_span!("executing", query).entered(); - client.execute(query.as_str(), &[])?; - let grant_query: String = format!( - "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser", - name.pg_quote() - ); - client.execute(grant_query.as_str(), &[])?; - } - }; - - if span_enabled!(Level::INFO) { - let action_str = match action { - DatabaseAction::None => "", - DatabaseAction::Create => " -> create", - DatabaseAction::Update => " -> update", - }; - info!(" - {}:{}{}", db.name, db.owner, action_str); - } - } - - Ok(()) -} - -/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants -/// to allow users creating trusted extensions and re-creating `public` schema, for example. -#[instrument(skip_all)] -pub fn handle_grants( - spec: &ComputeSpec, - client: &mut Client, - connstr: &str, - enable_anon_extension: bool, -) -> Result<()> { - info!("modifying database permissions"); - let existing_dbs = get_existing_dbs(client)?; - - // Do some per-database access adjustments. We'd better do this at db creation time, - // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants - // atomically. 
- for db in &spec.cluster.databases { - match existing_dbs.get(&db.name) { - Some(pg_db) => { - if pg_db.restrict_conn || pg_db.invalid { - info!( - "skipping grants for db {} (invalid: {}, connections not allowed: {})", - db.name, pg_db.invalid, pg_db.restrict_conn - ); - continue; - } - } - None => { - bail!( - "database {} doesn't exist in Postgres after handle_databases()", - db.name - ); - } - } - - let mut conf = Config::from_str(connstr)?; - conf.dbname(&db.name); - - let mut db_client = conf.connect(NoTls)?; - - // This will only change ownership on the schema itself, not the objects - // inside it. Without it owner of the `public` schema will be `cloud_admin` - // and database owner cannot do anything with it. SQL procedure ensures - // that it won't error out if schema `public` doesn't exist. - let alter_query = format!( - "DO $$\n\ - DECLARE\n\ - schema_owner TEXT;\n\ - BEGIN\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - )\n\ - THEN\n\ - SELECT nspowner::regrole::text\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - INTO schema_owner;\n\ - \n\ - IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin'\n\ - THEN\n\ - ALTER SCHEMA public OWNER TO {};\n\ - END IF;\n\ - END IF;\n\ - END\n\ - $$;", - db.owner.pg_quote() - ); - db_client.simple_query(&alter_query)?; - - // Explicitly grant CREATE ON SCHEMA PUBLIC to the web_access user. - // This is needed because since postgres 15 this privilege is removed by default. - // TODO: web_access isn't created for almost 1 year. It could be that we have - // active users of 1 year old projects, but hopefully not, so check it and - // remove this code if possible. The worst thing that could happen is that - // user won't be able to use public schema in NEW databases created in the - // very OLD project. - // - // Also, alter default permissions so that relations created by extensions can be - // used by neon_superuser without permission issues. 
- let grant_query = "DO $$\n\ - BEGIN\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - ) AND\n\ - current_setting('server_version_num')::int/10000 >= 15\n\ - THEN\n\ - IF EXISTS(\n\ - SELECT rolname\n\ - FROM pg_catalog.pg_roles\n\ - WHERE rolname = 'web_access'\n\ - )\n\ - THEN\n\ - GRANT CREATE ON SCHEMA public TO web_access;\n\ - END IF;\n\ - END IF;\n\ - IF EXISTS(\n\ - SELECT nspname\n\ - FROM pg_catalog.pg_namespace\n\ - WHERE nspname = 'public'\n\ - )\n\ - THEN\n\ - ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;\n\ - ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;\n\ - END IF;\n\ - END\n\ - $$;" - .to_string(); - - info!( - "grant query for db {} : {}", - &db.name, - inlinify(&grant_query) - ); - db_client.simple_query(&grant_query)?; - - // it is important to run this after all grants - if enable_anon_extension { - handle_extension_anon(spec, &db.owner, &mut db_client, false) - .context("handle_grants handle_extension_anon")?; - } - } - - Ok(()) -} - -/// Create required system extensions -#[instrument(skip_all)] -pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> { - if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") { - if libs.contains("pg_stat_statements") { - // Create extension only if this compute really needs it - let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements"; - info!("creating system extensions with query: {}", query); - client.simple_query(query)?; - } - } - - Ok(()) -} - -/// Run CREATE and ALTER EXTENSION neon UPDATE for postgres database -#[instrument(skip_all)] -pub fn handle_extension_neon(client: &mut Client) -> Result<()> { - info!("handle extension neon"); - - let mut query = "CREATE SCHEMA IF NOT EXISTS neon"; - client.simple_query(query)?; - - query = "CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"; - info!("create neon extension with query: {}", query); - client.simple_query(query)?; - - query = "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'"; - client.simple_query(query)?; - - query = "ALTER EXTENSION neon SET SCHEMA neon"; - info!("alter neon extension schema with query: {}", query); - client.simple_query(query)?; - - // this will be a no-op if extension is already up to date, - // which may happen in two cases: - // - extension was just installed - // - extension was already installed and is up to date - let query = "ALTER EXTENSION neon UPDATE"; - info!("update neon extension version with query: {}", query); - if let Err(e) = client.simple_query(query) { - error!( - "failed to upgrade neon extension during `handle_extension_neon`: {}", - e - ); - } - - Ok(()) -} - #[instrument(skip_all)] pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> { info!("handle neon extension upgrade"); diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs new file mode 100644 index 0000000000..7308d5d36e --- /dev/null +++ b/compute_tools/src/spec_apply.rs @@ -0,0 +1,680 @@ +use std::collections::{HashMap, HashSet}; +use std::fmt::{Debug, Formatter}; +use std::future::Future; +use std::iter::empty; +use std::iter::once; +use std::sync::Arc; + +use crate::compute::construct_superuser_query; +use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt}; +use anyhow::{bail, Result}; +use compute_api::spec::{ComputeFeature, ComputeSpec, Database, 
PgIdent, Role}; +use futures::future::join_all; +use tokio::sync::RwLock; +use tokio_postgres::Client; +use tracing::{debug, info_span, Instrument}; + +#[derive(Clone)] +pub enum DB { + SystemDB, + UserDB(Database), +} + +impl DB { + pub fn new(db: Database) -> DB { + Self::UserDB(db) + } + + pub fn is_owned_by(&self, role: &PgIdent) -> bool { + match self { + DB::SystemDB => false, + DB::UserDB(db) => &db.owner == role, + } + } +} + +impl Debug for DB { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + DB::SystemDB => f.debug_tuple("SystemDB").finish(), + DB::UserDB(db) => f.debug_tuple("UserDB").field(&db.name).finish(), + } + } +} + +#[derive(Copy, Clone, Debug)] +pub enum PerDatabasePhase { + DeleteDBRoleReferences, + ChangeSchemaPerms, + HandleAnonExtension, +} + +#[derive(Clone, Debug)] +pub enum ApplySpecPhase { + CreateSuperUser, + DropInvalidDatabases, + RenameRoles, + CreateAndAlterRoles, + RenameAndDeleteDatabases, + CreateAndAlterDatabases, + RunInEachDatabase { db: DB, subphase: PerDatabasePhase }, + HandleOtherExtensions, + HandleNeonExtension, + CreateAvailabilityCheck, + DropRoles, +} + +pub struct Operation { + pub query: String, + pub comment: Option, +} + +pub struct MutableApplyContext { + pub roles: HashMap, + pub dbs: HashMap, +} + +/// Appply the operations that belong to the given spec apply phase. +/// +/// Commands within a single phase are executed in order of Iterator yield. +/// Commands of ApplySpecPhase::RunInEachDatabase will execute in the database +/// indicated by its `db` field, and can share a single client for all changes +/// to that database. +/// +/// Notes: +/// - Commands are pipelined, and thus may cause incomplete apply if one +/// command of many fails. +/// - Failing commands will fail the phase's apply step once the return value +/// is processed. +/// - No timeouts have (yet) been implemented. +/// - The caller is responsible for limiting and/or applying concurrency. +pub async fn apply_operations<'a, Fut, F>( + spec: Arc, + ctx: Arc>, + jwks_roles: Arc>, + apply_spec_phase: ApplySpecPhase, + client: F, +) -> Result<()> +where + F: FnOnce() -> Fut, + Fut: Future>, +{ + debug!("Starting phase {:?}", &apply_spec_phase); + let span = info_span!("db_apply_changes", phase=?apply_spec_phase); + let span2 = span.clone(); + async move { + debug!("Processing phase {:?}", &apply_spec_phase); + let ctx = ctx; + + let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase) + .await? + .peekable(); + + // Return (and by doing so, skip requesting the PostgreSQL client) if + // we don't have any operations scheduled. + if ops.peek().is_none() { + return Ok(()); + } + + let client = client().await?; + + debug!("Applying phase {:?}", &apply_spec_phase); + + let active_queries = ops + .map(|op| { + let Operation { comment, query } = op; + let inspan = match comment { + None => span.clone(), + Some(comment) => info_span!("phase {}: {}", comment), + }; + + async { + let query = query; + let res = client.simple_query(&query).await; + debug!( + "{} {}", + if res.is_ok() { + "successfully executed" + } else { + "failed to execute" + }, + query + ); + res + } + .instrument(inspan) + }) + .collect::>(); + + drop(ctx); + + for it in join_all(active_queries).await { + drop(it?); + } + + debug!("Completed phase {:?}", &apply_spec_phase); + + Ok(()) + } + .instrument(span2) + .await +} + +/// Create a stream of operations to be executed for that phase of applying +/// changes. 
+/// +/// In the future we may generate a single stream of changes and then +/// sort/merge/batch execution, but for now this is a nice way to improve +/// batching behaviour of the commands. +async fn get_operations<'a>( + spec: &'a ComputeSpec, + ctx: &'a RwLock, + jwks_roles: &'a HashSet, + apply_spec_phase: &'a ApplySpecPhase, +) -> Result + 'a + Send>> { + match apply_spec_phase { + ApplySpecPhase::CreateSuperUser => { + let query = construct_superuser_query(spec); + + Ok(Box::new(once(Operation { + query, + comment: None, + }))) + } + ApplySpecPhase::DropInvalidDatabases => { + let mut ctx = ctx.write().await; + let databases = &mut ctx.dbs; + + let keys: Vec<_> = databases + .iter() + .filter(|(_, db)| db.invalid) + .map(|(dbname, _)| dbname.clone()) + .collect(); + + // After recent commit in Postgres, interrupted DROP DATABASE + // leaves the database in the invalid state. According to the + // commit message, the only option for user is to drop it again. + // See: + // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 + // + // Postgres Neon extension is done the way, that db is de-registered + // in the control plane metadata only after it is dropped. So there is + // a chance that it still thinks that the db should exist. This means + // that it will be re-created by the `CreateDatabases` phase. This + // is fine, as user can just drop the table again (in vanilla + // Postgres they would need to do the same). + let operations = keys + .into_iter() + .filter_map(move |dbname| ctx.dbs.remove(&dbname)) + .map(|db| Operation { + query: format!("DROP DATABASE IF EXISTS {}", db.name.pg_quote()), + comment: Some(format!("Dropping invalid database {}", db.name)), + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RenameRoles => { + let mut ctx = ctx.write().await; + + let operations = spec + .delta_operations + .iter() + .flatten() + .filter(|op| op.action == "rename_role") + .filter_map(move |op| { + let roles = &mut ctx.roles; + + if roles.contains_key(op.name.as_str()) { + None + } else { + let new_name = op.new_name.as_ref().unwrap(); + let mut role = roles.remove(op.name.as_str()).unwrap(); + + role.name = new_name.clone(); + role.encrypted_password = None; + roles.insert(role.name.clone(), role); + + Some(Operation { + query: format!( + "ALTER ROLE {} RENAME TO {}", + op.name.pg_quote(), + new_name.pg_quote() + ), + comment: Some(format!("renaming role '{}' to '{}'", op.name, new_name)), + }) + } + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAndAlterRoles => { + let mut ctx = ctx.write().await; + + let operations = spec.cluster.roles + .iter() + .filter_map(move |role| { + let roles = &mut ctx.roles; + let db_role = roles.get(&role.name); + + match db_role { + Some(db_role) => { + if db_role.encrypted_password != role.encrypted_password { + // This can be run on /every/ role! Not just ones created through the console. + // This means that if you add some funny ALTER here that adds a permission, + // this will get run even on user-created roles! This will result in different + // behavior before and after a spec gets reapplied. The below ALTER as it stands + // now only grants LOGIN and changes the password. Please do not allow this branch + // to do anything silly. 
+ Some(Operation { + query: format!( + "ALTER ROLE {} {}", + role.name.pg_quote(), + role.to_pg_options(), + ), + comment: None, + }) + } else { + None + } + } + None => { + let query = if !jwks_roles.contains(role.name.as_str()) { + format!( + "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}", + role.name.pg_quote(), + role.to_pg_options(), + ) + } else { + format!( + "CREATE ROLE {} {}", + role.name.pg_quote(), + role.to_pg_options(), + ) + }; + Some(Operation { + query, + comment: Some(format!("creating role {}", role.name)), + }) + } + } + }); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RenameAndDeleteDatabases => { + let mut ctx = ctx.write().await; + + let operations = spec + .delta_operations + .iter() + .flatten() + .filter_map(move |op| { + let databases = &mut ctx.dbs; + match op.action.as_str() { + // We do not check whether the DB exists or not, + // Postgres will take care of it for us + "delete_db" => { + // In Postgres we can't drop a database if it is a template. + // So we need to unset the template flag first, but it could + // be a retry, so we could've already dropped the database. + // Check that database exists first to make it idempotent. + let unset_template_query: String = format!( + include_str!("sql/unset_template_for_drop_dbs.sql"), + datname_str = escape_literal(&op.name), + datname = &op.name.pg_quote() + ); + + // Use FORCE to drop database even if there are active connections. + // We run this from `cloud_admin`, so it should have enough privileges. + // NB: there could be other db states, which prevent us from dropping + // the database. For example, if db is used by any active subscription + // or replication slot. + // TODO: deal with it once we allow logical replication. Proper fix should + // involve returning an error code to the control plane, so it could + // figure out that this is a non-retryable error, return it to the user + // and fail operation permanently. 
+ let drop_db_query: String = format!( + "DROP DATABASE IF EXISTS {} WITH (FORCE)", + &op.name.pg_quote() + ); + + databases.remove(&op.name); + + Some(vec![ + Operation { + query: unset_template_query, + comment: Some(format!( + "optionally clearing template flags for DB {}", + op.name, + )), + }, + Operation { + query: drop_db_query, + comment: Some(format!("deleting database {}", op.name,)), + }, + ]) + } + "rename_db" => { + if let Some(mut db) = databases.remove(&op.name) { + // update state of known databases + let new_name = op.new_name.as_ref().unwrap(); + db.name = new_name.clone(); + databases.insert(db.name.clone(), db); + + Some(vec![Operation { + query: format!( + "ALTER DATABASE {} RENAME TO {}", + op.name.pg_quote(), + new_name.pg_quote(), + ), + comment: Some(format!( + "renaming database '{}' to '{}'", + op.name, new_name + )), + }]) + } else { + None + } + } + _ => None, + } + }) + .flatten(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAndAlterDatabases => { + let mut ctx = ctx.write().await; + + let operations = spec + .cluster + .databases + .iter() + .filter_map(move |db| { + let databases = &mut ctx.dbs; + if let Some(edb) = databases.get_mut(&db.name) { + let change_owner = if edb.owner.starts_with('"') { + db.owner.pg_quote() != edb.owner + } else { + db.owner != edb.owner + }; + + edb.owner = db.owner.clone(); + + if change_owner { + Some(vec![Operation { + query: format!( + "ALTER DATABASE {} OWNER TO {}", + db.name.pg_quote(), + db.owner.pg_quote() + ), + comment: Some(format!( + "changing database owner of database {} to {}", + db.name, db.owner + )), + }]) + } else { + None + } + } else { + databases.insert(db.name.clone(), db.clone()); + + Some(vec![ + Operation { + query: format!( + "CREATE DATABASE {} {}", + db.name.pg_quote(), + db.to_pg_options(), + ), + comment: None, + }, + Operation { + query: format!( + "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser", + db.name.pg_quote() + ), + comment: None, + }, + ]) + } + }) + .flatten(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::RunInEachDatabase { db, subphase } => { + match subphase { + PerDatabasePhase::DeleteDBRoleReferences => { + let ctx = ctx.read().await; + + let operations = + spec.delta_operations + .iter() + .flatten() + .filter(|op| op.action == "delete_role") + .filter_map(move |op| { + if db.is_owned_by(&op.name) { + return None; + } + if !ctx.roles.contains_key(&op.name) { + return None; + } + let quoted = op.name.pg_quote(); + let new_owner = match &db { + DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(), + DB::UserDB(db) => db.owner.pg_quote(), + }; + + Some(vec![ + // This will reassign all dependent objects to the db owner + Operation { + query: format!( + "REASSIGN OWNED BY {} TO {}", + quoted, new_owner, + ), + comment: None, + }, + // This now will only drop privileges of the role + Operation { + query: format!("DROP OWNED BY {}", quoted), + comment: None, + }, + ]) + }) + .flatten(); + + Ok(Box::new(operations)) + } + PerDatabasePhase::ChangeSchemaPerms => { + let ctx = ctx.read().await; + let databases = &ctx.dbs; + + let db = match &db { + // ignore schema permissions on the system database + DB::SystemDB => return Ok(Box::new(empty())), + DB::UserDB(db) => db, + }; + + if databases.get(&db.name).is_none() { + bail!("database {} doesn't exist in PostgreSQL", db.name); + } + + let edb = databases.get(&db.name).unwrap(); + + if edb.restrict_conn || edb.invalid { + return Ok(Box::new(empty())); + } + + let operations = vec![ + Operation { + query: 
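One subtlety in `CreateAndAlterDatabases` above: the owner recorded in the database map may already be stored in quoted form, so the desired owner has to be quoted before comparing, otherwise every spec apply would issue a spurious `ALTER DATABASE ... OWNER TO`. A standalone restatement of that check, assuming `PgIdent` is a plain string type with the `pg_quote()` helper used throughout this diff:

```rust
// Sketch of the owner-change decision above; treating PgIdent as a String alias
// is an assumption made only for this illustration.
fn owner_needs_change(desired: &PgIdent, existing: &PgIdent) -> bool {
    if existing.starts_with('"') {
        // stored owner is already quoted: compare against the quoted desired name
        desired.pg_quote() != *existing
    } else {
        desired != existing
    }
}
```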
format!( + include_str!("sql/set_public_schema_owner.sql"), + db_owner = db.owner.pg_quote() + ), + comment: None, + }, + Operation { + query: String::from(include_str!("sql/default_grants.sql")), + comment: None, + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + PerDatabasePhase::HandleAnonExtension => { + // Only install Anon into user databases + let db = match &db { + DB::SystemDB => return Ok(Box::new(empty())), + DB::UserDB(db) => db, + }; + // Never install Anon when it's not enabled as feature + if !spec.features.contains(&ComputeFeature::AnonExtension) { + return Ok(Box::new(empty())); + } + + // Only install Anon when it's added in preload libraries + let opt_libs = spec.cluster.settings.find("shared_preload_libraries"); + + let libs = match opt_libs { + Some(libs) => libs, + None => return Ok(Box::new(empty())), + }; + + if !libs.contains("anon") { + return Ok(Box::new(empty())); + } + + let db_owner = db.owner.pg_quote(); + + let operations = vec![ + // Create anon extension if this compute needs it + // Users cannot create it themselves, because superuser is required. + Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS anon CASCADE"), + comment: Some(String::from("creating anon extension")), + }, + // Initialize anon extension + // This also requires superuser privileges, so users cannot do it themselves. + Operation { + query: String::from("SELECT anon.init()"), + comment: Some(String::from("initializing anon extension data")), + }, + Operation { + query: format!("GRANT ALL ON SCHEMA anon TO {}", db_owner), + comment: Some(String::from( + "granting anon extension schema permissions", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL FUNCTIONS IN SCHEMA anon TO {}", + db_owner + ), + comment: Some(String::from( + "granting anon extension schema functions permissions", + )), + }, + // We need this, because some functions are defined as SECURITY DEFINER. + // In Postgres SECURITY DEFINER functions are executed with the privileges + // of the owner. + // In anon extension this it is needed to access some GUCs, which are only accessible to + // superuser. But we've patched postgres to allow db_owner to access them as well. + // So we need to change owner of these functions to db_owner. + Operation { + query: format!( + include_str!("sql/anon_ext_fn_reassign.sql"), + db_owner = db_owner, + ), + comment: Some(String::from( + "change anon extension functions owner to database_owner", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL TABLES IN SCHEMA anon TO {}", + db_owner, + ), + comment: Some(String::from( + "granting anon extension tables permissions", + )), + }, + Operation { + query: format!( + "GRANT ALL ON ALL SEQUENCES IN SCHEMA anon TO {}", + db_owner, + ), + comment: Some(String::from( + "granting anon extension sequences permissions", + )), + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + } + } + // Interestingly, we only install p_s_s in the main database, even when + // it's preloaded. 
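The `HandleAnonExtension` subphase above bails out early unless several conditions line up. Collapsed into a single predicate for readability (a sketch; it assumes `settings.find` returns an `Option` of a string-like value, as the usage above suggests):

```rust
// Sketch: anon is only installed into user databases when the compute feature is
// enabled AND "anon" is actually present in shared_preload_libraries.
fn should_install_anon(spec: &ComputeSpec) -> bool {
    spec.features.contains(&ComputeFeature::AnonExtension)
        && spec
            .cluster
            .settings
            .find("shared_preload_libraries")
            .is_some_and(|libs| libs.contains("anon"))
}
```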
+ ApplySpecPhase::HandleOtherExtensions => { + if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") { + if libs.contains("pg_stat_statements") { + return Ok(Box::new(once(Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS pg_stat_statements"), + comment: Some(String::from("create system extensions")), + }))); + } + } + Ok(Box::new(empty())) + } + ApplySpecPhase::HandleNeonExtension => { + let operations = vec![ + Operation { + query: String::from("CREATE SCHEMA IF NOT EXISTS neon"), + comment: Some(String::from("init: add schema for extension")), + }, + Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS neon WITH SCHEMA neon"), + comment: Some(String::from( + "init: install the extension if not already installed", + )), + }, + Operation { + query: String::from( + "UPDATE pg_extension SET extrelocatable = true WHERE extname = 'neon'", + ), + comment: Some(String::from("compat/fix: make neon relocatable")), + }, + Operation { + query: String::from("ALTER EXTENSION neon SET SCHEMA neon"), + comment: Some(String::from("compat/fix: alter neon extension schema")), + }, + Operation { + query: String::from("ALTER EXTENSION neon UPDATE"), + comment: Some(String::from("compat/update: update neon extension version")), + }, + ] + .into_iter(); + + Ok(Box::new(operations)) + } + ApplySpecPhase::CreateAvailabilityCheck => Ok(Box::new(once(Operation { + query: String::from(include_str!("sql/add_availabilitycheck_tables.sql")), + comment: None, + }))), + ApplySpecPhase::DropRoles => { + let operations = spec + .delta_operations + .iter() + .flatten() + .filter(|op| op.action == "delete_role") + .map(|op| Operation { + query: format!("DROP ROLE IF EXISTS {}", op.name.pg_quote()), + comment: None, + }); + + Ok(Box::new(operations)) + } + } +} diff --git a/compute_tools/src/sql/add_availabilitycheck_tables.sql b/compute_tools/src/sql/add_availabilitycheck_tables.sql new file mode 100644 index 0000000000..7c60690c78 --- /dev/null +++ b/compute_tools/src/sql/add_availabilitycheck_tables.sql @@ -0,0 +1,18 @@ +DO $$ +BEGIN + IF NOT EXISTS( + SELECT 1 + FROM pg_catalog.pg_tables + WHERE tablename = 'health_check' + ) + THEN + CREATE TABLE health_check ( + id serial primary key, + updated_at timestamptz default now() + ); + INSERT INTO health_check VALUES (1, now()) + ON CONFLICT (id) DO UPDATE + SET updated_at = now(); + END IF; +END +$$ \ No newline at end of file diff --git a/compute_tools/src/sql/anon_ext_fn_reassign.sql b/compute_tools/src/sql/anon_ext_fn_reassign.sql new file mode 100644 index 0000000000..3d7b15c590 --- /dev/null +++ b/compute_tools/src/sql/anon_ext_fn_reassign.sql @@ -0,0 +1,12 @@ +DO $$ +DECLARE + query varchar; +BEGIN + FOR query IN SELECT 'ALTER FUNCTION '||nsp.nspname||'.'||p.proname||'('||pg_get_function_identity_arguments(p.oid)||') OWNER TO {db_owner};' + FROM pg_proc p + JOIN pg_namespace nsp ON p.pronamespace = nsp.oid + WHERE nsp.nspname = 'anon' LOOP + EXECUTE query; + END LOOP; +END +$$; diff --git a/compute_tools/src/sql/default_grants.sql b/compute_tools/src/sql/default_grants.sql new file mode 100644 index 0000000000..58ebb0690b --- /dev/null +++ b/compute_tools/src/sql/default_grants.sql @@ -0,0 +1,30 @@ +DO +$$ + BEGIN + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) AND + current_setting('server_version_num')::int / 10000 >= 15 + THEN + IF EXISTS( + SELECT rolname + FROM pg_catalog.pg_roles + WHERE rolname = 'web_access' + ) + THEN + GRANT CREATE ON SCHEMA public TO web_access; + 
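Several of the new `.sql` files are not executed verbatim: `anon_ext_fn_reassign.sql` here, and `set_public_schema_owner.sql` and `unset_template_for_drop_dbs.sql` below, are `format!` templates whose `{db_owner}` / `{datname}` placeholders are filled in by the Rust phases via `include_str!`. A minimal sketch of that mechanism, with an illustrative owner value:

```rust
// Sketch of template instantiation, matching the pattern used in the phases
// above. The template path and pg_quote() come from the diff; "app_owner" is
// only an example argument a caller might pass.
fn render_anon_reassign_query(db_owner: &PgIdent) -> String {
    format!(
        include_str!("sql/anon_ext_fn_reassign.sql"),
        db_owner = db_owner.pg_quote(),
    )
}
```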
END IF; + END IF; + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) + THEN + ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION; + ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION; + END IF; + END +$$; \ No newline at end of file diff --git a/compute_tools/src/sql/set_public_schema_owner.sql b/compute_tools/src/sql/set_public_schema_owner.sql new file mode 100644 index 0000000000..fd061a713e --- /dev/null +++ b/compute_tools/src/sql/set_public_schema_owner.sql @@ -0,0 +1,23 @@ +DO +$$ + DECLARE + schema_owner TEXT; + BEGIN + IF EXISTS( + SELECT nspname + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + ) + THEN + SELECT nspowner::regrole::text + FROM pg_catalog.pg_namespace + WHERE nspname = 'public' + INTO schema_owner; + + IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin' + THEN + ALTER SCHEMA public OWNER TO {db_owner}; + END IF; + END IF; + END +$$; \ No newline at end of file diff --git a/compute_tools/src/sql/unset_template_for_drop_dbs.sql b/compute_tools/src/sql/unset_template_for_drop_dbs.sql new file mode 100644 index 0000000000..6c4343a589 --- /dev/null +++ b/compute_tools/src/sql/unset_template_for_drop_dbs.sql @@ -0,0 +1,12 @@ +DO $$ + BEGIN + IF EXISTS( + SELECT 1 + FROM pg_catalog.pg_database + WHERE datname = {datname_str} + ) + THEN + ALTER DATABASE {datname} is_template false; + END IF; + END +$$; \ No newline at end of file diff --git a/deny.toml b/deny.toml index 327ac58db7..8bf643f4ba 100644 --- a/deny.toml +++ b/deny.toml @@ -37,6 +37,7 @@ allow = [ "BSD-2-Clause", "BSD-3-Clause", "CC0-1.0", + "CDDL-1.0", "ISC", "MIT", "MPL-2.0", diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index f48c1febb5..ee20613d6d 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -109,6 +109,8 @@ pub struct ConfigToml { pub virtual_file_io_mode: Option, #[serde(skip_serializing_if = "Option::is_none")] pub no_sync: Option, + #[serde(with = "humantime_serde")] + pub server_side_batch_timeout: Option, } #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] @@ -317,6 +319,8 @@ pub mod defaults { pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0; pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512; + + pub const DEFAULT_SERVER_SIDE_BATCH_TIMEOUT: Option<&str> = None; } impl Default for ConfigToml { @@ -397,6 +401,8 @@ impl Default for ConfigToml { ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB), l0_flush: None, virtual_file_io_mode: None, + server_side_batch_timeout: DEFAULT_SERVER_SIDE_BATCH_TIMEOUT + .map(|duration| humantime::parse_duration(duration).unwrap()), tenant_config: TenantConfigToml::default(), no_sync: None, } diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs index b3fcaae62f..4505101ea6 100644 --- a/libs/pageserver_api/src/key.rs +++ b/libs/pageserver_api/src/key.rs @@ -24,7 +24,7 @@ pub struct Key { /// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as /// a struct of fields. -#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd)] +#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)] pub struct CompactKey(i128); /// The storage key size. 
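The new `server_side_batch_timeout` field is (de)serialized with `humantime_serde`, so the pageserver config accepts a human-readable duration string rather than a raw number. A small sketch of the accepted format; the `10ms` value is purely illustrative, not a recommended setting:

```rust
// Sketch: humantime is what ultimately parses the configured string into a Duration.
fn main() -> anyhow::Result<()> {
    let timeout = humantime::parse_duration("10ms")?;
    assert_eq!(timeout, std::time::Duration::from_millis(10));
    Ok(())
}
```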
diff --git a/libs/pageserver_api/src/reltag.rs b/libs/pageserver_api/src/reltag.rs index 010a9c2932..09d1fae221 100644 --- a/libs/pageserver_api/src/reltag.rs +++ b/libs/pageserver_api/src/reltag.rs @@ -24,7 +24,7 @@ use postgres_ffi::Oid; // FIXME: should move 'forknum' as last field to keep this consistent with Postgres. // Then we could replace the custom Ord and PartialOrd implementations below with // deriving them. This will require changes in walredoproc.c. -#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize)] +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)] pub struct RelTag { pub forknum: u8, pub spcnode: Oid, diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index 7419798a60..8c024375c1 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -716,6 +716,9 @@ impl PostgresBackend { Ok(()) } + // Proto looks like this: + // FeMessage::Query("pagestream_v2{FeMessage::CopyData(PagesetreamFeMessage::GetPage(..))}") + async fn process_message( &mut self, handler: &mut impl Handler, @@ -831,7 +834,7 @@ impl PostgresBackend { use CopyStreamHandlerEnd::*; let expected_end = match &end { - ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF => true, + ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true, CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error)) if is_expected_io_error(io_error) => { @@ -871,6 +874,9 @@ impl PostgresBackend { // message from server' when it receives ErrorResponse (anything but // CopyData/CopyDone) back. CopyFail => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)), + + // When cancelled, send no response: we must not risk blocking on sending that response + Cancelled => None, _ => None, }; if let Some((err, errcode)) = err_to_send_and_errcode { @@ -1048,6 +1054,8 @@ pub enum CopyStreamHandlerEnd { /// The connection was lost #[error("connection error: {0}")] Disconnected(#[from] ConnectionError), + #[error("Shutdown")] + Cancelled, /// Some other error #[error(transparent)] Other(#[from] anyhow::Error), diff --git a/libs/postgres_ffi/src/walrecord.rs b/libs/postgres_ffi/src/walrecord.rs index dedbaef64d..b32106632a 100644 --- a/libs/postgres_ffi/src/walrecord.rs +++ b/libs/postgres_ffi/src/walrecord.rs @@ -16,7 +16,7 @@ use utils::bin_ser::DeserializeError; use utils::lsn::Lsn; #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlMultiXactCreate { pub mid: MultiXactId, /* new MultiXact's ID */ @@ -46,7 +46,7 @@ impl XlMultiXactCreate { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlMultiXactTruncate { pub oldest_multi_db: Oid, /* to-be-truncated range of multixact offsets */ @@ -72,7 +72,7 @@ impl XlMultiXactTruncate { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlRelmapUpdate { pub dbid: Oid, /* database ID, or 0 for shared map */ pub tsid: Oid, /* database's tablespace, or pg_global */ @@ -90,7 +90,7 @@ impl XlRelmapUpdate { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlReploriginDrop { pub node_id: RepOriginId, } @@ -104,7 +104,7 @@ impl XlReploriginDrop { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlReploriginSet { pub remote_lsn: Lsn, pub node_id: RepOriginId, @@ -120,7 +120,7 @@ impl XlReploriginSet { } #[repr(C)] -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct 
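The new `CopyStreamHandlerEnd::Cancelled` variant (introduced further down in this file's hunk) lets copy-stream handlers terminate on shutdown without attempting to write an error response, which could block. A hypothetical handler fragment showing the intended use; the `work` future and the function name are placeholders:

```rust
use tokio_util::sync::CancellationToken;

// Hypothetical: bail out of a copy stream with the new variant when shutdown is
// requested, instead of surfacing it as an error that would be sent to the client.
async fn copy_step(
    cancel: &CancellationToken,
    work: impl std::future::Future<Output = Result<(), CopyStreamHandlerEnd>>,
) -> Result<(), CopyStreamHandlerEnd> {
    tokio::select! {
        _ = cancel.cancelled() => Err(CopyStreamHandlerEnd::Cancelled),
        res = work => res,
    }
}
```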
RelFileNode { pub spcnode: Oid, /* tablespace */ pub dbnode: Oid, /* database */ @@ -911,7 +911,7 @@ impl XlSmgrCreate { } #[repr(C)] -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlSmgrTruncate { pub blkno: BlockNumber, pub rnode: RelFileNode, @@ -984,7 +984,7 @@ impl XlDropDatabase { /// xl_xact_parsed_abort structs in PostgreSQL, but we use the same /// struct for commits and aborts. /// -#[derive(Debug)] +#[derive(Debug, Serialize, Deserialize)] pub struct XlXactParsedRecord { pub xid: TransactionId, pub info: u8, diff --git a/libs/remote_storage/src/config.rs b/libs/remote_storage/src/config.rs index d0e92411da..e99ae4f747 100644 --- a/libs/remote_storage/src/config.rs +++ b/libs/remote_storage/src/config.rs @@ -26,6 +26,16 @@ pub struct RemoteStorageConfig { pub timeout: Duration, } +impl RemoteStorageKind { + pub fn bucket_name(&self) -> Option<&str> { + match self { + RemoteStorageKind::LocalFs { .. } => None, + RemoteStorageKind::AwsS3(config) => Some(&config.bucket_name), + RemoteStorageKind::AzureContainer(config) => Some(&config.container_name), + } + } +} + fn default_timeout() -> Duration { RemoteStorageConfig::DEFAULT_TIMEOUT } @@ -178,6 +188,14 @@ impl RemoteStorageConfig { pub fn from_toml(toml: &toml_edit::Item) -> anyhow::Result { Ok(utils::toml_edit_ext::deserialize_item(toml)?) } + + pub fn from_toml_str(input: &str) -> anyhow::Result { + let toml_document = toml_edit::DocumentMut::from_str(input)?; + if let Some(item) = toml_document.get("remote_storage") { + return Self::from_toml(item); + } + Self::from_toml(toml_document.as_item()) + } } #[cfg(test)] @@ -185,8 +203,7 @@ mod tests { use super::*; fn parse(input: &str) -> anyhow::Result { - let toml = input.parse::().unwrap(); - RemoteStorageConfig::from_toml(toml.as_item()) + RemoteStorageConfig::from_toml_str(input) } #[test] diff --git a/libs/utils/scripts/restore_from_wal.sh b/libs/utils/scripts/restore_from_wal.sh index 93448369a0..a8615c2337 100755 --- a/libs/utils/scripts/restore_from_wal.sh +++ b/libs/utils/scripts/restore_from_wal.sh @@ -50,8 +50,8 @@ REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| declare -i WAL_SIZE=$REDO_POS+114 "$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start "$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate -cp "$DATA_DIR"/pg_wal/000000010000000000000001 . +cp "$DATA_DIR"/pg_wal/000000010000000000000001 "$DATA_DIR" cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/ for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done -dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc -rm -f 000000010000000000000001 +dd if="$DATA_DIR"/000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc +rm -f "$DATA_DIR"/000000010000000000000001 diff --git a/libs/utils/scripts/restore_from_wal_initdb.sh b/libs/utils/scripts/restore_from_wal_initdb.sh index c6277ebc60..e7b0432505 100755 --- a/libs/utils/scripts/restore_from_wal_initdb.sh +++ b/libs/utils/scripts/restore_from_wal_initdb.sh @@ -14,8 +14,8 @@ REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| declare -i WAL_SIZE=$REDO_POS+114 "$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" start "$PG_BIN"/pg_ctl -D "$DATA_DIR" -l "$DATA_DIR/logfile.log" stop -m immediate -cp "$DATA_DIR"/pg_wal/000000010000000000000001 . 
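The new `RemoteStorageConfig::from_toml_str` accepts either a document containing a `remote_storage` table or the bare table itself, which is what the simplified test helper in this hunk relies on. A usage sketch; apart from `bucket_name` (which appears in the diff), the field and key names are assumptions:

```rust
// Sketch: parse a config snippet and inspect the bucket via the new helper on
// RemoteStorageKind. The bucket/region values are illustrative.
fn example() -> anyhow::Result<()> {
    let config = RemoteStorageConfig::from_toml_str(
        r#"
        [remote_storage]
        bucket_name = "illustrative-bucket"
        bucket_region = "eu-central-1"
        "#,
    )?;
    assert_eq!(config.storage.bucket_name(), Some("illustrative-bucket"));
    Ok(())
}
```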
+cp "$DATA_DIR"/pg_wal/000000010000000000000001 "$DATA_DIR" cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/ for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done -dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc -rm -f 000000010000000000000001 +dd if="$DATA_DIR"/000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc +rm -f "$DATA_DIR"/000000010000000000000001 diff --git a/libs/utils/src/http/json.rs b/libs/utils/src/http/json.rs index 6c25440b42..e53231f313 100644 --- a/libs/utils/src/http/json.rs +++ b/libs/utils/src/http/json.rs @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize}; use super::error::ApiError; +/// Parse a json request body and deserialize it to the type `T`. pub async fn json_request Deserialize<'de>>( request: &mut Request, ) -> Result { @@ -27,6 +28,27 @@ pub async fn json_request Deserialize<'de>>( .map_err(ApiError::BadRequest) } +/// Parse a json request body and deserialize it to the type `T`. If the body is empty, return `T::default`. +pub async fn json_request_maybe Deserialize<'de> + Default>( + request: &mut Request, +) -> Result { + let body = hyper::body::aggregate(request.body_mut()) + .await + .context("Failed to read request body") + .map_err(ApiError::BadRequest)?; + + if body.remaining() == 0 { + return Ok(T::default()); + } + + let mut deser = serde_json::de::Deserializer::from_reader(body.reader()); + + serde_path_to_error::deserialize(&mut deser) + // intentionally stringify because the debug version is not helpful in python logs + .map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}")) + .map_err(ApiError::BadRequest) +} + pub fn json_response( status: StatusCode, data: T, diff --git a/libs/wal_decoder/src/decoder.rs b/libs/wal_decoder/src/decoder.rs index 684718d220..1895f25bfc 100644 --- a/libs/wal_decoder/src/decoder.rs +++ b/libs/wal_decoder/src/decoder.rs @@ -19,7 +19,7 @@ impl InterpretedWalRecord { pub fn from_bytes_filtered( buf: Bytes, shard: &ShardIdentity, - record_end_lsn: Lsn, + next_record_lsn: Lsn, pg_version: u32, ) -> anyhow::Result { let mut decoded = DecodedWALRecord::default(); @@ -32,18 +32,18 @@ impl InterpretedWalRecord { FlushUncommittedRecords::No }; - let metadata_record = MetadataRecord::from_decoded(&decoded, record_end_lsn, pg_version)?; + let metadata_record = MetadataRecord::from_decoded(&decoded, next_record_lsn, pg_version)?; let batch = SerializedValueBatch::from_decoded_filtered( decoded, shard, - record_end_lsn, + next_record_lsn, pg_version, )?; Ok(InterpretedWalRecord { metadata_record, batch, - end_lsn: record_end_lsn, + next_record_lsn, flush_uncommitted, xid, }) @@ -53,7 +53,7 @@ impl InterpretedWalRecord { impl MetadataRecord { fn from_decoded( decoded: &DecodedWALRecord, - record_end_lsn: Lsn, + next_record_lsn: Lsn, pg_version: u32, ) -> anyhow::Result> { // Note: this doesn't actually copy the bytes since @@ -74,7 +74,9 @@ impl MetadataRecord { Ok(None) } pg_constants::RM_CLOG_ID => Self::decode_clog_record(&mut buf, decoded, pg_version), - pg_constants::RM_XACT_ID => Self::decode_xact_record(&mut buf, decoded, record_end_lsn), + pg_constants::RM_XACT_ID => { + Self::decode_xact_record(&mut buf, decoded, next_record_lsn) + } pg_constants::RM_MULTIXACT_ID => { Self::decode_multixact_record(&mut buf, decoded, pg_version) } @@ -86,7 +88,9 @@ impl MetadataRecord { // // Alternatively, one can make the checkpoint part of the subscription protocol // to the 
pageserver. This should work fine, but can be done at a later point. - pg_constants::RM_XLOG_ID => Self::decode_xlog_record(&mut buf, decoded, record_end_lsn), + pg_constants::RM_XLOG_ID => { + Self::decode_xlog_record(&mut buf, decoded, next_record_lsn) + } pg_constants::RM_LOGICALMSG_ID => { Self::decode_logical_message_record(&mut buf, decoded) } diff --git a/libs/wal_decoder/src/models.rs b/libs/wal_decoder/src/models.rs index 5d90eeb69c..c69f8c869a 100644 --- a/libs/wal_decoder/src/models.rs +++ b/libs/wal_decoder/src/models.rs @@ -32,16 +32,19 @@ use postgres_ffi::walrecord::{ XlSmgrTruncate, XlXactParsedRecord, }; use postgres_ffi::{Oid, TransactionId}; +use serde::{Deserialize, Serialize}; use utils::lsn::Lsn; use crate::serialized_batch::SerializedValueBatch; +#[derive(Serialize, Deserialize)] pub enum FlushUncommittedRecords { Yes, No, } /// An interpreted Postgres WAL record, ready to be handled by the pageserver +#[derive(Serialize, Deserialize)] pub struct InterpretedWalRecord { /// Optional metadata record - may cause writes to metadata keys /// in the storage engine @@ -49,8 +52,10 @@ pub struct InterpretedWalRecord { /// A pre-serialized batch along with the required metadata for ingestion /// by the pageserver pub batch: SerializedValueBatch, - /// Byte offset within WAL for the end of the original PG WAL record - pub end_lsn: Lsn, + /// Byte offset within WAL for the start of the next PG WAL record. + /// Usually this is the end LSN of the current record, but in case of + /// XLOG SWITCH records it will be within the next segment. + pub next_record_lsn: Lsn, /// Whether to flush all uncommitted modifications to the storage engine /// before ingesting this record. This is currently only used for legacy PG /// database creations which read pages from a template database. Such WAL @@ -62,6 +67,7 @@ pub struct InterpretedWalRecord { /// The interpreted part of the Postgres WAL record which requires metadata /// writes to the underlying storage engine. 
+#[derive(Serialize, Deserialize)] pub enum MetadataRecord { Heapam(HeapamRecord), Neonrmgr(NeonrmgrRecord), @@ -77,10 +83,12 @@ pub enum MetadataRecord { Replorigin(ReploriginRecord), } +#[derive(Serialize, Deserialize)] pub enum HeapamRecord { ClearVmBits(ClearVmBits), } +#[derive(Serialize, Deserialize)] pub struct ClearVmBits { pub new_heap_blkno: Option, pub old_heap_blkno: Option, @@ -88,24 +96,29 @@ pub struct ClearVmBits { pub flags: u8, } +#[derive(Serialize, Deserialize)] pub enum NeonrmgrRecord { ClearVmBits(ClearVmBits), } +#[derive(Serialize, Deserialize)] pub enum SmgrRecord { Create(SmgrCreate), Truncate(XlSmgrTruncate), } +#[derive(Serialize, Deserialize)] pub struct SmgrCreate { pub rel: RelTag, } +#[derive(Serialize, Deserialize)] pub enum DbaseRecord { Create(DbaseCreate), Drop(DbaseDrop), } +#[derive(Serialize, Deserialize)] pub struct DbaseCreate { pub db_id: Oid, pub tablespace_id: Oid, @@ -113,27 +126,32 @@ pub struct DbaseCreate { pub src_tablespace_id: Oid, } +#[derive(Serialize, Deserialize)] pub struct DbaseDrop { pub db_id: Oid, pub tablespace_ids: Vec, } +#[derive(Serialize, Deserialize)] pub enum ClogRecord { ZeroPage(ClogZeroPage), Truncate(ClogTruncate), } +#[derive(Serialize, Deserialize)] pub struct ClogZeroPage { pub segno: u32, pub rpageno: u32, } +#[derive(Serialize, Deserialize)] pub struct ClogTruncate { pub pageno: u32, pub oldest_xid: TransactionId, pub oldest_xid_db: Oid, } +#[derive(Serialize, Deserialize)] pub enum XactRecord { Commit(XactCommon), Abort(XactCommon), @@ -142,6 +160,7 @@ pub enum XactRecord { Prepare(XactPrepare), } +#[derive(Serialize, Deserialize)] pub struct XactCommon { pub parsed: XlXactParsedRecord, pub origin_id: u16, @@ -150,61 +169,73 @@ pub struct XactCommon { pub lsn: Lsn, } +#[derive(Serialize, Deserialize)] pub struct XactPrepare { pub xl_xid: TransactionId, pub data: Bytes, } +#[derive(Serialize, Deserialize)] pub enum MultiXactRecord { ZeroPage(MultiXactZeroPage), Create(XlMultiXactCreate), Truncate(XlMultiXactTruncate), } +#[derive(Serialize, Deserialize)] pub struct MultiXactZeroPage { pub slru_kind: SlruKind, pub segno: u32, pub rpageno: u32, } +#[derive(Serialize, Deserialize)] pub enum RelmapRecord { Update(RelmapUpdate), } +#[derive(Serialize, Deserialize)] pub struct RelmapUpdate { pub update: XlRelmapUpdate, pub buf: Bytes, } +#[derive(Serialize, Deserialize)] pub enum XlogRecord { Raw(RawXlogRecord), } +#[derive(Serialize, Deserialize)] pub struct RawXlogRecord { pub info: u8, pub lsn: Lsn, pub buf: Bytes, } +#[derive(Serialize, Deserialize)] pub enum LogicalMessageRecord { Put(PutLogicalMessage), #[cfg(feature = "testing")] Failpoint, } +#[derive(Serialize, Deserialize)] pub struct PutLogicalMessage { pub path: String, pub buf: Bytes, } +#[derive(Serialize, Deserialize)] pub enum StandbyRecord { RunningXacts(StandbyRunningXacts), } +#[derive(Serialize, Deserialize)] pub struct StandbyRunningXacts { pub oldest_running_xid: TransactionId, } +#[derive(Serialize, Deserialize)] pub enum ReploriginRecord { Set(XlReploriginSet), Drop(XlReploriginDrop), diff --git a/libs/wal_decoder/src/serialized_batch.rs b/libs/wal_decoder/src/serialized_batch.rs index 8f33291023..9c0708ebbe 100644 --- a/libs/wal_decoder/src/serialized_batch.rs +++ b/libs/wal_decoder/src/serialized_batch.rs @@ -16,6 +16,7 @@ use pageserver_api::shard::ShardIdentity; use pageserver_api::{key::CompactKey, value::Value}; use postgres_ffi::walrecord::{DecodedBkpBlock, DecodedWALRecord}; use postgres_ffi::{page_is_new, page_set_lsn, pg_constants, 
BLCKSZ}; +use serde::{Deserialize, Serialize}; use utils::bin_ser::BeSer; use utils::lsn::Lsn; @@ -29,6 +30,7 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]); /// relation sizes. In the case of "observed" values, we only need to know /// the key and LSN, so two types of metadata are supported to save on network /// bandwidth. +#[derive(Serialize, Deserialize)] pub enum ValueMeta { Serialized(SerializedValueMeta), Observed(ObservedValueMeta), @@ -75,6 +77,7 @@ impl PartialEq for OrderedValueMeta { impl Eq for OrderedValueMeta {} /// Metadata for a [`Value`] serialized into the batch. +#[derive(Serialize, Deserialize)] pub struct SerializedValueMeta { pub key: CompactKey, pub lsn: Lsn, @@ -86,12 +89,14 @@ pub struct SerializedValueMeta { } /// Metadata for a [`Value`] observed by the batch +#[derive(Serialize, Deserialize)] pub struct ObservedValueMeta { pub key: CompactKey, pub lsn: Lsn, } /// Batch of serialized [`Value`]s. +#[derive(Serialize, Deserialize)] pub struct SerializedValueBatch { /// [`Value`]s serialized in EphemeralFile's native format, /// ready for disk write by the pageserver @@ -132,7 +137,7 @@ impl SerializedValueBatch { pub(crate) fn from_decoded_filtered( decoded: DecodedWALRecord, shard: &ShardIdentity, - record_end_lsn: Lsn, + next_record_lsn: Lsn, pg_version: u32, ) -> anyhow::Result { // First determine how big the buffer needs to be and allocate it up-front. @@ -156,13 +161,17 @@ impl SerializedValueBatch { let key = rel_block_to_key(rel, blk.blkno); if !key.is_valid_key_on_write_path() { - anyhow::bail!("Unsupported key decoded at LSN {}: {}", record_end_lsn, key); + anyhow::bail!( + "Unsupported key decoded at LSN {}: {}", + next_record_lsn, + key + ); } let key_is_local = shard.is_key_local(&key); tracing::debug!( - lsn=%record_end_lsn, + lsn=%next_record_lsn, key=%key, "ingest: shard decision {}", if !key_is_local { "drop" } else { "keep" }, @@ -174,7 +183,7 @@ impl SerializedValueBatch { // its blkno in case it implicitly extends a relation. metadata.push(ValueMeta::Observed(ObservedValueMeta { key: key.to_compact(), - lsn: record_end_lsn, + lsn: next_record_lsn, })) } @@ -205,7 +214,7 @@ impl SerializedValueBatch { // that would corrupt the page. 
// if !page_is_new(&image) { - page_set_lsn(&mut image, record_end_lsn) + page_set_lsn(&mut image, next_record_lsn) } assert_eq!(image.len(), BLCKSZ as usize); @@ -224,12 +233,12 @@ impl SerializedValueBatch { metadata.push(ValueMeta::Serialized(SerializedValueMeta { key: key.to_compact(), - lsn: record_end_lsn, + lsn: next_record_lsn, batch_offset: relative_off, len: val_ser_size, will_init: val.will_init(), })); - max_lsn = std::cmp::max(max_lsn, record_end_lsn); + max_lsn = std::cmp::max(max_lsn, next_record_lsn); len += 1; } diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index ecb8fa7491..143d8236df 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -84,6 +84,7 @@ enumset = { workspace = true, features = ["serde"]} strum.workspace = true strum_macros.workspace = true wal_decoder.workspace = true +smallvec.workspace = true [target.'cfg(target_os = "linux")'.dependencies] procfs.workspace = true diff --git a/pageserver/benches/bench_ingest.rs b/pageserver/benches/bench_ingest.rs index f6b2a8e031..caacd365b3 100644 --- a/pageserver/benches/bench_ingest.rs +++ b/pageserver/benches/bench_ingest.rs @@ -167,6 +167,7 @@ fn criterion_benchmark(c: &mut Criterion) { 16384, virtual_file::io_engine_for_bench(), conf.virtual_file_io_mode, + virtual_file::SyncMode::Sync, ); page_cache::init(conf.page_cache_size); diff --git a/pageserver/ctl/Cargo.toml b/pageserver/ctl/Cargo.toml index a753f806a0..39ca47568c 100644 --- a/pageserver/ctl/Cargo.toml +++ b/pageserver/ctl/Cargo.toml @@ -18,7 +18,6 @@ postgres_ffi.workspace = true thiserror.workspace = true tokio.workspace = true tokio-util.workspace = true -toml_edit.workspace = true utils.workspace = true svg_fmt.workspace = true workspace_hack.workspace = true diff --git a/pageserver/ctl/src/layer_map_analyzer.rs b/pageserver/ctl/src/layer_map_analyzer.rs index 11b8e98f57..2c350d6d86 100644 --- a/pageserver/ctl/src/layer_map_analyzer.rs +++ b/pageserver/ctl/src/layer_map_analyzer.rs @@ -138,6 +138,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> { 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); pageserver::page_cache::init(100); diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs index 6f543dcaa9..4c2c3ab30e 100644 --- a/pageserver/ctl/src/layers.rs +++ b/pageserver/ctl/src/layers.rs @@ -51,6 +51,7 @@ async fn read_delta_file(path: impl AsRef, ctx: &RequestContext) -> Result 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); page_cache::init(100); let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path"); @@ -65,6 +66,7 @@ async fn read_image_file(path: impl AsRef, ctx: &RequestContext) -> Result 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); page_cache::init(100); let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path"); @@ -171,6 +173,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> { 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); pageserver::page_cache::init(100); diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs index f506caec5b..a0aac89dc8 100644 --- a/pageserver/ctl/src/main.rs +++ b/pageserver/ctl/src/main.rs @@ -174,11 +174,7 @@ async fn main() -> anyhow::Result<()> { println!("specified prefix '{}' failed validation", cmd.prefix); return Ok(()); }; - let toml_document = 
toml_edit::DocumentMut::from_str(&cmd.config_toml_str)?; - let toml_item = toml_document - .get("remote_storage") - .expect("need remote_storage"); - let config = RemoteStorageConfig::from_toml(toml_item)?; + let config = RemoteStorageConfig::from_toml_str(&cmd.config_toml_str)?; let storage = remote_storage::GenericRemoteStorage::from_config(&config).await; let cancel = CancellationToken::new(); storage @@ -209,6 +205,7 @@ async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> { 10, virtual_file::api::IoEngineKind::StdFs, IoMode::preferred(), + virtual_file::SyncMode::Sync, ); page_cache::init(100); let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error); diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index fe2a31167d..033a9a4619 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -171,11 +171,18 @@ fn main() -> anyhow::Result<()> { let scenario = failpoint_support::init(); // Basic initialization of things that don't change after startup + tracing::info!("Initializing virtual_file..."); virtual_file::init( conf.max_file_descriptors, conf.virtual_file_io_engine, conf.virtual_file_io_mode, + if conf.no_sync { + virtual_file::SyncMode::UnsafeNoSync + } else { + virtual_file::SyncMode::Sync + }, ); + tracing::info!("Initializing page_cache..."); page_cache::init(conf.page_cache_size); start_pageserver(launch_ts, conf).context("Failed to start pageserver")?; diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index b694a43599..f7be6ecaab 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -182,6 +182,10 @@ pub struct PageServerConf { /// Optionally disable disk syncs (unsafe!) pub no_sync: bool, + + /// Maximum amount of time for which a get page request request + /// might be held up for request merging. + pub server_side_batch_timeout: Option, } /// Token for authentication to safekeepers @@ -336,6 +340,7 @@ impl PageServerConf { concurrent_tenant_warmup, concurrent_tenant_size_logical_size_queries, virtual_file_io_engine, + server_side_batch_timeout, tenant_config, no_sync, } = config_toml; @@ -377,6 +382,7 @@ impl PageServerConf { image_compression, timeline_offloading, ephemeral_bytes_per_memory_kb, + server_side_batch_timeout, // ------------------------------------------------------------ // fields that require additional validation or custom handling diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index ab170679ba..306b0f35ab 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -83,6 +83,8 @@ use crate::tenant::storage_layer::LayerName; use crate::tenant::timeline::offload::offload_timeline; use crate::tenant::timeline::offload::OffloadError; use crate::tenant::timeline::CompactFlags; +use crate::tenant::timeline::CompactOptions; +use crate::tenant::timeline::CompactRange; use crate::tenant::timeline::CompactionError; use crate::tenant::timeline::Timeline; use crate::tenant::GetTimelineError; @@ -100,7 +102,7 @@ use utils::{ http::{ endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with}, error::{ApiError, HttpErrorBody}, - json::{json_request, json_response}, + json::{json_request, json_request_maybe, json_response}, request::parse_request_param, RequestExt, RouterBuilder, }, @@ -1927,13 +1929,15 @@ async fn timeline_gc_handler( // Run compaction immediately on given timeline. 
async fn timeline_compact_handler( - request: Request, + mut request: Request, cancel: CancellationToken, ) -> Result, ApiError> { let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; + let compact_range = json_request_maybe::>(&mut request).await?; + let state = get_state(&request); let mut flags = EnumSet::empty(); @@ -1957,11 +1961,16 @@ async fn timeline_compact_handler( let wait_until_uploaded = parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false); + let options = CompactOptions { + compact_range, + flags, + }; + async { let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; timeline - .compact(&cancel, flags, &ctx) + .compact_with_options(&cancel, options, &ctx) .await .map_err(|e| ApiError::InternalServerError(e.into()))?; if wait_until_uploaded { diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 1473729186..3cdc2a761e 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1187,6 +1187,7 @@ struct GlobalAndPerTimelineHistogramTimer<'a, 'c> { ctx: &'c RequestContext, start: std::time::Instant, op: SmgrQueryType, + count: usize, } impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> { @@ -1214,10 +1215,13 @@ impl Drop for GlobalAndPerTimelineHistogramTimer<'_, '_> { elapsed } }; - self.global_latency_histo - .observe(ex_throttled.as_secs_f64()); - if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo { - per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64()); + + for _ in 0..self.count { + self.global_latency_histo + .observe(ex_throttled.as_secs_f64()); + if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo { + per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64()); + } } } } @@ -1385,6 +1389,14 @@ impl SmgrQueryTimePerTimeline { &'a self, op: SmgrQueryType, ctx: &'c RequestContext, + ) -> Option { + self.start_timer_many(op, 1, ctx) + } + pub(crate) fn start_timer_many<'c: 'a, 'a>( + &'a self, + op: SmgrQueryType, + count: usize, + ctx: &'c RequestContext, ) -> Option { let start = Instant::now(); @@ -1422,6 +1434,7 @@ impl SmgrQueryTimePerTimeline { ctx, start, op, + count, }) } } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index f07474df6a..a429dff1fd 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -7,13 +7,13 @@ use bytes::Buf; use futures::FutureExt; use itertools::Itertools; use once_cell::sync::OnceCell; -use pageserver_api::models::TenantState; +use pageserver_api::models::{self, TenantState}; use pageserver_api::models::{ PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse, PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse, - PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse, - PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, - PagestreamNblocksResponse, PagestreamProtocolVersion, + PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest, + PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse, + PagestreamProtocolVersion, }; use pageserver_api::shard::TenantShardId; use postgres_backend::{is_expected_io_error, 
AuthType, PostgresBackend, QueryError}; @@ -44,7 +44,7 @@ use crate::basebackup; use crate::basebackup::BasebackupError; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics; +use crate::metrics::{self}; use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS}; use crate::pgdatadir_mapping::Version; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; @@ -59,7 +59,7 @@ use crate::tenant::GetTimelineError; use crate::tenant::PageReconstructError; use crate::tenant::Timeline; use pageserver_api::key::rel_block_to_key; -use pageserver_api::reltag::SlruKind; +use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use postgres_ffi::BLCKSZ; @@ -105,6 +105,7 @@ pub fn spawn( pg_auth, tcp_listener, conf.pg_auth_type, + conf.server_side_batch_timeout, libpq_ctx, cancel.clone(), ) @@ -153,6 +154,7 @@ pub async fn libpq_listener_main( auth: Option>, listener: tokio::net::TcpListener, auth_type: AuthType, + server_side_batch_timeout: Option, listener_ctx: RequestContext, listener_cancel: CancellationToken, ) -> Connections { @@ -183,6 +185,7 @@ pub async fn libpq_listener_main( local_auth, socket, auth_type, + server_side_batch_timeout, connection_ctx, connections_cancel.child_token(), )); @@ -210,6 +213,7 @@ async fn page_service_conn_main( auth: Option>, socket: tokio::net::TcpStream, auth_type: AuthType, + server_side_batch_timeout: Option, connection_ctx: RequestContext, cancel: CancellationToken, ) -> ConnectionHandlerResult { @@ -260,8 +264,13 @@ async fn page_service_conn_main( // and create a child per-query context when it invokes process_query. // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler // and create the per-query context in process_query ourselves. - let mut conn_handler = - PageServerHandler::new(tenant_manager, auth, connection_ctx, cancel.clone()); + let mut conn_handler = PageServerHandler::new( + tenant_manager, + auth, + server_side_batch_timeout, + connection_ctx, + cancel.clone(), + ); let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?; match pgbackend.run(&mut conn_handler, &cancel).await { @@ -304,6 +313,12 @@ struct PageServerHandler { cancel: CancellationToken, timeline_handles: TimelineHandles, + + /// Messages queued up for the next processing batch + next_batch: Option, + + /// See [`PageServerConf::server_side_batch_timeout`] + server_side_batch_timeout: Option, } struct TimelineHandles { @@ -517,10 +532,47 @@ impl From for QueryError { } } +enum BatchedFeMessage { + Exists { + span: Span, + req: models::PagestreamExistsRequest, + }, + Nblocks { + span: Span, + req: models::PagestreamNblocksRequest, + }, + GetPage { + span: Span, + shard: timeline::handle::Handle, + effective_request_lsn: Lsn, + pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, + }, + DbSize { + span: Span, + req: models::PagestreamDbSizeRequest, + }, + GetSlruSegment { + span: Span, + req: models::PagestreamGetSlruSegmentRequest, + }, + RespondError { + span: Span, + error: PageStreamError, + }, +} + +enum BatchOrEof { + /// In the common case, this has one entry. + /// At most, it has two entries: the first is the leftover batch, the second is an error. 
+ Batch(smallvec::SmallVec<[BatchedFeMessage; 1]>), + Eof, +} + impl PageServerHandler { pub fn new( tenant_manager: Arc, auth: Option>, + server_side_batch_timeout: Option, connection_ctx: RequestContext, cancel: CancellationToken, ) -> Self { @@ -530,6 +582,8 @@ impl PageServerHandler { connection_ctx, timeline_handles: TimelineHandles::new(tenant_manager), cancel, + next_batch: None, + server_side_batch_timeout, } } @@ -557,6 +611,221 @@ impl PageServerHandler { ) } + async fn read_batch_from_connection( + &mut self, + pgb: &mut PostgresBackend, + tenant_id: &TenantId, + timeline_id: &TimelineId, + ctx: &RequestContext, + ) -> Result, QueryError> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { + let mut batch = self.next_batch.take(); + let mut batch_started_at: Option = None; + + let next_batch: Option = loop { + let sleep_fut = match (self.server_side_batch_timeout, batch_started_at) { + (Some(batch_timeout), Some(started_at)) => futures::future::Either::Left( + tokio::time::sleep_until((started_at + batch_timeout).into()), + ), + _ => futures::future::Either::Right(futures::future::pending()), + }; + + let msg = tokio::select! { + biased; + _ = self.cancel.cancelled() => { + return Err(QueryError::Shutdown) + } + msg = pgb.read_message() => { + msg + } + _ = sleep_fut => { + assert!(batch.is_some()); + break None; + } + }; + let copy_data_bytes = match msg? { + Some(FeMessage::CopyData(bytes)) => bytes, + Some(FeMessage::Terminate) => { + return Ok(Some(BatchOrEof::Eof)); + } + Some(m) => { + return Err(QueryError::Other(anyhow::anyhow!( + "unexpected message: {m:?} during COPY" + ))); + } + None => { + return Ok(Some(BatchOrEof::Eof)); + } // client disconnected + }; + trace!("query: {copy_data_bytes:?}"); + fail::fail_point!("ps::handle-pagerequest-message"); + + // parse request + let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?; + + let this_msg = match neon_fe_msg { + PagestreamFeMessage::Exists(req) => BatchedFeMessage::Exists { + span: tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn), + req, + }, + PagestreamFeMessage::Nblocks(req) => BatchedFeMessage::Nblocks { + span: tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn), + req, + }, + PagestreamFeMessage::DbSize(req) => BatchedFeMessage::DbSize { + span: tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn), + req, + }, + PagestreamFeMessage::GetSlruSegment(req) => BatchedFeMessage::GetSlruSegment { + span: tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn), + req, + }, + PagestreamFeMessage::GetPage(PagestreamGetPageRequest { + request_lsn, + not_modified_since, + rel, + blkno, + }) => { + // shard_id is filled in by the handler + let span = tracing::info_span!( + "handle_get_page_at_lsn_request_batched", + %tenant_id, %timeline_id, shard_id = tracing::field::Empty, req_lsn = %request_lsn, + batch_size = tracing::field::Empty, batch_id = tracing::field::Empty + ); + + macro_rules! 
current_batch_and_error { + ($error:expr) => {{ + let error = BatchedFeMessage::RespondError { + span, + error: $error, + }; + let batch_and_error = match batch { + Some(b) => smallvec::smallvec![b, error], + None => smallvec::smallvec![error], + }; + Ok(Some(BatchOrEof::Batch(batch_and_error))) + }}; + } + + let key = rel_block_to_key(rel, blkno); + let shard = match self + .timeline_handles + .get(*tenant_id, *timeline_id, ShardSelector::Page(key)) + .instrument(span.clone()) + .await + { + Ok(tl) => tl, + Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => { + // We already know this tenant exists in general, because we resolved it at + // start of connection. Getting a NotFound here indicates that the shard containing + // the requested page is not present on this node: the client's knowledge of shard->pageserver + // mapping is out of date. + // + // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via + // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration + // and talk to a different pageserver. + return current_batch_and_error!(PageStreamError::Reconnect( + "getpage@lsn request routed to wrong shard".into() + )); + } + Err(e) => { + return current_batch_and_error!(e.into()); + } + }; + let effective_request_lsn = match Self::wait_or_get_last_lsn( + &shard, + request_lsn, + not_modified_since, + &shard.get_latest_gc_cutoff_lsn(), + ctx, + ) + // TODO: if we actually need to wait for lsn here, it delays the entire batch which doesn't need to wait + .await + { + Ok(lsn) => lsn, + Err(e) => { + return current_batch_and_error!(e); + } + }; + BatchedFeMessage::GetPage { + span, + shard, + effective_request_lsn, + pages: smallvec::smallvec![(rel, blkno)], + } + } + }; + + let batch_timeout = match self.server_side_batch_timeout { + Some(value) => value, + None => { + // Batching is not enabled - stop on the first message. + return Ok(Some(BatchOrEof::Batch(smallvec::smallvec![this_msg]))); + } + }; + + // check if we can batch + match (&mut batch, this_msg) { + (None, this_msg) => { + batch = Some(this_msg); + } + ( + Some(BatchedFeMessage::GetPage { + span: _, + shard: accum_shard, + pages: accum_pages, + effective_request_lsn: accum_lsn, + }), + BatchedFeMessage::GetPage { + span: _, + shard: this_shard, + pages: this_pages, + effective_request_lsn: this_lsn, + }, + ) if async { + assert_eq!(this_pages.len(), 1); + if accum_pages.len() >= Timeline::MAX_GET_VECTORED_KEYS as usize { + assert_eq!(accum_pages.len(), Timeline::MAX_GET_VECTORED_KEYS as usize); + return false; + } + if (accum_shard.tenant_shard_id, accum_shard.timeline_id) + != (this_shard.tenant_shard_id, this_shard.timeline_id) + { + // TODO: we _could_ batch & execute each shard seperately (and in parallel). + // But the current logic for keeping responses in order does not support that. 
+ return false; + } + // the vectored get currently only supports a single LSN, so, bounce as soon + // as the effective request_lsn changes + if *accum_lsn != this_lsn { + return false; + } + true + } + .await => + { + // ok to batch + accum_pages.extend(this_pages); + } + (Some(_), this_msg) => { + // by default, don't continue batching + break Some(this_msg); + } + } + + // batching impl piece + let started_at = batch_started_at.get_or_insert_with(Instant::now); + if started_at.elapsed() > batch_timeout { + break None; + } + }; + + self.next_batch = next_batch; + Ok(batch.map(|b| BatchOrEof::Batch(smallvec::smallvec![b]))) + } + /// Pagestream sub-protocol handler. /// /// It is a simple request-response protocol inside a COPYBOTH session. @@ -592,133 +861,165 @@ impl PageServerHandler { } } + // If [`PageServerHandler`] is reused for multiple pagestreams, + // then make sure to not process requests from the previous ones. + self.next_batch = None; + loop { - // read request bytes (it's exactly 1 PagestreamFeMessage per CopyData) - let msg = tokio::select! { - biased; - _ = self.cancel.cancelled() => { - return Err(QueryError::Shutdown) + let maybe_batched = self + .read_batch_from_connection(pgb, &tenant_id, &timeline_id, &ctx) + .await?; + let batched = match maybe_batched { + Some(BatchOrEof::Batch(b)) => b, + Some(BatchOrEof::Eof) => { + break; } - msg = pgb.read_message() => { msg } - }; - let copy_data_bytes = match msg? { - Some(FeMessage::CopyData(bytes)) => bytes, - Some(FeMessage::Terminate) => break, - Some(m) => { - return Err(QueryError::Other(anyhow::anyhow!( - "unexpected message: {m:?} during COPY" - ))); - } - None => break, // client disconnected - }; - - trace!("query: {copy_data_bytes:?}"); - fail::fail_point!("ps::handle-pagerequest-message"); - - // parse request - let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?; - - // invoke handler function - let (handler_result, span) = match neon_fe_msg { - PagestreamFeMessage::Exists(req) => { - fail::fail_point!("ps::handle-pagerequest-message::exists"); - let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn); - ( - self.handle_get_rel_exists_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) - } - PagestreamFeMessage::Nblocks(req) => { - fail::fail_point!("ps::handle-pagerequest-message::nblocks"); - let span = tracing::info_span!("handle_get_nblocks_request", rel = %req.rel, req_lsn = %req.request_lsn); - ( - self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) - } - PagestreamFeMessage::GetPage(req) => { - fail::fail_point!("ps::handle-pagerequest-message::getpage"); - // shard_id is filled in by the handler - let span = tracing::info_span!("handle_get_page_at_lsn_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.request_lsn); - ( - self.handle_get_page_at_lsn_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) - } - PagestreamFeMessage::DbSize(req) => { - fail::fail_point!("ps::handle-pagerequest-message::dbsize"); - let span = tracing::info_span!("handle_db_size_request", dbnode = %req.dbnode, req_lsn = %req.request_lsn); - ( - self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) - } - PagestreamFeMessage::GetSlruSegment(req) => { - fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); - let span = 
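To summarize the batching rule buried in the guard above: a GetPage request joins the accumulating batch only while the batch is under `Timeline::MAX_GET_VECTORED_KEYS`, targets the same tenant shard and timeline, and carries the same effective request LSN; anything else flushes the current batch. A condensed, hypothetical restatement (`BatchKeySketch` is a simplification of the state the real code keeps inside `BatchedFeMessage::GetPage`):

```rust
// Hypothetical condensation of the merge guard; the real code compares the
// resolved shard handle and extends the accumulated page list directly.
struct BatchKeySketch {
    tenant_shard_id: TenantShardId,
    timeline_id: TimelineId,
    effective_request_lsn: Lsn,
}

fn can_merge(
    accum: &BatchKeySketch,
    accum_pages: usize,
    next: &BatchKeySketch,
    max_keys: usize,
) -> bool {
    accum_pages < max_keys
        && (accum.tenant_shard_id, accum.timeline_id)
            == (next.tenant_shard_id, next.timeline_id)
        && accum.effective_request_lsn == next.effective_request_lsn
}
```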
tracing::info_span!("handle_get_slru_segment_request", kind = %req.kind, segno = %req.segno, req_lsn = %req.request_lsn); - ( - self.handle_get_slru_segment_request(tenant_id, timeline_id, &req, &ctx) - .instrument(span.clone()) - .await, - span, - ) + None => { + continue; } }; - // Map handler result to protocol behavior. - // Some handler errors cause exit from pagestream protocol. - // Other handler errors are sent back as an error message and we stay in pagestream protocol. - let response_msg = match handler_result { - Err(e) => match &e { - PageStreamError::Shutdown => { - // If we fail to fulfil a request during shutdown, which may be _because_ of - // shutdown, then do not send the error to the client. Instead just drop the - // connection. - span.in_scope(|| info!("dropping connection due to shutdown")); - return Err(QueryError::Shutdown); + for batch in batched { + // invoke handler function + let (handler_results, span): ( + Vec>, + _, + ) = match batch { + BatchedFeMessage::Exists { span, req } => { + fail::fail_point!("ps::handle-pagerequest-message::exists"); + ( + vec![ + self.handle_get_rel_exists_request( + tenant_id, + timeline_id, + &req, + &ctx, + ) + .instrument(span.clone()) + .await, + ], + span, + ) } - PageStreamError::Reconnect(reason) => { - span.in_scope(|| info!("handler requested reconnect: {reason}")); - return Err(QueryError::Reconnect); + BatchedFeMessage::Nblocks { span, req } => { + fail::fail_point!("ps::handle-pagerequest-message::nblocks"); + ( + vec![ + self.handle_get_nblocks_request(tenant_id, timeline_id, &req, &ctx) + .instrument(span.clone()) + .await, + ], + span, + ) } - PageStreamError::Read(_) - | PageStreamError::LsnTimeout(_) - | PageStreamError::NotFound(_) - | PageStreamError::BadRequest(_) => { - // print the all details to the log with {:#}, but for the client the - // error message is enough. Do not log if shutting down, as the anyhow::Error - // here includes cancellation which is not an error. - let full = utils::error::report_compact_sources(&e); - span.in_scope(|| { - error!("error reading relation or page version: {full:#}") - }); - PagestreamBeMessage::Error(PagestreamErrorResponse { - message: e.to_string(), - }) + BatchedFeMessage::GetPage { + span, + shard, + effective_request_lsn, + pages, + } => { + fail::fail_point!("ps::handle-pagerequest-message::getpage"); + ( + { + let npages = pages.len(); + let res = self + .handle_get_page_at_lsn_request_batched( + &shard, + effective_request_lsn, + pages, + &ctx, + ) + .instrument(span.clone()) + .await; + assert_eq!(res.len(), npages); + res + }, + span, + ) } - }, - Ok(response_msg) => response_msg, - }; + BatchedFeMessage::DbSize { span, req } => { + fail::fail_point!("ps::handle-pagerequest-message::dbsize"); + ( + vec![ + self.handle_db_size_request(tenant_id, timeline_id, &req, &ctx) + .instrument(span.clone()) + .await, + ], + span, + ) + } + BatchedFeMessage::GetSlruSegment { span, req } => { + fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); + ( + vec![ + self.handle_get_slru_segment_request( + tenant_id, + timeline_id, + &req, + &ctx, + ) + .instrument(span.clone()) + .await, + ], + span, + ) + } + BatchedFeMessage::RespondError { span, error } => { + // We've already decided to respond with an error, so we don't need to + // call the handler. + (vec![Err(error)], span) + } + }; - // marshal & transmit response message - pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?; - tokio::select! 
{ - biased; - _ = self.cancel.cancelled() => { - // We were requested to shut down. - info!("shutdown request received in page handler"); - return Err(QueryError::Shutdown) + // Map handler result to protocol behavior. + // Some handler errors cause exit from pagestream protocol. + // Other handler errors are sent back as an error message and we stay in pagestream protocol. + for handler_result in handler_results { + let response_msg = match handler_result { + Err(e) => match &e { + PageStreamError::Shutdown => { + // If we fail to fulfil a request during shutdown, which may be _because_ of + // shutdown, then do not send the error to the client. Instead just drop the + // connection. + span.in_scope(|| info!("dropping connection due to shutdown")); + return Err(QueryError::Shutdown); + } + PageStreamError::Reconnect(reason) => { + span.in_scope(|| info!("handler requested reconnect: {reason}")); + return Err(QueryError::Reconnect); + } + PageStreamError::Read(_) + | PageStreamError::LsnTimeout(_) + | PageStreamError::NotFound(_) + | PageStreamError::BadRequest(_) => { + // print the all details to the log with {:#}, but for the client the + // error message is enough. Do not log if shutting down, as the anyhow::Error + // here includes cancellation which is not an error. + let full = utils::error::report_compact_sources(&e); + span.in_scope(|| { + error!("error reading relation or page version: {full:#}") + }); + PagestreamBeMessage::Error(PagestreamErrorResponse { + message: e.to_string(), + }) + } + }, + Ok(response_msg) => response_msg, + }; + + // marshal & transmit response message + pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?; } - res = pgb.flush() => { - res?; + tokio::select! { + biased; + _ = self.cancel.cancelled() => { + // We were requested to shut down. + info!("shutdown request received in page handler"); + return Err(QueryError::Shutdown) + } + res = pgb.flush() => { + res?; + } } } } @@ -964,60 +1265,30 @@ impl PageServerHandler { })) } - #[instrument(skip_all, fields(shard_id))] - async fn handle_get_page_at_lsn_request( + #[instrument(skip_all)] + async fn handle_get_page_at_lsn_request_batched( &mut self, - tenant_id: TenantId, - timeline_id: TimelineId, - req: &PagestreamGetPageRequest, + timeline: &Timeline, + effective_lsn: Lsn, + pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, ctx: &RequestContext, - ) -> Result { - let timeline = match self - .timeline_handles - .get( - tenant_id, - timeline_id, - ShardSelector::Page(rel_block_to_key(req.rel, req.blkno)), - ) - .await - { - Ok(tl) => tl, - Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => { - // We already know this tenant exists in general, because we resolved it at - // start of connection. Getting a NotFound here indicates that the shard containing - // the requested page is not present on this node: the client's knowledge of shard->pageserver - // mapping is out of date. - // - // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via - // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration - // and talk to a different pageserver. 
- return Err(PageStreamError::Reconnect( - "getpage@lsn request routed to wrong shard".into(), - )); - } - Err(e) => return Err(e.into()), - }; - - let _timer = timeline - .query_metrics - .start_timer(metrics::SmgrQueryType::GetPageAtLsn, ctx); - - let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); - let lsn = Self::wait_or_get_last_lsn( - &timeline, - req.request_lsn, - req.not_modified_since, - &latest_gc_cutoff_lsn, + ) -> Vec> { + debug_assert_current_span_has_tenant_and_timeline_id(); + let _timer = timeline.query_metrics.start_timer_many( + metrics::SmgrQueryType::GetPageAtLsn, + pages.len(), ctx, - ) - .await?; + ); - let page = timeline - .get_rel_page_at_lsn(req.rel, req.blkno, Version::Lsn(lsn), ctx) - .await?; + let pages = timeline + .get_rel_page_at_lsn_batched(pages, effective_lsn, ctx) + .await; - Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse { - page, + Vec::from_iter(pages.into_iter().map(|page| { + page.map(|page| { + PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse { page }) + }) + .map_err(PageStreamError::from) })) } @@ -1674,6 +1945,13 @@ fn set_tracing_field_shard_id(timeline: &Timeline) { debug_assert_current_span_has_tenant_and_timeline_id(); } +struct WaitedForLsn(Lsn); +impl From for Lsn { + fn from(WaitedForLsn(lsn): WaitedForLsn) -> Self { + lsn + } +} + #[cfg(test)] mod tests { use utils::shard::ShardCount; diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 7c1abbf3e2..5995d1cc57 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -10,10 +10,15 @@ use super::tenant::{PageReconstructError, Timeline}; use crate::aux_file; use crate::context::RequestContext; use crate::keyspace::{KeySpace, KeySpaceAccum}; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id; +use crate::span::{ + debug_assert_current_span_has_tenant_and_timeline_id, + debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id, +}; +use crate::tenant::timeline::GetVectoredError; use anyhow::{ensure, Context}; use bytes::{Buf, Bytes, BytesMut}; use enum_map::Enum; +use itertools::Itertools; use pageserver_api::key::Key; use pageserver_api::key::{ dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key, @@ -30,7 +35,7 @@ use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; use postgres_ffi::BLCKSZ; use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId}; use serde::{Deserialize, Serialize}; -use std::collections::{hash_map, HashMap, HashSet}; +use std::collections::{hash_map, BTreeMap, HashMap, HashSet}; use std::ops::ControlFlow; use std::ops::Range; use strum::IntoEnumIterator; @@ -193,26 +198,195 @@ impl Timeline { version: Version<'_>, ctx: &RequestContext, ) -> Result { - if tag.relnode == 0 { - return Err(PageReconstructError::Other( - RelationError::InvalidRelnode.into(), - )); + match version { + Version::Lsn(effective_lsn) => { + let pages = smallvec::smallvec![(tag, blknum)]; + let res = self + .get_rel_page_at_lsn_batched(pages, effective_lsn, ctx) + .await; + assert_eq!(res.len(), 1); + res.into_iter().next().unwrap() + } + Version::Modified(modification) => { + if tag.relnode == 0 { + return Err(PageReconstructError::Other( + RelationError::InvalidRelnode.into(), + )); + } + + let nblocks = self.get_rel_size(tag, version, ctx).await?; + if blknum >= nblocks { + debug!( + "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page", + tag, + blknum, + 
version.get_lsn(), + nblocks + ); + return Ok(ZERO_PAGE.clone()); + } + + let key = rel_block_to_key(tag, blknum); + modification.get(key, ctx).await + } + } + } + + /// Like [`Self::get_rel_page_at_lsn`], but returns a batch of pages. + /// + /// The ordering of the returned vec corresponds to the ordering of `pages`. + pub(crate) async fn get_rel_page_at_lsn_batched( + &self, + pages: smallvec::SmallVec<[(RelTag, BlockNumber); 1]>, + effective_lsn: Lsn, + ctx: &RequestContext, + ) -> Vec> { + debug_assert_current_span_has_tenant_and_timeline_id(); + + let mut slots_filled = 0; + let page_count = pages.len(); + + // Would be nice to use smallvec here but it doesn't provide the spare_capacity_mut() API. + let mut result = Vec::with_capacity(pages.len()); + let result_slots = result.spare_capacity_mut(); + + let mut keys_slots: BTreeMap> = BTreeMap::default(); + for (response_slot_idx, (tag, blknum)) in pages.into_iter().enumerate() { + if tag.relnode == 0 { + result_slots[response_slot_idx].write(Err(PageReconstructError::Other( + RelationError::InvalidRelnode.into(), + ))); + + slots_filled += 1; + continue; + } + + let nblocks = match self + .get_rel_size(tag, Version::Lsn(effective_lsn), ctx) + .await + { + Ok(nblocks) => nblocks, + Err(err) => { + result_slots[response_slot_idx].write(Err(err)); + slots_filled += 1; + continue; + } + }; + + if blknum >= nblocks { + debug!( + "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page", + tag, blknum, effective_lsn, nblocks + ); + result_slots[response_slot_idx].write(Ok(ZERO_PAGE.clone())); + slots_filled += 1; + continue; + } + + let key = rel_block_to_key(tag, blknum); + + let key_slots = keys_slots.entry(key).or_default(); + key_slots.push(response_slot_idx); } - let nblocks = self.get_rel_size(tag, version, ctx).await?; - if blknum >= nblocks { - debug!( - "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page", - tag, - blknum, - version.get_lsn(), - nblocks - ); - return Ok(ZERO_PAGE.clone()); + let keyspace = { + // add_key requires monotonicity + let mut acc = KeySpaceAccum::new(); + for key in keys_slots + .keys() + // in fact it requires strong monotonicity + .dedup() + { + acc.add_key(*key); + } + acc.to_keyspace() + }; + + match self.get_vectored(keyspace, effective_lsn, ctx).await { + Ok(results) => { + for (key, res) in results { + let mut key_slots = keys_slots.remove(&key).unwrap().into_iter(); + let first_slot = key_slots.next().unwrap(); + + for slot in key_slots { + let clone = match &res { + Ok(buf) => Ok(buf.clone()), + Err(err) => Err(match err { + PageReconstructError::Cancelled => { + PageReconstructError::Cancelled + } + + x @ PageReconstructError::Other(_) | + x @ PageReconstructError::AncestorLsnTimeout(_) | + x @ PageReconstructError::WalRedo(_) | + x @ PageReconstructError::MissingKey(_) => { + PageReconstructError::Other(anyhow::anyhow!("there was more than one request for this key in the batch, error logged once: {x:?}")) + }, + }), + }; + + result_slots[slot].write(clone); + slots_filled += 1; + } + + result_slots[first_slot].write(res); + slots_filled += 1; + } + } + Err(err) => { + // this cannot really happen because get_vectored only errors globally on invalid LSN or too large batch size + // (We enforce the max batch size outside of this function, in the code that constructs the batch request.) 
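+                // Still, fan the single global error out to every response slot that has not been
+                // filled yet, so the returned vector stays complete and ordered like the request.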
+ for slot in keys_slots.values().flatten() { + // this whole `match` is a lot like `From for PageReconstructError` + // but without taking ownership of the GetVectoredError + let err = match &err { + GetVectoredError::Cancelled => { + Err(PageReconstructError::Cancelled) + } + // TODO: restructure get_vectored API to make this error per-key + GetVectoredError::MissingKey(err) => { + Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys were missing: {err:?}"))) + } + // TODO: restructure get_vectored API to make this error per-key + GetVectoredError::GetReadyAncestorError(err) => { + Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}"))) + } + // TODO: restructure get_vectored API to make this error per-key + GetVectoredError::Other(err) => { + Err(PageReconstructError::Other( + anyhow::anyhow!("whole vectored get request failed: {err:?}"), + )) + } + // TODO: we can prevent this error class by moving this check into the type system + GetVectoredError::InvalidLsn(e) => { + Err(anyhow::anyhow!("invalid LSN: {e:?}").into()) + } + // NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS + // TODO: we can prevent this error class by moving this check into the type system + GetVectoredError::Oversized(err) => { + Err(anyhow::anyhow!( + "batching oversized: {err:?}" + ) + .into()) + } + }; + + result_slots[*slot].write(err); + } + + slots_filled += keys_slots.values().map(|slots| slots.len()).sum::(); + } + }; + + assert_eq!(slots_filled, page_count); + // SAFETY: + // 1. `result` and any of its uninint members are not read from until this point + // 2. The length below is tracked at run-time and matches the number of requested pages. + unsafe { + result.set_len(page_count); } - let key = rel_block_to_key(tag, blknum); - version.get(self, key, ctx).await + result } // Get size of a database in blocks diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c6fc3bfe6c..8e9e3890ba 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -39,6 +39,7 @@ use remote_timeline_client::UploadQueueNotReadyError; use std::collections::BTreeMap; use std::fmt; use std::future::Future; +use std::sync::atomic::AtomicBool; use std::sync::Weak; use std::time::SystemTime; use storage_broker::BrokerClientChannel; @@ -248,7 +249,8 @@ struct TimelinePreload { pub(crate) struct TenantPreload { tenant_manifest: TenantManifest, - timelines: HashMap, + /// Map from timeline ID to a possible timeline preload. It is None iff the timeline is offloaded according to the manifest. + timelines: HashMap>, } /// When we spawn a tenant, there is a special mode for tenant creation that @@ -524,6 +526,9 @@ pub struct OffloadedTimeline { /// Prevent two tasks from deleting the timeline at the same time. If held, the /// timeline is being deleted. If 'true', the timeline has already been deleted. pub delete_progress: TimelineDeleteProgress, + + /// Part of the `OffloadedTimeline` object's lifecycle: this needs to be set before we drop it + pub deleted_from_ancestor: AtomicBool, } impl OffloadedTimeline { @@ -533,9 +538,16 @@ impl OffloadedTimeline { /// the timeline is not in a stopped state. /// Panics if the timeline is not archived. 
fn from_timeline(timeline: &Timeline) -> Result { - let ancestor_retain_lsn = timeline - .get_ancestor_timeline_id() - .map(|_timeline_id| timeline.get_ancestor_lsn()); + let (ancestor_retain_lsn, ancestor_timeline_id) = + if let Some(ancestor_timeline) = timeline.ancestor_timeline() { + let ancestor_lsn = timeline.get_ancestor_lsn(); + let ancestor_timeline_id = ancestor_timeline.timeline_id; + let mut gc_info = ancestor_timeline.gc_info.write().unwrap(); + gc_info.insert_child(timeline.timeline_id, ancestor_lsn, MaybeOffloaded::Yes); + (Some(ancestor_lsn), Some(ancestor_timeline_id)) + } else { + (None, None) + }; let archived_at = timeline .remote_client .archived_at_stopped_queue()? @@ -543,14 +555,17 @@ impl OffloadedTimeline { Ok(Self { tenant_shard_id: timeline.tenant_shard_id, timeline_id: timeline.timeline_id, - ancestor_timeline_id: timeline.get_ancestor_timeline_id(), + ancestor_timeline_id, ancestor_retain_lsn, archived_at, delete_progress: timeline.delete_progress.clone(), + deleted_from_ancestor: AtomicBool::new(false), }) } fn from_manifest(tenant_shard_id: TenantShardId, manifest: &OffloadedTimelineManifest) -> Self { + // We expect to reach this case in tenant loading, where the `retain_lsn` is populated in the parent's `gc_info` + // by the `initialize_gc_info` function. let OffloadedTimelineManifest { timeline_id, ancestor_timeline_id, @@ -564,6 +579,7 @@ impl OffloadedTimeline { ancestor_retain_lsn, archived_at, delete_progress: TimelineDeleteProgress::default(), + deleted_from_ancestor: AtomicBool::new(false), } } fn manifest(&self) -> OffloadedTimelineManifest { @@ -581,6 +597,37 @@ impl OffloadedTimeline { archived_at: *archived_at, } } + /// Delete this timeline's retain_lsn from its ancestor, if present in the given tenant + fn delete_from_ancestor_with_timelines( + &self, + timelines: &std::sync::MutexGuard<'_, HashMap>>, + ) { + if let (Some(_retain_lsn), Some(ancestor_timeline_id)) = + (self.ancestor_retain_lsn, self.ancestor_timeline_id) + { + if let Some((_, ancestor_timeline)) = timelines + .iter() + .find(|(tid, _tl)| **tid == ancestor_timeline_id) + { + let removal_happened = ancestor_timeline + .gc_info + .write() + .unwrap() + .remove_child_offloaded(self.timeline_id); + if !removal_happened { + tracing::error!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id, + "Couldn't remove retain_lsn entry from offloaded timeline's parent: already removed"); + } + } + } + self.deleted_from_ancestor.store(true, Ordering::Release); + } + /// Call [`Self::delete_from_ancestor_with_timelines`] instead if possible. + /// + /// As the entire tenant is being dropped, don't bother deregistering the `retain_lsn` from the ancestor. 
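+    /// Only sets the `deleted_from_ancestor` flag so the `Drop` impl below stays quiet;
+    /// no ancestor `gc_info` bookkeeping is touched.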
+ fn defuse_for_tenant_drop(&self) { + self.deleted_from_ancestor.store(true, Ordering::Release); + } } impl fmt::Debug for OffloadedTimeline { @@ -589,6 +636,17 @@ impl fmt::Debug for OffloadedTimeline { } } +impl Drop for OffloadedTimeline { + fn drop(&mut self) { + if !self.deleted_from_ancestor.load(Ordering::Acquire) { + tracing::warn!( + "offloaded timeline {} was dropped without having cleaned it up at the ancestor", + self.timeline_id + ); + } + } +} + #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] pub enum MaybeOffloaded { Yes, @@ -1340,7 +1398,7 @@ impl Tenant { // Get list of remote timelines // download index files for every tenant timeline info!("listing remote timelines"); - let (remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines( + let (mut remote_timeline_ids, other_keys) = remote_timeline_client::list_remote_timelines( remote_storage, self.tenant_shard_id, cancel.clone(), @@ -1374,11 +1432,27 @@ impl Tenant { warn!("Unexpected non timeline key {k}"); } + // Avoid downloading IndexPart of offloaded timelines. + let mut offloaded_with_prefix = HashSet::new(); + for offloaded in tenant_manifest.offloaded_timelines.iter() { + if remote_timeline_ids.remove(&offloaded.timeline_id) { + offloaded_with_prefix.insert(offloaded.timeline_id); + } else { + // We'll take care later of timelines in the manifest without a prefix + } + } + + let timelines = self + .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel) + .await?; + Ok(TenantPreload { tenant_manifest, - timelines: self - .load_timelines_metadata(remote_timeline_ids, remote_storage, cancel) - .await?, + timelines: timelines + .into_iter() + .map(|(id, tl)| (id, Some(tl))) + .chain(offloaded_with_prefix.into_iter().map(|id| (id, None))) + .collect(), }) } @@ -1409,6 +1483,19 @@ impl Tenant { offloaded_timelines_list.push((timeline_id, Arc::new(offloaded_timeline))); offloaded_timeline_ids.insert(timeline_id); } + // Complete deletions for offloaded timeline id's from manifest. + // The manifest will be uploaded later in this function. + offloaded_timelines_list + .retain(|(offloaded_id, offloaded)| { + // Existence of a timeline is finally determined by the existence of an index-part.json in remote storage. + // If there is dangling references in another location, they need to be cleaned up. + let delete = !preload.timelines.contains_key(offloaded_id); + if delete { + tracing::info!("Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found"); + offloaded.defuse_for_tenant_drop(); + } + !delete + }); let mut timelines_to_resume_deletions = vec![]; @@ -1416,10 +1503,9 @@ impl Tenant { let mut timeline_ancestors = HashMap::new(); let mut existent_timelines = HashSet::new(); for (timeline_id, preload) in preload.timelines { - if offloaded_timeline_ids.remove(&timeline_id) { - // The timeline is offloaded, skip loading it. - continue; - } + let Some(preload) = preload else { continue }; + // This is an invariant of the `preload` function's API + assert!(!offloaded_timeline_ids.contains(&timeline_id)); let index_part = match preload.index_part { Ok(i) => { debug!("remote index part exists for timeline {timeline_id}"); @@ -1529,30 +1615,13 @@ impl Tenant { .context("resume_deletion") .map_err(LoadLocalTimelineError::ResumeDeletion)?; } - // Complete deletions for offloaded timeline id's. 
- offloaded_timelines_list - .retain(|(offloaded_id, _offloaded)| { - // At this point, offloaded_timeline_ids has the list of all offloaded timelines - // without a prefix in S3, so they are inexistent. - // In the end, existence of a timeline is finally determined by the existence of an index-part.json in remote storage. - // If there is a dangling reference in another location, they need to be cleaned up. - let delete = offloaded_timeline_ids.contains(offloaded_id); - if delete { - tracing::info!("Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found"); - } - !delete - }); - if !offloaded_timelines_list.is_empty() { - tracing::info!( - "Tenant has {} offloaded timelines", - offloaded_timelines_list.len() - ); - } + let needs_manifest_upload = + offloaded_timelines_list.len() != preload.tenant_manifest.offloaded_timelines.len(); { let mut offloaded_timelines_accessor = self.timelines_offloaded.lock().unwrap(); offloaded_timelines_accessor.extend(offloaded_timelines_list.into_iter()); } - if !offloaded_timeline_ids.is_empty() { + if needs_manifest_upload { self.store_tenant_manifest().await?; } @@ -1927,9 +1996,15 @@ impl Tenant { ))); }; let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap(); - if offloaded_timelines.remove(&timeline_id).is_none() { - warn!("timeline already removed from offloaded timelines"); + match offloaded_timelines.remove(&timeline_id) { + Some(offloaded) => { + offloaded.delete_from_ancestor_with_timelines(&timelines); + } + None => warn!("timeline already removed from offloaded timelines"), } + + self.initialize_gc_info(&timelines, &offloaded_timelines, Some(timeline_id)); + Arc::clone(timeline) }; @@ -2382,6 +2457,12 @@ impl Tenant { .remote_client .wait_completion() .await + .map_err(|e| match e { + WaitCompletionError::NotInitialized( + e, // If the queue is already stopped, it's a shutdown error. + ) if e.is_stopping() => CreateTimelineError::ShuttingDown, + e => CreateTimelineError::Other(e.into()), + }) .context("wait for timeline initial uploads to complete")?; // The creating task is responsible for activating the timeline. @@ -2667,7 +2748,7 @@ impl Tenant { .filter(|timeline| !(timeline.is_broken() || timeline.is_stopping())); // Before activation, populate each Timeline's GcInfo with information about its children - self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor); + self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor, None); // Spawn gc and compaction loops. The loops will shut themselves // down when they notice that the tenant is inactive. 
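Illustrative sketch (not part of the diff): the lifecycle-flag pattern these `OffloadedTimeline` changes rely on. One of the cleanup paths (`delete_from_ancestor_with_timelines` or `defuse_for_tenant_drop`) must set `deleted_from_ancestor` before the value is dropped, otherwise the new `Drop` impl warns. The names below (`NeedsAncestorCleanup`, `mark_cleaned_up`) are simplified stand-ins, not the real pageserver types.

    use std::sync::atomic::{AtomicBool, Ordering};

    struct NeedsAncestorCleanup {
        cleaned_up: AtomicBool,
    }

    impl NeedsAncestorCleanup {
        fn new() -> Self {
            Self { cleaned_up: AtomicBool::new(false) }
        }

        // Stand-in for `delete_from_ancestor_with_timelines` / `defuse_for_tenant_drop`.
        fn mark_cleaned_up(&self) {
            self.cleaned_up.store(true, Ordering::Release);
        }
    }

    impl Drop for NeedsAncestorCleanup {
        fn drop(&mut self) {
            if !self.cleaned_up.load(Ordering::Acquire) {
                // The real code uses tracing::warn! with the timeline id.
                eprintln!("offloaded timeline dropped without ancestor cleanup");
            }
        }
    }

    fn main() {
        let tl = NeedsAncestorCleanup::new();
        tl.mark_cleaned_up(); // e.g. defuse_for_tenant_drop() during tenant shutdown
        drop(tl); // no warning is emitted
    }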
@@ -2782,8 +2863,14 @@ impl Tenant { let timeline_id = timeline.timeline_id; let span = tracing::info_span!("timeline_shutdown", %timeline_id, ?shutdown_mode); js.spawn(async move { timeline.shutdown(shutdown_mode).instrument(span).await }); - }) - }; + }); + } + { + let timelines_offloaded = self.timelines_offloaded.lock().unwrap(); + timelines_offloaded.values().for_each(|timeline| { + timeline.defuse_for_tenant_drop(); + }); + } // test_long_timeline_create_then_tenant_delete is leaning on this message tracing::info!("Waiting for timelines..."); while let Some(res) = js.join_next().await { @@ -3767,10 +3854,13 @@ impl Tenant { &self, timelines: &std::sync::MutexGuard>>, timelines_offloaded: &std::sync::MutexGuard>>, + restrict_to_timeline: Option, ) { - // This function must be called before activation: after activation timeline create/delete operations - // might happen, and this function is not safe to run concurrently with those. - assert!(!self.is_active()); + if restrict_to_timeline.is_none() { + // This function must be called before activation: after activation timeline create/delete operations + // might happen, and this function is not safe to run concurrently with those. + assert!(!self.is_active()); + } // Scan all timelines. For each timeline, remember the timeline ID and // the branch point where it was created. @@ -3803,7 +3893,12 @@ impl Tenant { let horizon = self.get_gc_horizon(); // Populate each timeline's GcInfo with information about its child branches - for timeline in timelines.values() { + let timelines_to_write = if let Some(timeline_id) = restrict_to_timeline { + itertools::Either::Left(timelines.get(&timeline_id).into_iter()) + } else { + itertools::Either::Right(timelines.values()) + }; + for timeline in timelines_to_write { let mut branchpoints: Vec<(Lsn, TimelineId, MaybeOffloaded)> = all_branchpoints .remove(&timeline.timeline_id) .unwrap_or_default(); @@ -5170,7 +5265,7 @@ mod tests { use storage_layer::PersistentLayerKey; use tests::storage_layer::ValuesReconstructState; use tests::timeline::{GetVectoredError, ShutdownMode}; - use timeline::DeltaLayerTestDesc; + use timeline::{CompactOptions, DeltaLayerTestDesc}; use utils::id::TenantId; #[cfg(feature = "testing")] @@ -7644,7 +7739,7 @@ mod tests { let cancel = CancellationToken::new(); tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -7721,7 +7816,7 @@ mod tests { guard.cutoffs.space = Lsn(0x40); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8153,7 +8248,7 @@ mod tests { let cancel = CancellationToken::new(); tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8182,7 +8277,7 @@ mod tests { guard.cutoffs.space = Lsn(0x40); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -8735,7 +8830,14 @@ mod tests { dryrun_flags.insert(CompactFlags::DryRun); tline - .compact_with_gc(&cancel, dryrun_flags, &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: dryrun_flags, + compact_range: None, + }, + &ctx, + ) .await .unwrap(); // We expect layer map to be the same b/c the dry run flag, but we don't know whether there will be other background jobs @@ -8743,14 +8845,14 @@ mod tests { verify_result().await; tline - .compact_with_gc(&cancel, 
EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // compact again tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -8763,14 +8865,14 @@ mod tests { guard.cutoffs.space = Lsn(0x38); } tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result // not increasing the GC horizon and compact again tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -8964,7 +9066,14 @@ mod tests { dryrun_flags.insert(CompactFlags::DryRun); tline - .compact_with_gc(&cancel, dryrun_flags, &ctx) + .compact_with_gc( + &cancel, + CompactOptions { + flags: dryrun_flags, + compact_range: None, + }, + &ctx, + ) .await .unwrap(); // We expect layer map to be the same b/c the dry run flag, but we don't know whether there will be other background jobs @@ -8972,14 +9081,14 @@ mod tests { verify_result().await; tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; // compact again tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); verify_result().await; @@ -9164,7 +9273,7 @@ mod tests { let cancel = CancellationToken::new(); branch_tline - .compact_with_gc(&cancel, EnumSet::new(), &ctx) + .compact_with_gc(&cancel, CompactOptions::default(), &ctx) .await .unwrap(); @@ -9650,4 +9759,54 @@ mod tests { Ok(()) } + + #[cfg(feature = "testing")] + #[tokio::test] + async fn test_timeline_offload_retain_lsn() -> anyhow::Result<()> { + let harness = TenantHarness::create("test_timeline_offload_retain_lsn") + .await + .unwrap(); + let (tenant, ctx) = harness.load().await; + let tline_parent = tenant + .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) + .await + .unwrap(); + let tline_child = tenant + .branch_timeline_test(&tline_parent, NEW_TIMELINE_ID, Some(Lsn(0x20)), &ctx) + .await + .unwrap(); + { + let gc_info_parent = tline_parent.gc_info.read().unwrap(); + assert_eq!( + gc_info_parent.retain_lsns, + vec![(Lsn(0x20), tline_child.timeline_id, MaybeOffloaded::No)] + ); + } + // We have to directly call the remote_client instead of using the archive function to avoid constructing broker client... 
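+        // Schedule the `Archived` index upload, wait for the upload queue to drain, then offload the child.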
+ tline_child + .remote_client + .schedule_index_upload_for_timeline_archival_state(TimelineArchivalState::Archived) + .unwrap(); + tline_child.remote_client.wait_completion().await.unwrap(); + offload_timeline(&tenant, &tline_child) + .instrument(tracing::info_span!(parent: None, "offload_test", tenant_id=%"test", shard_id=%"test", timeline_id=%"test")) + .await.unwrap(); + let child_timeline_id = tline_child.timeline_id; + Arc::try_unwrap(tline_child).unwrap(); + + { + let gc_info_parent = tline_parent.gc_info.read().unwrap(); + assert_eq!( + gc_info_parent.retain_lsns, + vec![(Lsn(0x20), child_timeline_id, MaybeOffloaded::Yes)] + ); + } + + tenant + .get_offloaded_timeline(child_timeline_id) + .unwrap() + .defuse_for_tenant_drop(); + + Ok(()) + } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 09ddb19765..0eb3de21e9 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -477,8 +477,31 @@ impl GcInfo { self.retain_lsns.sort_by_key(|i| i.0); } - pub(super) fn remove_child(&mut self, child_id: TimelineId) { - self.retain_lsns.retain(|i| i.1 != child_id); + pub(super) fn remove_child_maybe_offloaded( + &mut self, + child_id: TimelineId, + maybe_offloaded: MaybeOffloaded, + ) -> bool { + // Remove at most one element. Needed for correctness if there is two live `Timeline` objects referencing + // the same timeline. Shouldn't but maybe can occur when Arc's live longer than intended. + let mut removed = false; + self.retain_lsns.retain(|i| { + if removed { + return true; + } + let remove = i.1 == child_id && i.2 == maybe_offloaded; + removed |= remove; + !remove + }); + removed + } + + pub(super) fn remove_child_not_offloaded(&mut self, child_id: TimelineId) -> bool { + self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::No) + } + + pub(super) fn remove_child_offloaded(&mut self, child_id: TimelineId) -> bool { + self.remove_child_maybe_offloaded(child_id, MaybeOffloaded::Yes) } } @@ -751,6 +774,21 @@ pub(crate) enum CompactFlags { DryRun, } +#[serde_with::serde_as] +#[derive(Debug, Clone, serde::Deserialize)] +pub(crate) struct CompactRange { + #[serde_as(as = "serde_with::DisplayFromStr")] + pub start: Key, + #[serde_as(as = "serde_with::DisplayFromStr")] + pub end: Key, +} + +#[derive(Clone, Default)] +pub(crate) struct CompactOptions { + pub flags: EnumSet, + pub compact_range: Option, +} + impl std::fmt::Debug for Timeline { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "Timeline<{}>", self.timeline_id) @@ -1589,6 +1627,25 @@ impl Timeline { cancel: &CancellationToken, flags: EnumSet, ctx: &RequestContext, + ) -> Result { + self.compact_with_options( + cancel, + CompactOptions { + flags, + compact_range: None, + }, + ctx, + ) + .await + } + + /// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending + /// compaction tasks. + pub(crate) async fn compact_with_options( + self: &Arc, + cancel: &CancellationToken, + options: CompactOptions, + ctx: &RequestContext, ) -> Result { // most likely the cancellation token is from background task, but in tests it could be the // request task as well. 
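Illustrative sketch (not part of the diff): the "remove at most one matching entry" retain pattern used by `GcInfo::remove_child_maybe_offloaded` above, with simplified stand-in types (`u64` lsn, `u32` child id, `bool` offloaded flag) instead of the real `Lsn`/`TimelineId`/`MaybeOffloaded`; `remove_child_once` is a made-up name.

    fn remove_child_once(retain_lsns: &mut Vec<(u64, u32, bool)>, child_id: u32, offloaded: bool) -> bool {
        let mut removed = false;
        retain_lsns.retain(|&(_, id, off)| {
            if removed {
                return true; // one entry already removed, keep everything else untouched
            }
            let remove = id == child_id && off == offloaded;
            removed |= remove;
            !remove
        });
        removed
    }

    fn main() {
        // Duplicate entries for the same child (e.g. two live handles): only one is removed,
        // and the return value tells the caller whether anything was removed at all.
        let mut retain_lsns = vec![(0x20_u64, 7_u32, false), (0x20, 7, false), (0x30, 9, true)];
        assert!(remove_child_once(&mut retain_lsns, 7, false));
        assert_eq!(retain_lsns, vec![(0x20, 7, false), (0x30, 9, true)]);
        assert!(!remove_child_once(&mut retain_lsns, 42, false));
    }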
@@ -1626,7 +1683,7 @@ impl Timeline { self.compact_tiered(cancel, ctx).await?; Ok(false) } - CompactionAlgorithm::Legacy => self.compact_legacy(cancel, flags, ctx).await, + CompactionAlgorithm::Legacy => self.compact_legacy(cancel, options, ctx).await, } } @@ -4501,7 +4558,10 @@ impl Drop for Timeline { // This lock should never be poisoned, but in case it is we do a .map() instead of // an unwrap(), to avoid panicking in a destructor and thereby aborting the process. if let Ok(mut gc_info) = ancestor.gc_info.write() { - gc_info.remove_child(self.timeline_id) + if !gc_info.remove_child_not_offloaded(self.timeline_id) { + tracing::error!(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id, + "Couldn't remove retain_lsn entry from offloaded timeline's parent: already removed"); + } } } } @@ -5030,7 +5090,7 @@ impl Timeline { // 1. Is it newer than GC horizon cutoff point? if l.get_lsn_range().end > space_cutoff { - debug!( + info!( "keeping {} because it's newer than space_cutoff {}", l.layer_name(), space_cutoff, @@ -5041,7 +5101,7 @@ impl Timeline { // 2. It is newer than PiTR cutoff point? if l.get_lsn_range().end > time_cutoff { - debug!( + info!( "keeping {} because it's newer than time_cutoff {}", l.layer_name(), time_cutoff, @@ -5060,7 +5120,7 @@ impl Timeline { for retain_lsn in &retain_lsns { // start_lsn is inclusive if &l.get_lsn_range().start <= retain_lsn { - debug!( + info!( "keeping {} because it's still might be referenced by child branch forked at {} is_dropped: xx is_incremental: {}", l.layer_name(), retain_lsn, @@ -5075,7 +5135,7 @@ impl Timeline { if let Some(lsn) = &max_lsn_with_valid_lease { // keep if layer start <= any of the lease if &l.get_lsn_range().start <= lsn { - debug!( + info!( "keeping {} because there is a valid lease preventing GC at {}", l.layer_name(), lsn, @@ -5107,13 +5167,13 @@ impl Timeline { if !layers .image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff)) { - debug!("keeping {} because it is the latest layer", l.layer_name()); + info!("keeping {} because it is the latest layer", l.layer_name()); result.layers_not_updated += 1; continue 'outer; } // We didn't find any reason to keep this file, so remove it. 
- debug!( + info!( "garbage collecting {} is_dropped: xx is_incremental: {}", l.layer_name(), l.is_incremental(), diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index e6ef1aae2b..ecd68ba55e 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -10,7 +10,7 @@ use std::sync::Arc; use super::layer_manager::LayerManager; use super::{ - CompactFlags, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, + CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, ImageLayerCreationMode, RecordedDuration, Timeline, }; @@ -273,22 +273,32 @@ impl Timeline { pub(crate) async fn compact_legacy( self: &Arc, cancel: &CancellationToken, - flags: EnumSet, + options: CompactOptions, ctx: &RequestContext, ) -> Result { - if flags.contains(CompactFlags::EnhancedGcBottomMostCompaction) { - self.compact_with_gc(cancel, flags, ctx) + if options + .flags + .contains(CompactFlags::EnhancedGcBottomMostCompaction) + { + self.compact_with_gc(cancel, options, ctx) .await .map_err(CompactionError::Other)?; return Ok(false); } - if flags.contains(CompactFlags::DryRun) { + if options.flags.contains(CompactFlags::DryRun) { return Err(CompactionError::Other(anyhow!( "dry-run mode is not supported for legacy compaction for now" ))); } + if options.compact_range.is_some() { + // maybe useful in the future? could implement this at some point + return Err(CompactionError::Other(anyhow!( + "compaction range is not supported for legacy compaction for now" + ))); + } + // High level strategy for compaction / image creation: // // 1. First, calculate the desired "partitioning" of the @@ -338,7 +348,7 @@ impl Timeline { .repartition( self.get_last_record_lsn(), self.get_compaction_target_size(), - flags, + options.flags, ctx, ) .await @@ -354,7 +364,7 @@ impl Timeline { let fully_compacted = self .compact_level0( target_file_size, - flags.contains(CompactFlags::ForceL0Compaction), + options.flags.contains(CompactFlags::ForceL0Compaction), ctx, ) .await?; @@ -372,7 +382,10 @@ impl Timeline { .create_image_layers( &partitioning, lsn, - if flags.contains(CompactFlags::ForceImageLayerCreation) { + if options + .flags + .contains(CompactFlags::ForceImageLayerCreation) + { ImageLayerCreationMode::Force } else { ImageLayerCreationMode::Try @@ -1736,11 +1749,19 @@ impl Timeline { pub(crate) async fn compact_with_gc( self: &Arc, cancel: &CancellationToken, - flags: EnumSet, + options: CompactOptions, ctx: &RequestContext, ) -> anyhow::Result<()> { - self.partial_compact_with_gc(Key::MIN..Key::MAX, cancel, flags, ctx) - .await + self.partial_compact_with_gc( + options + .compact_range + .map(|range| range.start..range.end) + .unwrap_or_else(|| Key::MIN..Key::MAX), + cancel, + options.flags, + ctx, + ) + .await } /// An experimental compaction building block that combines compaction with garbage collection. @@ -2021,6 +2042,14 @@ impl Timeline { if cancel.is_cancelled() { return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error } + if self.shard_identity.is_key_disposable(&key) { + // If this shard does not need to store this key, simply skip it. + // + // This is not handled in the filter iterator because shard is determined by hash. + // Therefore, it does not give us any performance benefit to do things like skip + // a whole layer file as handling key spaces (ranges). 
+ continue; + } if !job_desc.compaction_key_range.contains(&key) { if !desc.is_delta { continue; diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 69001a6c40..13a8dfa51a 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -141,9 +141,10 @@ async fn remove_maybe_offloaded_timeline_from_tenant( ); } TimelineOrOffloaded::Offloaded(timeline) => { - timelines_offloaded + let offloaded_timeline = timelines_offloaded .remove(&timeline.timeline_id) .expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map"); + offloaded_timeline.delete_from_ancestor_with_timelines(&timelines); } } diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 1394843467..3595d743bc 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -66,7 +66,7 @@ pub(crate) async fn offload_timeline( let conf = &tenant.conf; delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await; - remove_timeline_from_tenant(tenant, &timeline, &guard); + let remaining_refcount = remove_timeline_from_tenant(tenant, &timeline, &guard); { let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap(); @@ -87,16 +87,20 @@ pub(crate) async fn offload_timeline( // not our actual state of offloaded timelines. tenant.store_tenant_manifest().await?; + tracing::info!("Timeline offload complete (remaining arc refcount: {remaining_refcount})"); + Ok(()) } /// It is important that this gets called when DeletionGuard is being held. /// For more context see comments in [`DeleteTimelineFlow::prepare`] +/// +/// Returns the strong count of the timeline `Arc` fn remove_timeline_from_tenant( tenant: &Tenant, timeline: &Timeline, _: &DeletionGuard, // using it as a witness -) { +) -> usize { // Remove the timeline from the map. let mut timelines = tenant.timelines.lock().unwrap(); let children_exist = timelines @@ -109,7 +113,9 @@ fn remove_timeline_from_tenant( panic!("Timeline grew children while we removed layer files"); } - timelines + let timeline = timelines .remove(&timeline.timeline_id) .expect("timeline that we were deleting was concurrently removed from 'timelines' map"); + + Arc::strong_count(&timeline) } diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index 34bf959058..6ac6920d47 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -331,11 +331,11 @@ pub(super) async fn handle_walreceiver_connection( Ok(()) } - while let Some((record_end_lsn, recdata)) = waldecoder.poll_decode()? { + while let Some((next_record_lsn, recdata)) = waldecoder.poll_decode()? { // It is important to deal with the aligned records as lsn in getPage@LSN is // aligned and can be several bytes bigger. Without this alignment we are // at risk of hitting a deadlock. 
- if !record_end_lsn.is_aligned() { + if !next_record_lsn.is_aligned() { return Err(WalReceiverError::Other(anyhow!("LSN not aligned"))); } @@ -343,7 +343,7 @@ pub(super) async fn handle_walreceiver_connection( let interpreted = InterpretedWalRecord::from_bytes_filtered( recdata, modification.tline.get_shard_identity(), - record_end_lsn, + next_record_lsn, modification.tline.pg_version, )?; @@ -367,10 +367,10 @@ pub(super) async fn handle_walreceiver_connection( .ingest_record(interpreted, &mut modification, &ctx) .await .with_context(|| { - format!("could not ingest record at {record_end_lsn}") + format!("could not ingest record at {next_record_lsn}") })?; if !ingested { - tracing::debug!("ingest: filtered out record @ LSN {record_end_lsn}"); + tracing::debug!("ingest: filtered out record @ LSN {next_record_lsn}"); WAL_INGEST.records_filtered.inc(); filtered_records += 1; } @@ -380,7 +380,7 @@ pub(super) async fn handle_walreceiver_connection( // to timeout the tests. fail_point!("walreceiver-after-ingest"); - last_rec_lsn = record_end_lsn; + last_rec_lsn = next_record_lsn; // Commit every ingest_batch_size records. Even if we filtered out // all records, we still need to call commit to advance the LSN. diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index daa8b99ab0..b9f8c7ea20 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -175,10 +175,16 @@ impl VirtualFile { } pub async fn sync_all(&self) -> Result<(), Error> { + if SYNC_MODE.load(std::sync::atomic::Ordering::Relaxed) == SyncMode::UnsafeNoSync as u8 { + return Ok(()); + } self.inner.sync_all().await } pub async fn sync_data(&self) -> Result<(), Error> { + if SYNC_MODE.load(std::sync::atomic::Ordering::Relaxed) == SyncMode::UnsafeNoSync as u8 { + return Ok(()); + } self.inner.sync_data().await } @@ -233,6 +239,27 @@ impl VirtualFile { } } +/// Indicates whether to enable fsync, fdatasync, or O_SYNC/O_DSYNC when writing +/// files. Switching this off is unsafe and only used for testing on machines +/// with slow drives. +#[repr(u8)] +pub enum SyncMode { + Sync, + UnsafeNoSync, +} + +impl TryFrom for SyncMode { + type Error = u8; + + fn try_from(value: u8) -> Result { + Ok(match value { + v if v == (SyncMode::Sync as u8) => SyncMode::Sync, + v if v == (SyncMode::UnsafeNoSync as u8) => SyncMode::UnsafeNoSync, + x => return Err(x), + }) + } +} + /// /// A virtual file descriptor. You can use this just like std::fs::File, but internally /// the underlying file is closed if the system is low on file descriptors, @@ -1332,12 +1359,13 @@ impl OpenFiles { /// server startup. 
/// #[cfg(not(test))] -pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode) { +pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode, sync_mode: SyncMode) { if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() { panic!("virtual_file::init called twice"); } set_io_mode(mode); io_engine::init(engine); + SYNC_MODE.store(sync_mode as u8, std::sync::atomic::Ordering::Relaxed); crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64); } @@ -1379,6 +1407,9 @@ pub(crate) fn set_io_mode(mode: IoMode) { pub(crate) fn get_io_mode() -> IoMode { IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap() } + +static SYNC_MODE: AtomicU8 = AtomicU8::new(SyncMode::Sync as u8); + #[cfg(test)] mod tests { use crate::context::DownloadBehavior; diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index 84e553f330..ad6ccbc854 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -154,7 +154,7 @@ impl WalIngest { WAL_INGEST.records_received.inc(); let prev_len = modification.len(); - modification.set_lsn(interpreted.end_lsn)?; + modification.set_lsn(interpreted.next_record_lsn)?; if matches!(interpreted.flush_uncommitted, FlushUncommittedRecords::Yes) { // Records of this type should always be preceded by a commit(), as they @@ -1528,6 +1528,11 @@ mod tests { assert_current_logical_size(&tline, Lsn(0x50)); + let test_span = tracing::info_span!(parent: None, "test", + tenant_id=%tline.tenant_shard_id.tenant_id, + shard_id=%tline.tenant_shard_id.shard_slug(), + timeline_id=%tline.timeline_id); + // The relation was created at LSN 2, not visible at LSN 1 yet. assert_eq!( tline @@ -1562,6 +1567,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x20)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 2") ); @@ -1569,6 +1575,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x30)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 3") ); @@ -1576,12 +1583,14 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x40)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 3") ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x40)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1 at 4") ); @@ -1589,18 +1598,21 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 3") ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1 at 4") ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 2, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 2 at 5") ); @@ -1623,12 +1635,14 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x60)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 0 at 3") ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x60)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1 at 4") ); @@ -1643,6 +1657,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 2, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 2 at 5") ); @@ -1675,12 +1690,14 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 0, Version::Lsn(Lsn(0x70)), 
&ctx) + .instrument(test_span.clone()) .await?, ZERO_PAGE ); assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1, Version::Lsn(Lsn(0x70)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1") ); @@ -1701,6 +1718,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blk, Version::Lsn(Lsn(0x80)), &ctx) + .instrument(test_span.clone()) .await?, ZERO_PAGE ); @@ -1708,6 +1726,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, 1500, Version::Lsn(Lsn(0x80)), &ctx) + .instrument(test_span.clone()) .await?, test_img("foo blk 1500") ); @@ -1815,6 +1834,11 @@ mod tests { } m.commit(&ctx).await?; + let test_span = tracing::info_span!(parent: None, "test", + tenant_id=%tline.tenant_shard_id.tenant_id, + shard_id=%tline.tenant_shard_id.shard_slug(), + timeline_id=%tline.timeline_id); + // The relation was created at LSN 20, not visible at LSN 1 yet. assert_eq!( tline @@ -1847,6 +1871,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(lsn), &ctx) + .instrument(test_span.clone()) .await?, test_img(&data) ); @@ -1874,6 +1899,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x60)), &ctx) + .instrument(test_span.clone()) .await?, test_img(&data) ); @@ -1892,6 +1918,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x50)), &ctx) + .instrument(test_span.clone()) .await?, test_img(&data) ); @@ -1928,6 +1955,7 @@ mod tests { assert_eq!( tline .get_rel_page_at_lsn(TESTREL_A, blkno, Version::Lsn(Lsn(0x80)), &ctx) + .instrument(test_span.clone()) .await?, test_img(&data) ); diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index dc87d79e87..51b9f58bbc 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -421,9 +421,7 @@ _PG_init(void) pg_init_libpagestore(); pg_init_walproposer(); - WalSender_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; - LogicalFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; - SlotFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; + Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; InitUnstableExtensionsSupport(); InitLogicalReplicationMonitor(); @@ -512,7 +510,7 @@ neon_shmem_startup_hook(void) if (prev_shmem_startup_hook) prev_shmem_startup_hook(); -#if PG_PG_MAJORVERSION_NUM >= 17 +#if PG_MAJORVERSION_NUM >= 17 WAIT_EVENT_NEON_LFC_MAINTENANCE = WaitEventExtensionNew("Neon/FileCache_Maintenance"); WAIT_EVENT_NEON_LFC_READ = WaitEventExtensionNew("Neon/FileCache_Read"); WAIT_EVENT_NEON_LFC_TRUNCATE = WaitEventExtensionNew("Neon/FileCache_Truncate"); diff --git a/pgxn/neon_walredo/walredoproc.c b/pgxn/neon_walredo/walredoproc.c index 37abb3fa03..619b7255ae 100644 --- a/pgxn/neon_walredo/walredoproc.c +++ b/pgxn/neon_walredo/walredoproc.c @@ -453,7 +453,6 @@ WalRedoMain(int argc, char *argv[]) static void CreateFakeSharedMemoryAndSemaphores(void) { - PGShmemHeader *shim = NULL; PGShmemHeader *hdr; Size size; int numSemas; @@ -486,7 +485,6 @@ CreateFakeSharedMemoryAndSemaphores(void) hdr->totalsize = size; hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader)); - shim = hdr; UsedShmemSegAddr = hdr; UsedShmemSegID = (unsigned long) 42; /* not relevant for non-shared memory */ } @@ -523,8 +521,6 @@ CreateFakeSharedMemoryAndSemaphores(void) */ InitShmemIndex(); - dsm_shmem_init(); - /* * Set up xlog, clog, and buffers */ @@ -599,10 +595,6 @@ CreateFakeSharedMemoryAndSemaphores(void) ShmemBackendArrayAllocation(); #endif - /* Initialize dynamic shared memory facilities. 
*/ - if (!IsUnderPostmaster) - dsm_postmaster_startup(shim); - /* * Now give loadable modules a chance to set up their shmem allocations */ diff --git a/poetry.lock b/poetry.lock index d869761e8e..6171f92391 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,97 +13,112 @@ files = [ [[package]] name = "aiohttp" -version = "3.10.2" +version = "3.10.11" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" files = [ - {file = "aiohttp-3.10.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:95213b3d79c7e387144e9cb7b9d2809092d6ff2c044cb59033aedc612f38fb6d"}, - {file = "aiohttp-3.10.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1aa005f060aff7124cfadaa2493f00a4e28ed41b232add5869e129a2e395935a"}, - {file = "aiohttp-3.10.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eabe6bf4c199687592f5de4ccd383945f485779c7ffb62a9b9f1f8a3f9756df8"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96e010736fc16d21125c7e2dc5c350cd43c528b85085c04bf73a77be328fe944"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99f81f9c1529fd8e03be4a7bd7df32d14b4f856e90ef6e9cbad3415dbfa9166c"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d611d1a01c25277bcdea06879afbc11472e33ce842322496b211319aa95441bb"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e00191d38156e09e8c81ef3d75c0d70d4f209b8381e71622165f22ef7da6f101"}, - {file = "aiohttp-3.10.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74c091a5ded6cb81785de2d7a8ab703731f26de910dbe0f3934eabef4ae417cc"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:18186a80ec5a701816adbf1d779926e1069392cf18504528d6e52e14b5920525"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5a7ceb2a0d2280f23a02c64cd0afdc922079bb950400c3dd13a1ab2988428aac"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8bd7be6ff6c162a60cb8fce65ee879a684fbb63d5466aba3fa5b9288eb04aefa"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:fae962b62944eaebff4f4fddcf1a69de919e7b967136a318533d82d93c3c6bd1"}, - {file = "aiohttp-3.10.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a0fde16d284efcacbe15fb0c1013f0967b6c3e379649239d783868230bf1db42"}, - {file = "aiohttp-3.10.2-cp310-cp310-win32.whl", hash = "sha256:f81cd85a0e76ec7b8e2b6636fe02952d35befda4196b8c88f3cec5b4fb512839"}, - {file = "aiohttp-3.10.2-cp310-cp310-win_amd64.whl", hash = "sha256:54ba10eb5a3481c28282eb6afb5f709aedf53cf9c3a31875ffbdc9fc719ffd67"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:87fab7f948e407444c2f57088286e00e2ed0003ceaf3d8f8cc0f60544ba61d91"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ec6ad66ed660d46503243cbec7b2b3d8ddfa020f984209b3b8ef7d98ce69c3f2"}, - {file = "aiohttp-3.10.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a4be88807283bd96ae7b8e401abde4ca0bab597ba73b5e9a2d98f36d451e9aac"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01c98041f90927c2cbd72c22a164bb816fa3010a047d264969cf82e1d4bcf8d1"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:54e36c67e1a9273ecafab18d6693da0fb5ac48fd48417e4548ac24a918c20998"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7de3ddb6f424af54535424082a1b5d1ae8caf8256ebd445be68c31c662354720"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dd9c7db94b4692b827ce51dcee597d61a0e4f4661162424faf65106775b40e7"}, - {file = "aiohttp-3.10.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e57e21e1167705f8482ca29cc5d02702208d8bf4aff58f766d94bcd6ead838cd"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a1a50e59b720060c29e2951fd9f13c01e1ea9492e5a527b92cfe04dd64453c16"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:686c87782481fda5ee6ba572d912a5c26d9f98cc5c243ebd03f95222af3f1b0f"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:dafb4abb257c0ed56dc36f4e928a7341b34b1379bd87e5a15ce5d883c2c90574"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:494a6f77560e02bd7d1ab579fdf8192390567fc96a603f21370f6e63690b7f3d"}, - {file = "aiohttp-3.10.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6fe8503b1b917508cc68bf44dae28823ac05e9f091021e0c41f806ebbb23f92f"}, - {file = "aiohttp-3.10.2-cp311-cp311-win32.whl", hash = "sha256:4ddb43d06ce786221c0dfd3c91b4892c318eaa36b903f7c4278e7e2fa0dd5102"}, - {file = "aiohttp-3.10.2-cp311-cp311-win_amd64.whl", hash = "sha256:ca2f5abcb0a9a47e56bac173c01e9f6c6e7f27534d91451c5f22e6a35a5a2093"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:14eb6b17f6246959fb0b035d4f4ae52caa870c4edfb6170aad14c0de5bfbf478"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:465e445ec348d4e4bd349edd8b22db75f025da9d7b6dc1369c48e7935b85581e"}, - {file = "aiohttp-3.10.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:341f8ece0276a828d95b70cd265d20e257f5132b46bf77d759d7f4e0443f2906"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c01fbb87b5426381cd9418b3ddcf4fc107e296fa2d3446c18ce6c76642f340a3"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c474af073e1a6763e1c5522bbb2d85ff8318197e4c6c919b8d7886e16213345"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d9076810a5621236e29b2204e67a68e1fe317c8727ee4c9abbfbb1083b442c38"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8f515d6859e673940e08de3922b9c4a2249653b0ac181169313bd6e4b1978ac"}, - {file = "aiohttp-3.10.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:655e583afc639bef06f3b2446972c1726007a21003cd0ef57116a123e44601bc"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8da9449a575133828cc99985536552ea2dcd690e848f9d41b48d8853a149a959"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:19073d57d0feb1865d12361e2a1f5a49cb764bf81a4024a3b608ab521568093a"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c8e98e1845805f184d91fda6f9ab93d7c7b0dddf1c07e0255924bfdb151a8d05"}, - {file = "aiohttp-3.10.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:377220a5efde6f9497c5b74649b8c261d3cce8a84cb661be2ed8099a2196400a"}, - {file = 
"aiohttp-3.10.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:92f7f4a4dc9cdb5980973a74d43cdbb16286dacf8d1896b6c3023b8ba8436f8e"}, - {file = "aiohttp-3.10.2-cp312-cp312-win32.whl", hash = "sha256:9bb2834a6f11d65374ce97d366d6311a9155ef92c4f0cee543b2155d06dc921f"}, - {file = "aiohttp-3.10.2-cp312-cp312-win_amd64.whl", hash = "sha256:518dc3cb37365255708283d1c1c54485bbacccd84f0a0fb87ed8917ba45eda5b"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:7f98e70bbbf693086efe4b86d381efad8edac040b8ad02821453083d15ec315f"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9f6f0b252a009e98fe84028a4ec48396a948e7a65b8be06ccfc6ef68cf1f614d"}, - {file = "aiohttp-3.10.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9360e3ffc7b23565600e729e8c639c3c50d5520e05fdf94aa2bd859eef12c407"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3988044d1635c7821dd44f0edfbe47e9875427464e59d548aece447f8c22800a"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30a9d59da1543a6f1478c3436fd49ec59be3868bca561a33778b4391005e499d"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f49bdb94809ac56e09a310a62f33e5f22973d6fd351aac72a39cd551e98194"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfd2dca3f11c365d6857a07e7d12985afc59798458a2fdb2ffa4a0332a3fd43"}, - {file = "aiohttp-3.10.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:685c1508ec97b2cd3e120bfe309a4ff8e852e8a7460f1ef1de00c2c0ed01e33c"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:49904f38667c44c041a0b44c474b3ae36948d16a0398a8f8cd84e2bb3c42a069"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:352f3a4e5f11f3241a49b6a48bc5b935fabc35d1165fa0d87f3ca99c1fcca98b"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:fc61f39b534c5d5903490478a0dd349df397d2284a939aa3cbaa2fb7a19b8397"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:ad2274e707be37420d0b6c3d26a8115295fe9d8e6e530fa6a42487a8ca3ad052"}, - {file = "aiohttp-3.10.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c836bf3c7512100219fe1123743fd8dd9a2b50dd7cfb0c3bb10d041309acab4b"}, - {file = "aiohttp-3.10.2-cp38-cp38-win32.whl", hash = "sha256:53e8898adda402be03ff164b0878abe2d884e3ea03a4701e6ad55399d84b92dc"}, - {file = "aiohttp-3.10.2-cp38-cp38-win_amd64.whl", hash = "sha256:7cc8f65f5b22304693de05a245b6736b14cb5bc9c8a03da6e2ae9ef15f8b458f"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9dfc906d656e14004c5bc672399c1cccc10db38df2b62a13fb2b6e165a81c316"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:91b10208b222ddf655c3a3d5b727879d7163db12b634492df41a9182a76edaae"}, - {file = "aiohttp-3.10.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fd16b5e1a7bdd14668cd6bde60a2a29b49147a535c74f50d8177d11b38433a7"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2bfdda4971bd79201f59adbad24ec2728875237e1c83bba5221284dbbf57bda"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69d73f869cf29e8a373127fc378014e2b17bcfbe8d89134bc6fb06a2f67f3cb3"}, - {file = 
"aiohttp-3.10.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df59f8486507c421c0620a2c3dce81fbf1d54018dc20ff4fecdb2c106d6e6abc"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0df930015db36b460aa9badbf35eccbc383f00d52d4b6f3de2ccb57d064a6ade"}, - {file = "aiohttp-3.10.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:562b1153ab7f766ee6b8b357ec777a302770ad017cf18505d34f1c088fccc448"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d984db6d855de58e0fde1ef908d48fe9a634cadb3cf715962722b4da1c40619d"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:14dc3fcb0d877911d775d511eb617a486a8c48afca0a887276e63db04d3ee920"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b52a27a5c97275e254704e1049f4b96a81e67d6205f52fa37a4777d55b0e98ef"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:cd33d9de8cfd006a0d0fe85f49b4183c57e91d18ffb7e9004ce855e81928f704"}, - {file = "aiohttp-3.10.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1238fc979160bc03a92fff9ad021375ff1c8799c6aacb0d8ea1b357ea40932bb"}, - {file = "aiohttp-3.10.2-cp39-cp39-win32.whl", hash = "sha256:e2f43d238eae4f0b04f58d4c0df4615697d4ca3e9f9b1963d49555a94f0f5a04"}, - {file = "aiohttp-3.10.2-cp39-cp39-win_amd64.whl", hash = "sha256:947847f07a8f81d7b39b2d0202fd73e61962ebe17ac2d8566f260679e467da7b"}, - {file = "aiohttp-3.10.2.tar.gz", hash = "sha256:4d1f694b5d6e459352e5e925a42e05bac66655bfde44d81c59992463d2897014"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5077b1a5f40ffa3ba1f40d537d3bec4383988ee51fbba6b74aa8fb1bc466599e"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8d6a14a4d93b5b3c2891fca94fa9d41b2322a68194422bef0dd5ec1e57d7d298"}, + {file = "aiohttp-3.10.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ffbfde2443696345e23a3c597049b1dd43049bb65337837574205e7368472177"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20b3d9e416774d41813bc02fdc0663379c01817b0874b932b81c7f777f67b217"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b943011b45ee6bf74b22245c6faab736363678e910504dd7531a58c76c9015a"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48bc1d924490f0d0b3658fe5c4b081a4d56ebb58af80a6729d4bd13ea569797a"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e12eb3f4b1f72aaaf6acd27d045753b18101524f72ae071ae1c91c1cd44ef115"}, + {file = "aiohttp-3.10.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f14ebc419a568c2eff3c1ed35f634435c24ead2fe19c07426af41e7adb68713a"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:72b191cdf35a518bfc7ca87d770d30941decc5aaf897ec8b484eb5cc8c7706f3"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5ab2328a61fdc86424ee540d0aeb8b73bbcad7351fb7cf7a6546fc0bcffa0038"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:aa93063d4af05c49276cf14e419550a3f45258b6b9d1f16403e777f1addf4519"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:30283f9d0ce420363c24c5c2421e71a738a2155f10adbb1a11a4d4d6d2715cfc"}, + {file = "aiohttp-3.10.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e5358addc8044ee49143c546d2182c15b4ac3a60be01c3209374ace05af5733d"}, + {file = "aiohttp-3.10.11-cp310-cp310-win32.whl", hash = "sha256:e1ffa713d3ea7cdcd4aea9cddccab41edf6882fa9552940344c44e59652e1120"}, + {file = "aiohttp-3.10.11-cp310-cp310-win_amd64.whl", hash = "sha256:778cbd01f18ff78b5dd23c77eb82987ee4ba23408cbed233009fd570dda7e674"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:80ff08556c7f59a7972b1e8919f62e9c069c33566a6d28586771711e0eea4f07"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c8f96e9ee19f04c4914e4e7a42a60861066d3e1abf05c726f38d9d0a466e695"}, + {file = "aiohttp-3.10.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fb8601394d537da9221947b5d6e62b064c9a43e88a1ecd7414d21a1a6fba9c24"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ea224cf7bc2d8856d6971cea73b1d50c9c51d36971faf1abc169a0d5f85a382"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db9503f79e12d5d80b3efd4d01312853565c05367493379df76d2674af881caa"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0f449a50cc33f0384f633894d8d3cd020e3ccef81879c6e6245c3c375c448625"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82052be3e6d9e0c123499127782a01a2b224b8af8c62ab46b3f6197035ad94e9"}, + {file = "aiohttp-3.10.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:20063c7acf1eec550c8eb098deb5ed9e1bb0521613b03bb93644b810986027ac"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:489cced07a4c11488f47aab1f00d0c572506883f877af100a38f1fedaa884c3a"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ea9b3bab329aeaa603ed3bf605f1e2a6f36496ad7e0e1aa42025f368ee2dc07b"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ca117819d8ad113413016cb29774b3f6d99ad23c220069789fc050267b786c16"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2dfb612dcbe70fb7cdcf3499e8d483079b89749c857a8f6e80263b021745c730"}, + {file = "aiohttp-3.10.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f9b615d3da0d60e7d53c62e22b4fd1c70f4ae5993a44687b011ea3a2e49051b8"}, + {file = "aiohttp-3.10.11-cp311-cp311-win32.whl", hash = "sha256:29103f9099b6068bbdf44d6a3d090e0a0b2be6d3c9f16a070dd9d0d910ec08f9"}, + {file = "aiohttp-3.10.11-cp311-cp311-win_amd64.whl", hash = "sha256:236b28ceb79532da85d59aa9b9bf873b364e27a0acb2ceaba475dc61cffb6f3f"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:7480519f70e32bfb101d71fb9a1f330fbd291655a4c1c922232a48c458c52710"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f65267266c9aeb2287a6622ee2bb39490292552f9fbf851baabc04c9f84e048d"}, + {file = "aiohttp-3.10.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7400a93d629a0608dc1d6c55f1e3d6e07f7375745aaa8bd7f085571e4d1cee97"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f34b97e4b11b8d4eb2c3a4f975be626cc8af99ff479da7de49ac2c6d02d35725"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:1e7b825da878464a252ccff2958838f9caa82f32a8dbc334eb9b34a026e2c636"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f9f92a344c50b9667827da308473005f34767b6a2a60d9acff56ae94f895f385"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc6f1ab987a27b83c5268a17218463c2ec08dbb754195113867a27b166cd6087"}, + {file = "aiohttp-3.10.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1dc0f4ca54842173d03322793ebcf2c8cc2d34ae91cc762478e295d8e361e03f"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7ce6a51469bfaacff146e59e7fb61c9c23006495d11cc24c514a455032bcfa03"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:aad3cd91d484d065ede16f3cf15408254e2469e3f613b241a1db552c5eb7ab7d"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f4df4b8ca97f658c880fb4b90b1d1ec528315d4030af1ec763247ebfd33d8b9a"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2e4e18a0a2d03531edbc06c366954e40a3f8d2a88d2b936bbe78a0c75a3aab3e"}, + {file = "aiohttp-3.10.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ce66780fa1a20e45bc753cda2a149daa6dbf1561fc1289fa0c308391c7bc0a4"}, + {file = "aiohttp-3.10.11-cp312-cp312-win32.whl", hash = "sha256:a919c8957695ea4c0e7a3e8d16494e3477b86f33067478f43106921c2fef15bb"}, + {file = "aiohttp-3.10.11-cp312-cp312-win_amd64.whl", hash = "sha256:b5e29706e6389a2283a91611c91bf24f218962717c8f3b4e528ef529d112ee27"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:703938e22434d7d14ec22f9f310559331f455018389222eed132808cd8f44127"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9bc50b63648840854e00084c2b43035a62e033cb9b06d8c22b409d56eb098413"}, + {file = "aiohttp-3.10.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f0463bf8b0754bc744e1feb61590706823795041e63edf30118a6f0bf577461"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6c6dec398ac5a87cb3a407b068e1106b20ef001c344e34154616183fe684288"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcaf2d79104d53d4dcf934f7ce76d3d155302d07dae24dff6c9fffd217568067"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:25fd5470922091b5a9aeeb7e75be609e16b4fba81cdeaf12981393fb240dd10e"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbde2ca67230923a42161b1f408c3992ae6e0be782dca0c44cb3206bf330dee1"}, + {file = "aiohttp-3.10.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:249c8ff8d26a8b41a0f12f9df804e7c685ca35a207e2410adbd3e924217b9006"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:878ca6a931ee8c486a8f7b432b65431d095c522cbeb34892bee5be97b3481d0f"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8663f7777ce775f0413324be0d96d9730959b2ca73d9b7e2c2c90539139cbdd6"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6cd3f10b01f0c31481fba8d302b61603a2acb37b9d30e1d14e0f5a58b7b18a31"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = 
"sha256:4e8d8aad9402d3aa02fdc5ca2fe68bcb9fdfe1f77b40b10410a94c7f408b664d"}, + {file = "aiohttp-3.10.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:38e3c4f80196b4f6c3a85d134a534a56f52da9cb8d8e7af1b79a32eefee73a00"}, + {file = "aiohttp-3.10.11-cp313-cp313-win32.whl", hash = "sha256:fc31820cfc3b2863c6e95e14fcf815dc7afe52480b4dc03393c4873bb5599f71"}, + {file = "aiohttp-3.10.11-cp313-cp313-win_amd64.whl", hash = "sha256:4996ff1345704ffdd6d75fb06ed175938c133425af616142e7187f28dc75f14e"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:74baf1a7d948b3d640badeac333af581a367ab916b37e44cf90a0334157cdfd2"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:473aebc3b871646e1940c05268d451f2543a1d209f47035b594b9d4e91ce8339"}, + {file = "aiohttp-3.10.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c2f746a6968c54ab2186574e15c3f14f3e7f67aef12b761e043b33b89c5b5f95"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d110cabad8360ffa0dec8f6ec60e43286e9d251e77db4763a87dcfe55b4adb92"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0099c7d5d7afff4202a0c670e5b723f7718810000b4abcbc96b064129e64bc7"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0316e624b754dbbf8c872b62fe6dcb395ef20c70e59890dfa0de9eafccd2849d"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a5f7ab8baf13314e6b2485965cbacb94afff1e93466ac4d06a47a81c50f9cca"}, + {file = "aiohttp-3.10.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c891011e76041e6508cbfc469dd1a8ea09bc24e87e4c204e05f150c4c455a5fa"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9208299251370ee815473270c52cd3f7069ee9ed348d941d574d1457d2c73e8b"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:459f0f32c8356e8125f45eeff0ecf2b1cb6db1551304972702f34cd9e6c44658"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:14cdc8c1810bbd4b4b9f142eeee23cda528ae4e57ea0923551a9af4820980e39"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:971aa438a29701d4b34e4943e91b5e984c3ae6ccbf80dd9efaffb01bd0b243a9"}, + {file = "aiohttp-3.10.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9a309c5de392dfe0f32ee57fa43ed8fc6ddf9985425e84bd51ed66bb16bce3a7"}, + {file = "aiohttp-3.10.11-cp38-cp38-win32.whl", hash = "sha256:9ec1628180241d906a0840b38f162a3215114b14541f1a8711c368a8739a9be4"}, + {file = "aiohttp-3.10.11-cp38-cp38-win_amd64.whl", hash = "sha256:9c6e0ffd52c929f985c7258f83185d17c76d4275ad22e90aa29f38e211aacbec"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc493a2e5d8dc79b2df5bec9558425bcd39aff59fc949810cbd0832e294b106"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b3e70f24e7d0405be2348da9d5a7836936bf3a9b4fd210f8c37e8d48bc32eca6"}, + {file = "aiohttp-3.10.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:968b8fb2a5eee2770eda9c7b5581587ef9b96fbdf8dcabc6b446d35ccc69df01"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deef4362af9493d1382ef86732ee2e4cbc0d7c005947bd54ad1a9a16dd59298e"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:686b03196976e327412a1b094f4120778c7c4b9cff9bce8d2fdfeca386b89829"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3bf6d027d9d1d34e1c2e1645f18a6498c98d634f8e373395221121f1c258ace8"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:099fd126bf960f96d34a760e747a629c27fb3634da5d05c7ef4d35ef4ea519fc"}, + {file = "aiohttp-3.10.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c73c4d3dae0b4644bc21e3de546530531d6cdc88659cdeb6579cd627d3c206aa"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0c5580f3c51eea91559db3facd45d72e7ec970b04528b4709b1f9c2555bd6d0b"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fdf6429f0caabfd8a30c4e2eaecb547b3c340e4730ebfe25139779b9815ba138"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d97187de3c276263db3564bb9d9fad9e15b51ea10a371ffa5947a5ba93ad6777"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:0acafb350cfb2eba70eb5d271f55e08bd4502ec35e964e18ad3e7d34d71f7261"}, + {file = "aiohttp-3.10.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c13ed0c779911c7998a58e7848954bd4d63df3e3575f591e321b19a2aec8df9f"}, + {file = "aiohttp-3.10.11-cp39-cp39-win32.whl", hash = "sha256:22b7c540c55909140f63ab4f54ec2c20d2635c0289cdd8006da46f3327f971b9"}, + {file = "aiohttp-3.10.11-cp39-cp39-win_amd64.whl", hash = "sha256:7b26b1551e481012575dab8e3727b16fe7dd27eb2711d2e63ced7368756268fb"}, + {file = "aiohttp-3.10.11.tar.gz", hash = "sha256:9dc2b8f3dcab2e39e0fa309c8da50c3b55e6f34ab25f1a71d3288f24924d33a7"}, ] [package.dependencies] aiohappyeyeballs = ">=2.3.0" aiosignal = ">=1.1.2" -async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} +async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" -yarl = ">=1.0,<2.0" +yarl = ">=1.12.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] @@ -2078,6 +2093,113 @@ files = [ [package.extras] twisted = ["twisted"] +[[package]] +name = "propcache" +version = "0.2.0" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.8" +files = [ + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, + {file = "propcache-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33ac8f098df0585c0b53009f039dfd913b38c1d2edafed0cedcc0c32a05aa110"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e48e8875e6c13909c800fa344cd54cc4b2b0db1d5f911f840458a500fde2c2"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388f3217649d6d59292b722d940d4d2e1e6a7003259eb835724092a1cca0203a"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f571aea50ba5623c308aa146eb650eebf7dbe0fd8c5d946e28343cb3b5aad577"}, + {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dfafb44f7bb35c0c06eda6b2ab4bfd58f02729e7c4045e179f9a861b07c9850"}, + {file = 
"propcache-0.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3ebe9a75be7ab0b7da2464a77bb27febcb4fab46a34f9288f39d74833db7f61"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2f0d0f976985f85dfb5f3d685697ef769faa6b71993b46b295cdbbd6be8cc37"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a3dc1a4b165283bd865e8f8cb5f0c64c05001e0718ed06250d8cac9bec115b48"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e0f07b42d2a50c7dd2d8675d50f7343d998c64008f1da5fef888396b7f84630"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e63e3e1e0271f374ed489ff5ee73d4b6e7c60710e1f76af5f0e1a6117cd26394"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:56bb5c98f058a41bb58eead194b4db8c05b088c93d94d5161728515bd52b052b"}, + {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7665f04d0c7f26ff8bb534e1c65068409bf4687aa2534faf7104d7182debb336"}, + {file = "propcache-0.2.0-cp310-cp310-win32.whl", hash = "sha256:7cf18abf9764746b9c8704774d8b06714bcb0a63641518a3a89c7f85cc02c2ad"}, + {file = "propcache-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:cfac69017ef97db2438efb854edf24f5a29fd09a536ff3a992b75990720cdc99"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:63f13bf09cc3336eb04a837490b8f332e0db41da66995c9fd1ba04552e516354"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:608cce1da6f2672a56b24a015b42db4ac612ee709f3d29f27a00c943d9e851de"}, + {file = "propcache-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:466c219deee4536fbc83c08d09115249db301550625c7fef1c5563a584c9bc87"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6ed8db0a556343d566a5c124ee483ae113acc9a557a807d439bcecc44e7dfbb"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91997d9cb4a325b60d4e3f20967f8eb08dfcb32b22554d5ef78e6fd1dda743a2"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7dde9e533c0a49d802b4f3f218fa9ad0a1ce21f2c2eb80d5216565202acab4"}, + {file = "propcache-0.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:97a58a28bcf63284e8b4d7b460cbee1edaab24634e82059c7b8c09e65284f178"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:945db8ee295d3af9dbdbb698cce9bbc5c59b5c3fe328bbc4387f59a8a35f998d"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39e104da444a34830751715f45ef9fc537475ba21b7f1f5b0f4d71a3b60d7fe2"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c5ecca8f9bab618340c8e848d340baf68bcd8ad90a8ecd7a4524a81c1764b3db"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c436130cc779806bdf5d5fae0d848713105472b8566b75ff70048c47d3961c5b"}, + {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:191db28dc6dcd29d1a3e063c3be0b40688ed76434622c53a284e5427565bbd9b"}, + {file = "propcache-0.2.0-cp311-cp311-win32.whl", hash = "sha256:5f2564ec89058ee7c7989a7b719115bdfe2a2fb8e7a4543b8d1c0cc4cf6478c1"}, + {file = "propcache-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e2e54267980349b723cff366d1e29b138b9a60fa376664a157a342689553f71"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ee7606193fb267be4b2e3b32714f2d58cad27217638db98a60f9efb5efeccc2"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:91ee8fc02ca52e24bcb77b234f22afc03288e1dafbb1f88fe24db308910c4ac7"}, + {file = "propcache-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e900bad2a8456d00a113cad8c13343f3b1f327534e3589acc2219729237a2e8"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f52a68c21363c45297aca15561812d542f8fc683c85201df0bebe209e349f793"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e41d67757ff4fbc8ef2af99b338bfb955010444b92929e9e55a6d4dcc3c4f09"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a64e32f8bd94c105cc27f42d3b658902b5bcc947ece3c8fe7bc1b05982f60e89"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55346705687dbd7ef0d77883ab4f6fabc48232f587925bdaf95219bae072491e"}, + {file = "propcache-0.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00181262b17e517df2cd85656fcd6b4e70946fe62cd625b9d74ac9977b64d8d9"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6994984550eaf25dd7fc7bd1b700ff45c894149341725bb4edc67f0ffa94efa4"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:56295eb1e5f3aecd516d91b00cfd8bf3a13991de5a479df9e27dd569ea23959c"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:439e76255daa0f8151d3cb325f6dd4a3e93043e6403e6491813bcaaaa8733887"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f6475a1b2ecb310c98c28d271a30df74f9dd436ee46d09236a6b750a7599ce57"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3444cdba6628accf384e349014084b1cacd866fbb88433cd9d279d90a54e0b23"}, + {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4a9d9b4d0a9b38d1c391bb4ad24aa65f306c6f01b512e10a8a34a2dc5675d348"}, + {file = "propcache-0.2.0-cp312-cp312-win32.whl", hash = "sha256:69d3a98eebae99a420d4b28756c8ce6ea5a29291baf2dc9ff9414b42676f61d5"}, + {file = "propcache-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad9c9b99b05f163109466638bd30ada1722abb01bbb85c739c50b6dc11f92dc3"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecddc221a077a8132cf7c747d5352a15ed763b674c0448d811f408bf803d9ad7"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0e53cb83fdd61cbd67202735e6a6687a7b491c8742dfc39c9e01e80354956763"}, + {file = "propcache-0.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92fe151145a990c22cbccf9ae15cae8ae9eddabfc949a219c9f667877e40853d"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a21ef516d36909931a2967621eecb256018aeb11fc48656e3257e73e2e247a"}, + {file = 
"propcache-0.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f88a4095e913f98988f5b338c1d4d5d07dbb0b6bad19892fd447484e483ba6b"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5b3bb545ead161be780ee85a2b54fdf7092815995661947812dde94a40f6fb"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67aeb72e0f482709991aa91345a831d0b707d16b0257e8ef88a2ad246a7280bf"}, + {file = "propcache-0.2.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c997f8c44ec9b9b0bcbf2d422cc00a1d9b9c681f56efa6ca149a941e5560da2"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a66df3d4992bc1d725b9aa803e8c5a66c010c65c741ad901e260ece77f58d2f"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:3ebbcf2a07621f29638799828b8d8668c421bfb94c6cb04269130d8de4fb7136"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1235c01ddaa80da8235741e80815ce381c5267f96cc49b1477fdcf8c047ef325"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3947483a381259c06921612550867b37d22e1df6d6d7e8361264b6d037595f44"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d5bed7f9805cc29c780f3aee05de3262ee7ce1f47083cfe9f77471e9d6777e83"}, + {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4a91d44379f45f5e540971d41e4626dacd7f01004826a18cb048e7da7e96544"}, + {file = "propcache-0.2.0-cp313-cp313-win32.whl", hash = "sha256:f902804113e032e2cdf8c71015651c97af6418363bea8d78dc0911d56c335032"}, + {file = "propcache-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:8f188cfcc64fb1266f4684206c9de0e80f54622c3f22a910cbd200478aeae61e"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:53d1bd3f979ed529f0805dd35ddaca330f80a9a6d90bc0121d2ff398f8ed8861"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:83928404adf8fb3d26793665633ea79b7361efa0287dfbd372a7e74311d51ee6"}, + {file = "propcache-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77a86c261679ea5f3896ec060be9dc8e365788248cc1e049632a1be682442063"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218db2a3c297a3768c11a34812e63b3ac1c3234c3a086def9c0fee50d35add1f"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7735e82e3498c27bcb2d17cb65d62c14f1100b71723b68362872bca7d0913d90"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20a617c776f520c3875cf4511e0d1db847a076d720714ae35ffe0df3e440be68"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b69535c870670c9f9b14a75d28baa32221d06f6b6fa6f77a0a13c5a7b0a5b9"}, + {file = "propcache-0.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4569158070180c3855e9c0791c56be3ceeb192defa2cdf6a3f39e54319e56b89"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:db47514ffdbd91ccdc7e6f8407aac4ee94cc871b15b577c1c324236b013ddd04"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:2a60ad3e2553a74168d275a0ef35e8c0a965448ffbc3b300ab3a5bb9956c2162"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_i686.whl", hash = 
"sha256:662dd62358bdeaca0aee5761de8727cfd6861432e3bb828dc2a693aa0471a563"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:25a1f88b471b3bc911d18b935ecb7115dff3a192b6fef46f0bfaf71ff4f12418"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:f60f0ac7005b9f5a6091009b09a419ace1610e163fa5deaba5ce3484341840e7"}, + {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:74acd6e291f885678631b7ebc85d2d4aec458dd849b8c841b57ef04047833bed"}, + {file = "propcache-0.2.0-cp38-cp38-win32.whl", hash = "sha256:d9b6ddac6408194e934002a69bcaadbc88c10b5f38fb9307779d1c629181815d"}, + {file = "propcache-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:676135dcf3262c9c5081cc8f19ad55c8a64e3f7282a21266d05544450bffc3a5"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25c8d773a62ce0451b020c7b29a35cfbc05de8b291163a7a0f3b7904f27253e6"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:375a12d7556d462dc64d70475a9ee5982465fbb3d2b364f16b86ba9135793638"}, + {file = "propcache-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ec43d76b9677637a89d6ab86e1fef70d739217fefa208c65352ecf0282be957"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f45eec587dafd4b2d41ac189c2156461ebd0c1082d2fe7013571598abb8505d1"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc092ba439d91df90aea38168e11f75c655880c12782facf5cf9c00f3d42b562"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa1076244f54bb76e65e22cb6910365779d5c3d71d1f18b275f1dfc7b0d71b4d"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682a7c79a2fbf40f5dbb1eb6bfe2cd865376deeac65acf9beb607505dced9e12"}, + {file = "propcache-0.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e40876731f99b6f3c897b66b803c9e1c07a989b366c6b5b475fafd1f7ba3fb8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:363ea8cd3c5cb6679f1c2f5f1f9669587361c062e4899fce56758efa928728f8"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:140fbf08ab3588b3468932974a9331aff43c0ab8a2ec2c608b6d7d1756dbb6cb"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e70fac33e8b4ac63dfc4c956fd7d85a0b1139adcfc0d964ce288b7c527537fea"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b33d7a286c0dc1a15f5fc864cc48ae92a846df287ceac2dd499926c3801054a6"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f6d5749fdd33d90e34c2efb174c7e236829147a2713334d708746e94c4bde40d"}, + {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22aa8f2272d81d9317ff5756bb108021a056805ce63dd3630e27d042c8092798"}, + {file = "propcache-0.2.0-cp39-cp39-win32.whl", hash = "sha256:73e4b40ea0eda421b115248d7e79b59214411109a5bc47d0d48e4c73e3b8fcf9"}, + {file = "propcache-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:9517d5e9e0731957468c29dbfd0f976736a0e55afaea843726e887f36fe017df"}, + {file = "propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036"}, + {file = "propcache-0.2.0.tar.gz", hash = "sha256:df81779732feb9d01e5d513fad0122efb3d53bbc75f61b2a4f29a020bc985e70"}, +] + [[package]] name = "psutil" version = "5.9.4" @@ 
-3307,106 +3429,99 @@ files = [ [[package]] name = "yarl" -version = "1.9.4" +version = "1.17.2" description = "Yet another URL library" optional = false -python-versions = ">=3.7" +python-versions = ">=3.9" files = [ - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"}, - {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"}, - {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"}, - {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"}, - {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"}, - {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"}, - {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"}, - {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"}, - {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"}, - {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"}, - {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"}, - {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"}, - {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"}, - {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"}, - {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"}, - {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"}, - {file = 
"yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"}, - {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"}, - {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"}, - {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"}, - {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"}, - {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"}, - {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"}, - {file = 
"yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"}, - {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"}, - {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"}, - {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"}, - {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"}, - {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"}, - {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"}, - {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"}, - {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"}, - {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"}, - {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:93771146ef048b34201bfa382c2bf74c524980870bb278e6df515efaf93699ff"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8281db240a1616af2f9c5f71d355057e73a1409c4648c8949901396dc0a3c151"}, + {file = "yarl-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:170ed4971bf9058582b01a8338605f4d8c849bd88834061e60e83b52d0c76870"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc61b005f6521fcc00ca0d1243559a5850b9dd1e1fe07b891410ee8fe192d0c0"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:871e1b47eec7b6df76b23c642a81db5dd6536cbef26b7e80e7c56c2fd371382e"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3a58a2f2ca7aaf22b265388d40232f453f67a6def7355a840b98c2d547bd037f"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:736bb076f7299c5c55dfef3eb9e96071a795cb08052822c2bb349b06f4cb2e0a"}, + {file = "yarl-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8fd51299e21da709eabcd5b2dd60e39090804431292daacbee8d3dabe39a6bc0"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:358dc7ddf25e79e1cc8ee16d970c23faee84d532b873519c5036dbb858965795"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:50d866f7b1a3f16f98603e095f24c0eeba25eb508c85a2c5939c8b3870ba2df8"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8b9c4643e7d843a0dca9cd9d610a0876e90a1b2cbc4c5ba7930a0d90baf6903f"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d63123bfd0dce5f91101e77c8a5427c3872501acece8c90df457b486bc1acd47"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:4e76381be3d8ff96a4e6c77815653063e87555981329cf8f85e5be5abf449021"}, + {file = "yarl-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:734144cd2bd633a1516948e477ff6c835041c0536cef1d5b9a823ae29899665b"}, + {file = "yarl-1.17.2-cp310-cp310-win32.whl", hash = "sha256:26bfb6226e0c157af5da16d2d62258f1ac578d2899130a50433ffee4a5dfa673"}, + {file = "yarl-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:76499469dcc24759399accd85ec27f237d52dec300daaca46a5352fcbebb1071"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:792155279dc093839e43f85ff7b9b6493a8eaa0af1f94f1f9c6e8f4de8c63500"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:38bc4ed5cae853409cb193c87c86cd0bc8d3a70fd2268a9807217b9176093ac6"}, + {file = "yarl-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4a8c83f6fcdc327783bdc737e8e45b2e909b7bd108c4da1892d3bc59c04a6d84"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6d5fed96f0646bfdf698b0a1cebf32b8aae6892d1bec0c5d2d6e2df44e1e2d"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:782ca9c58f5c491c7afa55518542b2b005caedaf4685ec814fadfcee51f02493"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ff6af03cac0d1a4c3c19e5dcc4c05252411bf44ccaa2485e20d0a7c77892ab6e"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a3f47930fbbed0f6377639503848134c4aa25426b08778d641491131351c2c8"}, + {file = "yarl-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1fa68a3c921365c5745b4bd3af6221ae1f0ea1bf04b69e94eda60e57958907f"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:187df91395c11e9f9dc69b38d12406df85aa5865f1766a47907b1cc9855b6303"}, + {file = 
"yarl-1.17.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:93d1c8cc5bf5df401015c5e2a3ce75a5254a9839e5039c881365d2a9dcfc6dc2"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:11d86c6145ac5c706c53d484784cf504d7d10fa407cb73b9d20f09ff986059ef"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c42774d1d1508ec48c3ed29e7b110e33f5e74a20957ea16197dbcce8be6b52ba"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8e589379ef0407b10bed16cc26e7392ef8f86961a706ade0a22309a45414d7"}, + {file = "yarl-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1056cadd5e850a1c026f28e0704ab0a94daaa8f887ece8dfed30f88befb87bb0"}, + {file = "yarl-1.17.2-cp311-cp311-win32.whl", hash = "sha256:be4c7b1c49d9917c6e95258d3d07f43cfba2c69a6929816e77daf322aaba6628"}, + {file = "yarl-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:ac8eda86cc75859093e9ce390d423aba968f50cf0e481e6c7d7d63f90bae5c9c"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:dd90238d3a77a0e07d4d6ffdebc0c21a9787c5953a508a2231b5f191455f31e9"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c74f0b0472ac40b04e6d28532f55cac8090e34c3e81f118d12843e6df14d0909"}, + {file = "yarl-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d486ddcaca8c68455aa01cf53d28d413fb41a35afc9f6594a730c9779545876"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25b7e93f5414b9a983e1a6c1820142c13e1782cc9ed354c25e933aebe97fcf2"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a0baff7827a632204060f48dca9e63fbd6a5a0b8790c1a2adfb25dc2c9c0d50"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:460024cacfc3246cc4d9f47a7fc860e4fcea7d1dc651e1256510d8c3c9c7cde0"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5870d620b23b956f72bafed6a0ba9a62edb5f2ef78a8849b7615bd9433384171"}, + {file = "yarl-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2941756754a10e799e5b87e2319bbec481ed0957421fba0e7b9fb1c11e40509f"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9611b83810a74a46be88847e0ea616794c406dbcb4e25405e52bff8f4bee2d0a"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:cd7e35818d2328b679a13268d9ea505c85cd773572ebb7a0da7ccbca77b6a52e"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6b981316fcd940f085f646b822c2ff2b8b813cbd61281acad229ea3cbaabeb6b"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:688058e89f512fb7541cb85c2f149c292d3fa22f981d5a5453b40c5da49eb9e8"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:56afb44a12b0864d17b597210d63a5b88915d680f6484d8d202ed68ade38673d"}, + {file = "yarl-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:17931dfbb84ae18b287279c1f92b76a3abcd9a49cd69b92e946035cff06bcd20"}, + {file = "yarl-1.17.2-cp312-cp312-win32.whl", hash = "sha256:ff8d95e06546c3a8c188f68040e9d0360feb67ba8498baf018918f669f7bc39b"}, + {file = "yarl-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:4c840cc11163d3c01a9d8aad227683c48cd3e5be5a785921bcc2a8b4b758c4f3"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:3294f787a437cb5d81846de3a6697f0c35ecff37a932d73b1fe62490bef69211"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f1e7fedb09c059efee2533119666ca7e1a2610072076926fa028c2ba5dfeb78c"}, + {file = "yarl-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:da9d3061e61e5ae3f753654813bc1cd1c70e02fb72cf871bd6daf78443e9e2b1"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91c012dceadc695ccf69301bfdccd1fc4472ad714fe2dd3c5ab4d2046afddf29"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f11fd61d72d93ac23718d393d2a64469af40be2116b24da0a4ca6922df26807e"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46c465ad06971abcf46dd532f77560181387b4eea59084434bdff97524444032"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef6eee1a61638d29cd7c85f7fd3ac7b22b4c0fabc8fd00a712b727a3e73b0685"}, + {file = "yarl-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4434b739a8a101a837caeaa0137e0e38cb4ea561f39cb8960f3b1e7f4967a3fc"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:752485cbbb50c1e20908450ff4f94217acba9358ebdce0d8106510859d6eb19a"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:17791acaa0c0f89323c57da7b9a79f2174e26d5debbc8c02d84ebd80c2b7bff8"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5c6ea72fe619fee5e6b5d4040a451d45d8175f560b11b3d3e044cd24b2720526"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db5ac3871ed76340210fe028f535392f097fb31b875354bcb69162bba2632ef4"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:7a1606ba68e311576bcb1672b2a1543417e7e0aa4c85e9e718ba6466952476c0"}, + {file = "yarl-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9bc27dd5cfdbe3dc7f381b05e6260ca6da41931a6e582267d5ca540270afeeb2"}, + {file = "yarl-1.17.2-cp313-cp313-win32.whl", hash = "sha256:52492b87d5877ec405542f43cd3da80bdcb2d0c2fbc73236526e5f2c28e6db28"}, + {file = "yarl-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:8e1bf59e035534ba4077f5361d8d5d9194149f9ed4f823d1ee29ef3e8964ace3"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c556fbc6820b6e2cda1ca675c5fa5589cf188f8da6b33e9fc05b002e603e44fa"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f2f44a4247461965fed18b2573f3a9eb5e2c3cad225201ee858726cde610daca"}, + {file = "yarl-1.17.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a3ede8c248f36b60227eb777eac1dbc2f1022dc4d741b177c4379ca8e75571a"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2654caaf5584449d49c94a6b382b3cb4a246c090e72453493ea168b931206a4d"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0d41c684f286ce41fa05ab6af70f32d6da1b6f0457459a56cf9e393c1c0b2217"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2270d590997445a0dc29afa92e5534bfea76ba3aea026289e811bf9ed4b65a7f"}, + {file = "yarl-1.17.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18662443c6c3707e2fc7fad184b4dc32dd428710bbe72e1bce7fe1988d4aa654"}, + {file = 
"yarl-1.17.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:75ac158560dec3ed72f6d604c81090ec44529cfb8169b05ae6fcb3e986b325d9"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1fee66b32e79264f428dc8da18396ad59cc48eef3c9c13844adec890cd339db5"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:585ce7cd97be8f538345de47b279b879e091c8b86d9dbc6d98a96a7ad78876a3"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c019abc2eca67dfa4d8fb72ba924871d764ec3c92b86d5b53b405ad3d6aa56b0"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c6e659b9a24d145e271c2faf3fa6dd1fcb3e5d3f4e17273d9e0350b6ab0fe6e2"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:d17832ba39374134c10e82d137e372b5f7478c4cceeb19d02ae3e3d1daed8721"}, + {file = "yarl-1.17.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:bc3003710e335e3f842ae3fd78efa55f11a863a89a72e9a07da214db3bf7e1f8"}, + {file = "yarl-1.17.2-cp39-cp39-win32.whl", hash = "sha256:f5ffc6b7ace5b22d9e73b2a4c7305740a339fbd55301d52735f73e21d9eb3130"}, + {file = "yarl-1.17.2-cp39-cp39-win_amd64.whl", hash = "sha256:48e424347a45568413deec6f6ee2d720de2cc0385019bedf44cd93e8638aa0ed"}, + {file = "yarl-1.17.2-py3-none-any.whl", hash = "sha256:dd7abf4f717e33b7487121faf23560b3a50924f80e4bef62b22dab441ded8f3b"}, + {file = "yarl-1.17.2.tar.gz", hash = "sha256:753eaaa0c7195244c84b5cc159dc8204b7fd99f716f11198f999f2332a86b178"}, ] [package.dependencies] idna = ">=2.0" multidict = ">=4.0" +propcache = ">=0.2.0" [[package]] name = "zipp" @@ -3484,4 +3599,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "c656496f9fbb7c29b2df3143c1d72c95b5e121cb6340134c0b8d070f54a08508" +content-hash = "8cb9c38d83eec441391c0528ac2fbefde18c734373b2399e07c69382044e8ced" diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs index 6d26c99832..491b272ac4 100644 --- a/proxy/src/auth/backend/classic.rs +++ b/proxy/src/auth/backend/classic.rs @@ -1,17 +1,17 @@ use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use super::{ComputeCredentials, ComputeUserInfo}; use crate::auth::backend::ComputeCredentialKeys; use crate::auth::{self, AuthFlow}; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::stream::{PqStream, Stream}; use crate::{compute, sasl}; pub(super) async fn authenticate( - ctx: &RequestMonitoring, + ctx: &RequestContext, creds: ComputeUserInfo, client: &mut PqStream>, config: &'static AuthenticationConfig, @@ -21,11 +21,11 @@ pub(super) async fn authenticate( let scram_keys = match secret { #[cfg(any(test, feature = "testing"))] AuthSecret::Md5(_) => { - info!("auth endpoint chooses MD5"); + debug!("auth endpoint chooses MD5"); return Err(auth::AuthError::bad_auth_method("MD5")); } AuthSecret::Scram(secret) => { - info!("auth endpoint chooses SCRAM"); + debug!("auth endpoint chooses SCRAM"); let scram = auth::Scram(&secret, ctx); let auth_outcome = tokio::time::timeout( @@ -50,6 +50,8 @@ pub(super) async fn authenticate( let client_key = match auth_outcome { sasl::Outcome::Success(key) => key, sasl::Outcome::Failure(reason) => { + // TODO: warnings? + // TODO: should we get rid of this because double logging? 
info!("auth backend failed with an error: {reason}"); return Err(auth::AuthError::password_failed(&*creds.user)); } diff --git a/proxy/src/auth/backend/console_redirect.rs b/proxy/src/auth/backend/console_redirect.rs index e25dc3d45e..5772471486 100644 --- a/proxy/src/auth/backend/console_redirect.rs +++ b/proxy/src/auth/backend/console_redirect.rs @@ -8,7 +8,7 @@ use tracing::{info, info_span}; use super::ComputeCredentialKeys; use crate::cache::Cached; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::{self, CachedNodeInfo, NodeInfo}; use crate::error::{ReportableError, UserFacingError}; use crate::proxy::connect_compute::ComputeConnectBackend; @@ -71,7 +71,7 @@ impl ConsoleRedirectBackend { pub(crate) async fn authenticate( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, auth_config: &'static AuthenticationConfig, client: &mut PqStream, ) -> auth::Result { @@ -87,7 +87,7 @@ pub struct ConsoleRedirectNodeInfo(pub(super) NodeInfo); impl ComputeConnectBackend for ConsoleRedirectNodeInfo { async fn wake_compute( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, ) -> Result { Ok(Cached::new_uncached(self.0.clone())) } @@ -98,7 +98,7 @@ impl ComputeConnectBackend for ConsoleRedirectNodeInfo { } async fn authenticate( - ctx: &RequestMonitoring, + ctx: &RequestContext, auth_config: &'static AuthenticationConfig, link_uri: &reqwest::Url, client: &mut PqStream, diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index 1411d908a5..3316543022 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -4,7 +4,7 @@ use tracing::{debug, info}; use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint}; use crate::auth::{self, AuthFlow}; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::intern::EndpointIdInt; use crate::sasl; @@ -15,7 +15,7 @@ use crate::stream::{self, Stream}; /// These properties are benefical for serverless JS workers, so we /// use this mechanism for websocket connections. pub(crate) async fn authenticate_cleartext( - ctx: &RequestMonitoring, + ctx: &RequestContext, info: ComputeUserInfo, client: &mut stream::PqStream>, secret: AuthSecret, @@ -57,7 +57,7 @@ pub(crate) async fn authenticate_cleartext( /// Similar to [`authenticate_cleartext`], but there's a specific password format, /// and passwords are not yet validated (we don't know how to validate them!) pub(crate) async fn password_hack_no_authentication( - ctx: &RequestMonitoring, + ctx: &RequestContext, info: ComputeUserInfoNoEndpoint, client: &mut stream::PqStream>, ) -> auth::Result<(ComputeUserInfo, Vec)> { @@ -73,7 +73,7 @@ pub(crate) async fn password_hack_no_authentication( .get_password() .await?; - info!(project = &*payload.endpoint, "received missing parameter"); + debug!(project = &*payload.endpoint, "received missing parameter"); // Report tentative success; compute node will check the password anyway. 
Ok(( diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index bfc674139b..f721d81aa2 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -17,7 +17,7 @@ use thiserror::Error; use tokio::time::Instant; use crate::auth::backend::ComputeCredentialKeys; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::GetEndpointJwksError; use crate::http::read_body_with_limit; use crate::intern::RoleNameInt; @@ -39,7 +39,7 @@ const JWKS_FETCH_RETRIES: u32 = 3; pub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static { fn fetch_auth_rules( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> impl Future, FetchAuthRulesError>> + Send; } @@ -144,7 +144,7 @@ impl JwkCacheEntryLock { async fn renew_jwks( &self, _permit: JwkRenewalPermit<'_>, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, auth_rules: &F, @@ -261,7 +261,7 @@ impl JwkCacheEntryLock { async fn get_or_update_jwk_cache( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, fetch: &F, @@ -314,7 +314,7 @@ impl JwkCacheEntryLock { async fn check_jwt( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, jwt: &str, client: &reqwest_middleware::ClientWithMiddleware, endpoint: EndpointId, @@ -409,7 +409,7 @@ impl JwkCacheEntryLock { impl JwkCache { pub(crate) async fn check_jwt( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, role_name: &RoleName, fetch: &F, @@ -941,7 +941,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL impl FetchAuthRules for Fetch { async fn fetch_auth_rules( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { Ok(self.0.clone()) @@ -1039,7 +1039,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL for token in &tokens { jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), role, &fetch, @@ -1097,7 +1097,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), &role_name, &fetch, @@ -1136,7 +1136,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL let ep = EndpointId::from("ep"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = jwk_cache .check_jwt(&ctx, ep, &role, &fetch, &bad_jwt) .await @@ -1175,7 +1175,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL // this role_name is not accepted let bad_role_name = RoleName::from("cloud_admin"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = jwk_cache .check_jwt(&ctx, ep, &bad_role_name, &fetch, &jwt) .await @@ -1268,7 +1268,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL let ep = EndpointId::from("ep"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); for test in table { let jwt = new_custom_ec_jwt("1".into(), &key, test.body); @@ -1336,7 +1336,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL jwk_cache .check_jwt( - &RequestMonitoring::test(), + &RequestContext::test(), endpoint.clone(), &role_name, &fetch, diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index f9cb085daf..32e0f53615 100644 
--- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -7,7 +7,7 @@ use super::jwt::{AuthRule, FetchAuthRules}; use crate::auth::backend::jwt::FetchAuthRulesError; use crate::compute::ConnCfg; use crate::compute_ctl::ComputeCtlApi; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo}; use crate::control_plane::NodeInfo; use crate::http; @@ -56,7 +56,7 @@ pub static JWKS_ROLE_MAP: ArcSwapOption = ArcSwapOption::c impl FetchAuthRules for StaticAuthRules { async fn fetch_auth_rules( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { let mappings = JWKS_ROLE_MAP.load(); diff --git a/proxy/src/auth/backend/mod.rs b/proxy/src/auth/backend/mod.rs index 242fe99de2..57ecd5e499 100644 --- a/proxy/src/auth/backend/mod.rs +++ b/proxy/src/auth/backend/mod.rs @@ -14,13 +14,13 @@ use ipnet::{Ipv4Net, Ipv6Net}; use local::LocalBackend; use tokio::io::{AsyncRead, AsyncWrite}; use tokio_postgres::config::AuthKeys; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserInfoMaybeEndpoint}; use crate::cache::Cached; use crate::config::AuthenticationConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ControlPlaneClient; use crate::control_plane::errors::GetAuthInfoError; use crate::control_plane::{ @@ -210,7 +210,7 @@ impl RateBucketInfo { impl AuthenticationConfig { pub(crate) fn check_rate_limit( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, secret: AuthSecret, endpoint: &EndpointId, is_cleartext: bool, @@ -265,7 +265,7 @@ impl AuthenticationConfig { /// /// All authentication flows will emit an AuthenticationOk message if successful. 
async fn auth_quirks( - ctx: &RequestMonitoring, + ctx: &RequestContext, api: &impl control_plane::ControlPlaneApi, user_info: ComputeUserInfoMaybeEndpoint, client: &mut stream::PqStream>, @@ -286,7 +286,7 @@ async fn auth_quirks( Ok(info) => (info, None), }; - info!("fetching user's authentication info"); + debug!("fetching user's authentication info"); let (allowed_ips, maybe_secret) = api.get_allowed_ips_and_secret(ctx, &info).await?; // check allowed list @@ -343,7 +343,7 @@ async fn auth_quirks( } async fn authenticate_with_secret( - ctx: &RequestMonitoring, + ctx: &RequestContext, secret: AuthSecret, info: ComputeUserInfo, client: &mut stream::PqStream>, @@ -396,7 +396,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)] pub(crate) async fn authenticate( self, - ctx: &RequestMonitoring, + ctx: &RequestContext, client: &mut stream::PqStream>, allow_cleartext: bool, config: &'static AuthenticationConfig, @@ -404,7 +404,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { ) -> auth::Result> { let res = match self { Self::ControlPlane(api, user_info) => { - info!( + debug!( user = &*user_info.user, project = user_info.endpoint(), "performing authentication using the console" @@ -427,6 +427,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { } }; + // TODO: replace with some metric info!("user successfully authenticated"); Ok(res) } @@ -435,7 +436,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> { impl Backend<'_, ComputeUserInfo> { pub(crate) async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result { match self { Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await, @@ -445,7 +446,7 @@ impl Backend<'_, ComputeUserInfo> { pub(crate) async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { match self { Self::ControlPlane(api, user_info) => { @@ -460,7 +461,7 @@ impl Backend<'_, ComputeUserInfo> { impl ComputeConnectBackend for Backend<'_, ComputeCredentials> { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result { match self { Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await, @@ -496,7 +497,7 @@ mod tests { use crate::auth::backend::MaskedIp; use crate::auth::{ComputeUserInfoMaybeEndpoint, IpPattern}; use crate::config::AuthenticationConfig; - use crate::context::RequestMonitoring; + use crate::context::RequestContext; use crate::control_plane::{self, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret}; use crate::proxy::NeonOptions; use crate::rate_limiter::{EndpointRateLimiter, RateBucketInfo}; @@ -512,7 +513,7 @@ mod tests { impl control_plane::ControlPlaneApi for Auth { async fn get_role_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result { Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone()))) @@ -520,7 +521,7 @@ mod tests { async fn get_allowed_ips_and_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result< (CachedAllowedIps, Option), @@ -534,7 +535,7 @@ mod tests { async fn get_endpoint_jwks( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _endpoint: crate::types::EndpointId, ) -> Result, control_plane::errors::GetEndpointJwksError> { @@ -543,7 +544,7 @@ mod tests { async fn wake_compute( &self, - _ctx: 
&RequestMonitoring, + _ctx: &RequestContext, _user_info: &super::ComputeUserInfo, ) -> Result { unimplemented!() @@ -622,7 +623,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), @@ -699,7 +700,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), @@ -751,7 +752,7 @@ mod tests { let (mut client, server) = tokio::io::duplex(1024); let mut stream = PqStream::new(Stream::from_raw(server)); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let api = Auth { ips: vec![], secret: AuthSecret::Scram(ServerSecret::build("my-secret-password").await.unwrap()), diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs index ddecae6af5..f6bce9f2d8 100644 --- a/proxy/src/auth/credentials.rs +++ b/proxy/src/auth/credentials.rs @@ -7,10 +7,10 @@ use std::str::FromStr; use itertools::Itertools; use pq_proto::StartupMessageParams; use thiserror::Error; -use tracing::{info, warn}; +use tracing::{debug, warn}; use crate::auth::password_hack::parse_endpoint_param; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{ReportableError, UserFacingError}; use crate::metrics::{Metrics, SniKind}; use crate::proxy::NeonOptions; @@ -86,7 +86,7 @@ pub(crate) fn endpoint_sni( impl ComputeUserInfoMaybeEndpoint { pub(crate) fn parse( - ctx: &RequestMonitoring, + ctx: &RequestContext, params: &StartupMessageParams, sni: Option<&str>, common_names: Option<&HashSet>, @@ -147,22 +147,22 @@ impl ComputeUserInfoMaybeEndpoint { } let metrics = Metrics::get(); - info!(%user, "credentials"); + debug!(%user, "credentials"); if sni.is_some() { - info!("Connection with sni"); + debug!("Connection with sni"); metrics.proxy.accepted_connections_by_sni.inc(SniKind::Sni); } else if endpoint.is_some() { metrics .proxy .accepted_connections_by_sni .inc(SniKind::NoSni); - info!("Connection without sni"); + debug!("Connection without sni"); } else { metrics .proxy .accepted_connections_by_sni .inc(SniKind::PasswordHack); - info!("Connection with password hack"); + debug!("Connection with password hack"); } let options = NeonOptions::parse_params(params); @@ -260,7 +260,7 @@ mod tests { fn parse_bare_minimum() -> anyhow::Result<()> { // According to postgresql, only `user` should be required. 
let options = StartupMessageParams::new([("user", "john_doe")]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id, None); @@ -275,7 +275,7 @@ mod tests { ("database", "world"), // should be ignored ("foo", "bar"), // should be ignored ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id, None); @@ -290,7 +290,7 @@ mod tests { let sni = Some("foo.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.user, "john_doe"); @@ -307,7 +307,7 @@ mod tests { ("options", "-ckey=1 project=bar -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id.as_deref(), Some("bar")); @@ -322,7 +322,7 @@ mod tests { ("options", "-ckey=1 endpoint=bar -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert_eq!(user_info.endpoint_id.as_deref(), Some("bar")); @@ -340,7 +340,7 @@ mod tests { ), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert!(user_info.endpoint_id.is_none()); @@ -355,7 +355,7 @@ mod tests { ("options", "-ckey=1 endpoint=bar project=foo -c geqo=off"), ]); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, None, None)?; assert_eq!(user_info.user, "john_doe"); assert!(user_info.endpoint_id.is_none()); @@ -370,7 +370,7 @@ mod tests { let sni = Some("baz.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.user, "john_doe"); @@ -385,14 +385,14 @@ mod tests { let common_names = Some(["a.com".into(), "b.com".into()].into()); let sni = Some("p1.a.com"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("p1")); let common_names = Some(["a.com".into(), "b.com".into()].into()); let sni = Some("p1.b.com"); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("p1")); @@ -408,7 +408,7 @@ mod tests { let sni = Some("second.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, 
&options, sni, common_names.as_ref()) .expect_err("should fail"); match err { @@ -427,7 +427,7 @@ mod tests { let sni = Some("project.localhost"); let common_names = Some(["example.com".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let err = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref()) .expect_err("should fail"); match err { @@ -447,7 +447,7 @@ mod tests { let sni = Some("project.localhost"); let common_names = Some(["localhost".into()].into()); - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let user_info = ComputeUserInfoMaybeEndpoint::parse(&ctx, &options, sni, common_names.as_ref())?; assert_eq!(user_info.endpoint_id.as_deref(), Some("project")); diff --git a/proxy/src/auth/flow.rs b/proxy/src/auth/flow.rs index 6294549ff6..9c6ce151cb 100644 --- a/proxy/src/auth/flow.rs +++ b/proxy/src/auth/flow.rs @@ -11,7 +11,7 @@ use tracing::info; use super::backend::ComputeCredentialKeys; use super::{AuthError, PasswordHackPayload}; use crate::config::TlsServerEndPoint; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::AuthSecret; use crate::intern::EndpointIdInt; use crate::sasl; @@ -32,7 +32,7 @@ pub(crate) struct Begin; /// Use [SCRAM](crate::scram)-based auth in [`AuthFlow`]. pub(crate) struct Scram<'a>( pub(crate) &'a scram::ServerSecret, - pub(crate) &'a RequestMonitoring, + pub(crate) &'a RequestContext, ); impl AuthMethod for Scram<'_> { @@ -178,6 +178,8 @@ impl AuthFlow<'_, S, Scram<'_>> { SCRAM_SHA_256_PLUS => ctx.set_auth_method(crate::context::AuthMethod::ScramSha256Plus), _ => {} } + + // TODO: make this a metric instead info!("client chooses {}", sasl.method); let outcome = sasl::SaslStream::new(self.stream, sasl.message) diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs index fbdb1dec15..c4ec1300f2 100644 --- a/proxy/src/bin/local_proxy.rs +++ b/proxy/src/bin/local_proxy.rs @@ -32,11 +32,12 @@ project_git_version!(GIT_VERSION); project_build_tag!(BUILD_TAG); use clap::Parser; +use thiserror::Error; use tokio::net::TcpListener; use tokio::sync::Notify; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; use utils::sentry_init::init_sentry; use utils::{pid_file, project_build_tag, project_git_version}; @@ -124,8 +125,9 @@ async fn main() -> anyhow::Result<()> { Metrics::install(Arc::new(ThreadPoolMetrics::new(0))); - info!("Version: {GIT_VERSION}"); - info!("Build_tag: {BUILD_TAG}"); + // TODO: refactor these to use labels + debug!("Version: {GIT_VERSION}"); + debug!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { revision: GIT_VERSION, build_tag: BUILD_TAG, @@ -305,26 +307,46 @@ fn build_auth_backend( Ok(Box::leak(Box::new(auth_backend))) } +#[derive(Error, Debug)] +enum RefreshConfigError { + #[error(transparent)] + Read(#[from] std::io::Error), + #[error(transparent)] + Parse(#[from] serde_json::Error), + #[error(transparent)] + Validate(anyhow::Error), +} + async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc) { + let mut init = true; loop { rx.notified().await; match refresh_config_inner(&path).await { Ok(()) => {} + // don't log for file not found errors if this is the first time we are checking + // for computes that don't use local_proxy, this is not an error. 
+ Err(RefreshConfigError::Read(e)) + if init && e.kind() == std::io::ErrorKind::NotFound => + { + debug!(error=?e, ?path, "could not read config file"); + } Err(e) => { error!(error=?e, ?path, "could not read config file"); } } + + init = false; } } -async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { +async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> { let bytes = tokio::fs::read(&path).await?; let data: LocalProxySpec = serde_json::from_slice(&bytes)?; let mut jwks_set = vec![]; - for jwks in data.jwks.into_iter().flatten() { + fn parse_jwks_settings(jwks: compute_api::spec::JwksSettings) -> anyhow::Result { let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?; ensure!( @@ -367,7 +389,7 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { } } - jwks_set.push(JwksSettings { + Ok(JwksSettings { id: jwks.id, jwks_url, provider_name: jwks.provider_name, @@ -381,6 +403,10 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> { }) } + for jwks in data.jwks.into_iter().flatten() { + jwks_set.push(parse_jwks_settings(jwks).map_err(RefreshConfigError::Validate)?); + } + info!("successfully loaded new config"); JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set }))); diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs index ef5b5e8509..623a0fd3b2 100644 --- a/proxy/src/bin/pg_sni_router.rs +++ b/proxy/src/bin/pg_sni_router.rs @@ -11,7 +11,7 @@ use futures::future::Either; use futures::TryFutureExt; use itertools::Itertools; use proxy::config::TlsServerEndPoint; -use proxy::context::RequestMonitoring; +use proxy::context::RequestContext; use proxy::metrics::{Metrics, ThreadPoolMetrics}; use proxy::protocol2::ConnectionInfo; use proxy::proxy::{copy_bidirectional_client_compute, run_until_cancelled, ErrorSource}; @@ -177,7 +177,7 @@ async fn task_main( .context("failed to set socket option")?; info!(%peer_addr, "serving"); - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, ConnectionInfo { addr: peer_addr, @@ -208,7 +208,7 @@ async fn task_main( const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)"; async fn ssl_handshake( - ctx: &RequestMonitoring, + ctx: &RequestContext, raw_stream: S, tls_config: Arc, tls_server_end_point: TlsServerEndPoint, @@ -259,7 +259,7 @@ async fn ssl_handshake( } async fn handle_client( - ctx: RequestMonitoring, + ctx: RequestContext, dest_suffix: Arc, tls_config: Arc, tls_server_end_point: TlsServerEndPoint, diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index fda5b25961..232721338d 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -288,6 +288,7 @@ async fn main() -> anyhow::Result<()> { let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook(); let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]); + // TODO: refactor these to use labels info!("Version: {GIT_VERSION}"); info!("Build_tag: {BUILD_TAG}"); let neon_metrics = ::metrics::NeonMetrics::new(::metrics::BuildInfo { diff --git a/proxy/src/cache/endpoints.rs b/proxy/src/cache/endpoints.rs index 07769e053c..20db1fbb14 100644 --- a/proxy/src/cache/endpoints.rs +++ b/proxy/src/cache/endpoints.rs @@ -11,7 +11,7 @@ use tokio_util::sync::CancellationToken; use tracing::info; use crate::config::EndpointCacheConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::intern::{BranchIdInt, 
EndpointIdInt, ProjectIdInt}; use crate::metrics::{Metrics, RedisErrors, RedisEventsCount}; use crate::rate_limiter::GlobalRateLimiter; @@ -75,7 +75,7 @@ impl EndpointsCache { } } - pub(crate) fn is_valid(&self, ctx: &RequestMonitoring, endpoint: &EndpointId) -> bool { + pub(crate) fn is_valid(&self, ctx: &RequestContext, endpoint: &EndpointId) -> bool { if !self.ready.load(Ordering::Acquire) { // the endpoint cache is not yet fully initialised. return true; diff --git a/proxy/src/cancellation.rs b/proxy/src/cancellation.rs index db0970adcb..3ad2d55b53 100644 --- a/proxy/src/cancellation.rs +++ b/proxy/src/cancellation.rs @@ -7,7 +7,7 @@ use thiserror::Error; use tokio::net::TcpStream; use tokio::sync::Mutex; use tokio_postgres::{CancelToken, NoTls}; -use tracing::info; +use tracing::{debug, info}; use uuid::Uuid; use crate::error::ReportableError; @@ -73,7 +73,7 @@ impl CancellationHandler
<P> { break key; }; - info!("registered new query cancellation key {key}"); + debug!("registered new query cancellation key {key}"); Session { key, cancellation_handler: self, @@ -165,7 +165,7 @@ impl CancelClosure { pub(crate) async fn try_cancel_query(self) -> Result<(), CancelError> { let socket = TcpStream::connect(self.socket_addr).await?; self.cancel_token.cancel_query_raw(socket, NoTls).await?; - info!("query was cancelled"); + debug!("query was cancelled"); Ok(()) } } @@ -182,7 +182,7 @@ impl<P> Session<P>
{ /// Store the cancel token for the given session. /// This enables query cancellation in `crate::proxy::prepare_client_connection`. pub(crate) fn enable_query_cancellation(&self, cancel_closure: CancelClosure) -> CancelKeyData { - info!("enabling query cancellation for this session"); + debug!("enabling query cancellation for this session"); self.cancellation_handler .map .insert(self.key, Some(cancel_closure)); @@ -194,7 +194,7 @@ impl<P> Session<P> { impl<P> Drop for Session<P>
{ fn drop(&mut self) { self.cancellation_handler.map.remove(&self.key); - info!("dropped query cancellation key {}", &self.key); + debug!("dropped query cancellation key {}", &self.key); } } diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index ca4a348ed8..e7fbe9ab47 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -14,11 +14,11 @@ use thiserror::Error; use tokio::net::TcpStream; use tokio_postgres::tls::MakeTlsConnect; use tokio_postgres_rustls::MakeRustlsConnect; -use tracing::{error, info, warn}; +use tracing::{debug, error, info, warn}; use crate::auth::parse_endpoint_param; use crate::cancellation::CancelClosure; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::messages::MetricsAuxInfo; @@ -213,7 +213,7 @@ impl ConnCfg { }; let connect_once = |host, port| { - info!("trying to connect to compute node at {host}:{port}"); + debug!("trying to connect to compute node at {host}:{port}"); connect_with_timeout(host, port).and_then(|socket| async { let socket_addr = socket.peer_addr()?; // This prevents load balancer from severing the connection. @@ -286,7 +286,7 @@ impl ConnCfg { /// Connect to a corresponding compute node. pub(crate) async fn connect( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, allow_self_signed_compute: bool, aux: MetricsAuxInfo, timeout: Duration, @@ -328,6 +328,7 @@ impl ConnCfg { tracing::Span::current().record("pid", tracing::field::display(client.get_process_id())); let stream = connection.stream.into_inner(); + // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) info!( cold_start_info = ctx.cold_start_info().as_str(), "connected to compute node at {host} ({socket_addr}) sslmode={:?}", diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index cc456f3667..c88b2936db 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -8,7 +8,7 @@ use tracing::{debug, error, info, Instrument}; use crate::auth::backend::ConsoleRedirectBackend; use crate::cancellation::{CancellationHandlerMain, CancellationHandlerMainInternal}; use crate::config::{ProxyConfig, ProxyProtocolV2}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::{Metrics, NumClientConnectionsGuard}; use crate::protocol2::{read_proxy_protocol, ConnectHeader, ConnectionInfo}; @@ -82,7 +82,7 @@ pub async fn task_main( } }; - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, peer_addr, crate::metrics::Protocol::Tcp, @@ -141,12 +141,12 @@ pub async fn task_main( pub(crate) async fn handle_client( config: &'static ProxyConfig, backend: &'static ConsoleRedirectBackend, - ctx: &RequestMonitoring, + ctx: &RequestContext, cancellation_handler: Arc, stream: S, conn_gauge: NumClientConnectionsGuard<'static>, ) -> Result>, ClientRequestError> { - info!( + debug!( protocol = %ctx.protocol(), "handling interactive connection from client" ); diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index 6cf99c0c97..6d2d2d51ce 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -8,7 +8,7 @@ use pq_proto::StartupMessageParams; use smol_str::SmolStr; use tokio::sync::mpsc; use tracing::field::display; -use tracing::{debug, info, info_span, Span}; +use tracing::{debug, info_span, Span}; use try_lock::TryLock; use 
uuid::Uuid; @@ -32,15 +32,15 @@ pub(crate) static LOG_CHAN_DISCONNECT: OnceCell, + TryLock, ); -struct RequestMonitoringInner { +struct RequestContextInner { pub(crate) conn_info: ConnectionInfo, pub(crate) session_id: Uuid, pub(crate) protocol: Protocol, @@ -81,10 +81,10 @@ pub(crate) enum AuthMethod { Cleartext, } -impl Clone for RequestMonitoring { +impl Clone for RequestContext { fn clone(&self) -> Self { let inner = self.0.try_lock().expect("should not deadlock"); - let new = RequestMonitoringInner { + let new = RequestContextInner { conn_info: inner.conn_info.clone(), session_id: inner.session_id, protocol: inner.protocol, @@ -115,13 +115,14 @@ impl Clone for RequestMonitoring { } } -impl RequestMonitoring { +impl RequestContext { pub fn new( session_id: Uuid, conn_info: ConnectionInfo, protocol: Protocol, region: &'static str, ) -> Self { + // TODO: be careful with long lived spans let span = info_span!( "connect_request", %protocol, @@ -131,7 +132,7 @@ impl RequestMonitoring { role = tracing::field::Empty, ); - let inner = RequestMonitoringInner { + let inner = RequestContextInner { conn_info, session_id, protocol, @@ -167,7 +168,7 @@ impl RequestMonitoring { let ip = IpAddr::from([127, 0, 0, 1]); let addr = SocketAddr::new(ip, 5432); let conn_info = ConnectionInfo { addr, extra: None }; - RequestMonitoring::new(Uuid::now_v7(), conn_info, Protocol::Tcp, "test") + RequestContext::new(Uuid::now_v7(), conn_info, Protocol::Tcp, "test") } pub(crate) fn console_application_name(&self) -> String { @@ -324,7 +325,7 @@ impl RequestMonitoring { } pub(crate) struct LatencyTimerPause<'a> { - ctx: &'a RequestMonitoring, + ctx: &'a RequestContext, start: tokio::time::Instant, waiting_for: Waiting, } @@ -340,7 +341,7 @@ impl Drop for LatencyTimerPause<'_> { } } -impl RequestMonitoringInner { +impl RequestContextInner { fn set_cold_start_info(&mut self, info: ColdStartInfo) { self.cold_start_info = info; self.latency_timer.cold_start_info(info); @@ -384,6 +385,10 @@ impl RequestMonitoringInner { } else { ConnectOutcome::Failed }; + + // TODO: get rid of entirely/refactor + // check for false positives + // AND false negatives if let Some(rejected) = self.rejected { let ep = self .endpoint_id @@ -391,7 +396,7 @@ impl RequestMonitoringInner { .map(|x| x.as_str()) .unwrap_or_default(); // This makes sense only if cache is disabled - info!( + debug!( ?outcome, ?rejected, ?ep, @@ -425,7 +430,7 @@ impl RequestMonitoringInner { } } -impl Drop for RequestMonitoringInner { +impl Drop for RequestContextInner { fn drop(&mut self) { if self.sender.is_some() { self.log_connect(); diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index 4112de646f..9bf3a275bb 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -20,7 +20,7 @@ use tokio_util::sync::CancellationToken; use tracing::{debug, info, Span}; use utils::backoff; -use super::{RequestMonitoringInner, LOG_CHAN}; +use super::{RequestContextInner, LOG_CHAN}; use crate::config::remote_storage_from_toml; use crate::context::LOG_CHAN_DISCONNECT; @@ -117,8 +117,8 @@ impl serde::Serialize for Options<'_> { } } -impl From<&RequestMonitoringInner> for RequestData { - fn from(value: &RequestMonitoringInner) -> Self { +impl From<&RequestContextInner> for RequestData { + fn from(value: &RequestContextInner) -> Self { Self { session_id: value.session_id, peer_addr: value.conn_info.addr.ip().to_string(), diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index 
fd333d2aac..500acad50f 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -13,7 +13,7 @@ use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::ComputeUserInfo; use crate::auth::IpPattern; use crate::cache::Cached; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::{CachedAllowedIps, CachedRoleSecret}; use crate::control_plane::errors::{ ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, @@ -206,7 +206,7 @@ impl super::ControlPlaneApi for MockControlPlane { #[tracing::instrument(skip_all)] async fn get_role_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { Ok(CachedRoleSecret::new_uncached( @@ -216,7 +216,7 @@ impl super::ControlPlaneApi for MockControlPlane { async fn get_allowed_ips_and_secret( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { Ok(( @@ -229,7 +229,7 @@ impl super::ControlPlaneApi for MockControlPlane { async fn get_endpoint_jwks( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { self.do_get_endpoint_jwks(endpoint).await @@ -238,7 +238,7 @@ impl super::ControlPlaneApi for MockControlPlane { #[tracing::instrument(skip_all)] async fn wake_compute( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _user_info: &ComputeUserInfo, ) -> Result { self.do_wake_compute().map_ok(Cached::new_uncached).await diff --git a/proxy/src/control_plane/client/mod.rs b/proxy/src/control_plane/client/mod.rs index e388d8a538..f8f74372f0 100644 --- a/proxy/src/control_plane/client/mod.rs +++ b/proxy/src/control_plane/client/mod.rs @@ -8,14 +8,14 @@ use std::time::Duration; use dashmap::DashMap; use tokio::time::Instant; -use tracing::info; +use tracing::{debug, info}; use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError}; use crate::auth::backend::ComputeUserInfo; use crate::cache::endpoints::EndpointsCache; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::{ errors, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneApi, NodeInfoCache, }; @@ -41,7 +41,7 @@ pub enum ControlPlaneClient { impl ControlPlaneApi for ControlPlaneClient { async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { match self { @@ -57,7 +57,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError> { match self { @@ -71,7 +71,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, errors::GetEndpointJwksError> { match self { @@ -85,7 +85,7 @@ impl ControlPlaneApi for ControlPlaneClient { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { match self { @@ -214,7 +214,7 @@ impl ApiLocks { self.metrics .semaphore_acquire_seconds .observe(now.elapsed().as_secs_f64()); - info!("acquired permit 
{:?}", now.elapsed().as_secs_f64()); + debug!("acquired permit {:?}", now.elapsed().as_secs_f64()); Ok(WakeComputePermit { permit: permit? }) } @@ -271,7 +271,7 @@ impl WakeComputePermit { impl FetchAuthRules for ControlPlaneClient { async fn fetch_auth_rules( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, FetchAuthRulesError> { self.get_endpoint_jwks(ctx, endpoint) diff --git a/proxy/src/control_plane/client/neon.rs b/proxy/src/control_plane/client/neon.rs index 26ff4e1402..53f9234926 100644 --- a/proxy/src/control_plane/client/neon.rs +++ b/proxy/src/control_plane/client/neon.rs @@ -14,7 +14,7 @@ use super::super::messages::{ControlPlaneErrorMessage, GetRoleSecret, WakeComput use crate::auth::backend::jwt::AuthRule; use crate::auth::backend::ComputeUserInfo; use crate::cache::Cached; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::caches::ApiCaches; use crate::control_plane::errors::{ ControlPlaneError, GetAuthInfoError, GetEndpointJwksError, WakeComputeError, @@ -65,7 +65,7 @@ impl NeonControlPlaneClient { async fn do_get_auth_info( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { if !self @@ -73,6 +73,8 @@ impl NeonControlPlaneClient { .endpoints_cache .is_valid(ctx, &user_info.endpoint.normalize()) { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. info!("endpoint is not valid, skipping the request"); return Ok(AuthInfo::default()); } @@ -92,7 +94,7 @@ impl NeonControlPlaneClient { ]) .build()?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self.endpoint.execute(request).await?; @@ -104,10 +106,12 @@ impl NeonControlPlaneClient { // TODO(anna): retry Err(e) => { return if e.get_reason().is_not_found() { + // TODO: refactor this because it's weird + // this is a failure to authenticate but we return Ok. 
Ok(AuthInfo::default()) } else { Err(e.into()) - } + }; } }; @@ -137,7 +141,7 @@ impl NeonControlPlaneClient { async fn do_get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { if !self @@ -163,7 +167,7 @@ impl NeonControlPlaneClient { .build() .map_err(GetEndpointJwksError::RequestBuild)?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self @@ -196,7 +200,7 @@ impl NeonControlPlaneClient { async fn do_wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let request_id = ctx.session_id().to_string(); @@ -220,7 +224,7 @@ impl NeonControlPlaneClient { let request = request_builder.build()?; - info!(url = request.url().as_str(), "sending http request"); + debug!(url = request.url().as_str(), "sending http request"); let start = Instant::now(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane); let response = self.endpoint.execute(request).await?; @@ -249,6 +253,7 @@ impl NeonControlPlaneClient { Ok(node) } .map_err(crate::error::log_error) + // TODO: redo this span stuff .instrument(info_span!("http", id = request_id)) .await } @@ -258,7 +263,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let normalized_ep = &user_info.endpoint.normalize(); @@ -292,7 +297,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), GetAuthInfoError> { let normalized_ep = &user_info.endpoint.normalize(); @@ -334,7 +339,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, GetEndpointJwksError> { self.do_get_endpoint_jwks(ctx, endpoint).await @@ -343,7 +348,7 @@ impl super::ControlPlaneApi for NeonControlPlaneClient { #[tracing::instrument(skip_all)] async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result { let key = user_info.endpoint_cache_key(); diff --git a/proxy/src/control_plane/mod.rs b/proxy/src/control_plane/mod.rs index 70607ac0d0..41972e4e44 100644 --- a/proxy/src/control_plane/mod.rs +++ b/proxy/src/control_plane/mod.rs @@ -17,7 +17,7 @@ use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; use crate::auth::IpPattern; use crate::cache::project_info::ProjectInfoCacheImpl; use crate::cache::{Cached, TimedLru}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ControlPlaneErrorMessage, MetricsAuxInfo}; use crate::intern::ProjectIdInt; use crate::types::{EndpointCacheKey, EndpointId}; @@ -75,7 +75,7 @@ pub(crate) struct NodeInfo { impl NodeInfo { pub(crate) async fn connect( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, timeout: Duration, ) -> Result { self.config @@ -116,26 +116,26 @@ pub(crate) trait ControlPlaneApi { /// We still have to mock the scram to avoid leaking information that user doesn't exist. 
async fn get_role_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result; async fn get_allowed_ips_and_secret( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result<(CachedAllowedIps, Option), errors::GetAuthInfoError>; async fn get_endpoint_jwks( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, endpoint: EndpointId, ) -> Result, errors::GetEndpointJwksError>; /// Wake up the compute node and return the corresponding connection info. async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, ) -> Result; } diff --git a/proxy/src/jemalloc.rs b/proxy/src/jemalloc.rs index 0fae78b60c..9888458ee2 100644 --- a/proxy/src/jemalloc.rs +++ b/proxy/src/jemalloc.rs @@ -38,7 +38,7 @@ where impl MetricRecorder { pub fn new() -> Result { - tracing::info!( + tracing::debug!( config = config::malloc_conf::read()?, version = version::read()?, "starting jemalloc recorder" diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index 659b7afa68..b30aec09c1 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -7,7 +7,7 @@ use super::retry::ShouldRetryWakeCompute; use crate::auth::backend::ComputeCredentialKeys; use crate::compute::{self, PostgresConnection, COULD_NOT_CONNECT}; use crate::config::RetryConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::locks::ApiLocks; use crate::control_plane::{self, CachedNodeInfo, NodeInfo}; @@ -47,7 +47,7 @@ pub(crate) trait ConnectMechanism { type Error: From; async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, timeout: time::Duration, ) -> Result; @@ -59,7 +59,7 @@ pub(crate) trait ConnectMechanism { pub(crate) trait ComputeConnectBackend { async fn wake_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, ) -> Result; fn get_keys(&self) -> &ComputeCredentialKeys; @@ -82,7 +82,7 @@ impl ConnectMechanism for TcpMechanism<'_> { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &control_plane::CachedNodeInfo, timeout: time::Duration, ) -> Result { @@ -99,7 +99,7 @@ impl ConnectMechanism for TcpMechanism<'_> { /// Try to connect to the compute node, retrying if necessary. #[tracing::instrument(skip_all)] pub(crate) async fn connect_to_compute( - ctx: &RequestMonitoring, + ctx: &RequestContext, mechanism: &M, user_info: &B, allow_self_signed_compute: bool, diff --git a/proxy/src/proxy/handshake.rs b/proxy/src/proxy/handshake.rs index a67f1b8112..3ada3a9995 100644 --- a/proxy/src/proxy/handshake.rs +++ b/proxy/src/proxy/handshake.rs @@ -9,7 +9,7 @@ use tracing::{info, warn}; use crate::auth::endpoint_sni; use crate::config::{TlsConfig, PG_ALPN_PROTOCOL}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::Metrics; use crate::proxy::ERR_INSECURE_CONNECTION; @@ -66,7 +66,7 @@ pub(crate) enum HandshakeData { /// we also take an extra care of propagating only the select handshake errors to client. 
#[tracing::instrument(skip_all)] pub(crate) async fn handshake( - ctx: &RequestMonitoring, + ctx: &RequestContext, stream: S, mut tls: Option<&TlsConfig>, record_handshake_error: bool, diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 17721c23d5..4be4006d15 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ -25,7 +25,7 @@ use self::connect_compute::{connect_to_compute, TcpMechanism}; use self::passthrough::ProxyPassthrough; use crate::cancellation::{self, CancellationHandlerMain, CancellationHandlerMainInternal}; use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::ReportableError; use crate::metrics::{Metrics, NumClientConnectionsGuard}; use crate::protocol2::{read_proxy_protocol, ConnectHeader, ConnectionInfo}; @@ -117,7 +117,7 @@ pub async fn task_main( } }; - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Tcp, @@ -247,7 +247,7 @@ impl ReportableError for ClientRequestError { pub(crate) async fn handle_client( config: &'static ProxyConfig, auth_backend: &'static auth::Backend<'static, ()>, - ctx: &RequestMonitoring, + ctx: &RequestContext, cancellation_handler: Arc, stream: S, mode: ClientMode, diff --git a/proxy/src/proxy/tests/mitm.rs b/proxy/src/proxy/tests/mitm.rs index df9f79a7e3..fe211adfeb 100644 --- a/proxy/src/proxy/tests/mitm.rs +++ b/proxy/src/proxy/tests/mitm.rs @@ -36,7 +36,7 @@ async fn proxy_mitm( // begin handshake with end_server let end_server = connect_tls(server2, client_config2.make_tls_connect().unwrap()).await; let (end_client, startup) = match handshake( - &RequestMonitoring::test(), + &RequestContext::test(), client1, Some(&server_config1), false, diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index be821925b5..3de8ca8736 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -162,7 +162,7 @@ impl TestAuth for Scram { stream: &mut PqStream>, ) -> anyhow::Result<()> { let outcome = auth::AuthFlow::new(stream) - .begin(auth::Scram(&self.0, &RequestMonitoring::test())) + .begin(auth::Scram(&self.0, &RequestContext::test())) .await? .authenticate() .await?; @@ -182,11 +182,10 @@ async fn dummy_proxy( auth: impl TestAuth + Send, ) -> anyhow::Result<()> { let (client, _) = read_proxy_protocol(client).await?; - let mut stream = - match handshake(&RequestMonitoring::test(), client, tls.as_ref(), false).await? { - HandshakeData::Startup(stream, _) => stream, - HandshakeData::Cancel(_) => bail!("cancellation not supported"), - }; + let mut stream = match handshake(&RequestContext::test(), client, tls.as_ref(), false).await? 
{ + HandshakeData::Startup(stream, _) => stream, + HandshakeData::Cancel(_) => bail!("cancellation not supported"), + }; auth.authenticate(&mut stream).await?; @@ -466,7 +465,7 @@ impl ConnectMechanism for TestConnectMechanism { async fn connect_once( &self, - _ctx: &RequestMonitoring, + _ctx: &RequestContext, _node_info: &control_plane::CachedNodeInfo, _timeout: std::time::Duration, ) -> Result { @@ -581,7 +580,7 @@ fn helper_create_connect_info( async fn connect_to_compute_success() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -599,7 +598,7 @@ async fn connect_to_compute_success() { async fn connect_to_compute_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -618,7 +617,7 @@ async fn connect_to_compute_retry() { async fn connect_to_compute_non_retry_1() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Fail]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -637,7 +636,7 @@ async fn connect_to_compute_non_retry_1() { async fn connect_to_compute_non_retry_2() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Fail, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -657,7 +656,7 @@ async fn connect_to_compute_non_retry_3() { let _ = env_logger::try_init(); tokio::time::pause(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![Wake, Retry, Wake, Retry, Retry, Retry, Retry, Retry]); let user_info = helper_create_connect_info(&mechanism); @@ -689,7 +688,7 @@ async fn connect_to_compute_non_retry_3() { async fn wake_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![WakeRetry, Wake, Connect]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { @@ -708,7 +707,7 @@ async fn wake_retry() { async fn wake_non_retry() { let _ = env_logger::try_init(); use ConnectAction::*; - let ctx = RequestMonitoring::test(); + let ctx = RequestContext::test(); let mechanism = TestConnectMechanism::new(vec![WakeRetry, WakeFail]); let user_info = helper_create_connect_info(&mechanism); let config = RetryConfig { diff --git a/proxy/src/proxy/wake_compute.rs b/proxy/src/proxy/wake_compute.rs index f9f46bb66c..d09e0b1f41 100644 --- a/proxy/src/proxy/wake_compute.rs +++ b/proxy/src/proxy/wake_compute.rs @@ -2,7 +2,7 @@ use tracing::{error, info, warn}; use super::connect_compute::ComputeConnectBackend; use crate::config::RetryConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::errors::WakeComputeError; use crate::control_plane::CachedNodeInfo; use 
crate::error::ReportableError; @@ -13,7 +13,7 @@ use crate::proxy::retry::{retry_after, should_retry}; pub(crate) async fn wake_compute( num_retries: &mut u32, - ctx: &RequestMonitoring, + ctx: &RequestContext, api: &B, config: RetryConfig, ) -> Result { diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 7fc5bd236d..d9dcf6fbb7 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -23,7 +23,7 @@ use crate::compute_ctl::{ ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest, }; use crate::config::ProxyConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::client::ApiLockError; use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError}; use crate::control_plane::locks::ApiLocks; @@ -33,7 +33,7 @@ use crate::intern::EndpointIdInt; use crate::proxy::connect_compute::ConnectMechanism; use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute}; use crate::rate_limiter::EndpointRateLimiter; -use crate::types::{EndpointId, Host}; +use crate::types::{EndpointId, Host, LOCAL_PROXY_SUFFIX}; pub(crate) struct PoolingBackend { pub(crate) http_conn_pool: Arc>, @@ -48,7 +48,7 @@ pub(crate) struct PoolingBackend { impl PoolingBackend { pub(crate) async fn authenticate_with_password( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, password: &[u8], ) -> Result { @@ -110,7 +110,7 @@ impl PoolingBackend { pub(crate) async fn authenticate_with_jwt( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, user_info: &ComputeUserInfo, jwt: String, ) -> Result { @@ -161,7 +161,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_compute( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, keys: ComputeCredentials, force_new: bool, @@ -201,7 +201,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_local_proxy( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, ) -> Result, HttpConnError> { info!("pool: looking for an existing connection"); @@ -215,7 +215,10 @@ impl PoolingBackend { let backend = self.auth_backend.as_ref().map(|()| ComputeCredentials { info: ComputeUserInfo { user: conn_info.user_info.user.clone(), - endpoint: EndpointId::from(format!("{}-local-proxy", conn_info.user_info.endpoint)), + endpoint: EndpointId::from(format!( + "{}{LOCAL_PROXY_SUFFIX}", + conn_info.user_info.endpoint.normalize() + )), options: conn_info.user_info.options.clone(), }, keys: crate::auth::backend::ComputeCredentialKeys::None, @@ -246,7 +249,7 @@ impl PoolingBackend { #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] pub(crate) async fn connect_to_local_postgres( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, ) -> Result, HttpConnError> { if let Some(client) = self.local_pool.get(ctx, &conn_info)? 
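The `connect_to_local_proxy` hunk above stops hand-building the local-proxy endpoint name and instead normalizes the endpoint before appending a shared `LOCAL_PROXY_SUFFIX` constant from `crate::types`. A hedged sketch of the resulting shape; the suffix value is inferred from the string the old code formatted, and the `normalize` body here is only a stand-in for whatever `EndpointId::normalize()` actually does:

```rust
// Assumed value: the replaced code appended "-local-proxy" by hand.
const LOCAL_PROXY_SUFFIX: &str = "-local-proxy";

// Hypothetical stand-in for EndpointId::normalize(); the real method lives in
// proxy::types and may do more than lowercasing.
fn normalize(endpoint: &str) -> String {
    endpoint.to_ascii_lowercase()
}

fn local_proxy_endpoint(endpoint: &str) -> String {
    format!("{}{LOCAL_PROXY_SUFFIX}", normalize(endpoint))
}
```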
{ @@ -487,7 +490,7 @@ impl ConnectMechanism for TokioMechanism { async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &CachedNodeInfo, timeout: Duration, ) -> Result { @@ -537,7 +540,7 @@ impl ConnectMechanism for HyperMechanism { async fn connect_once( &self, - ctx: &RequestMonitoring, + ctx: &RequestContext, node_info: &CachedNodeInfo, timeout: Duration, ) -> Result { diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index 1845603bf7..07ba1ae9af 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -21,7 +21,7 @@ use { use super::conn_pool_lib::{ Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, GlobalConnPool, }; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::MetricsAuxInfo; use crate::metrics::Metrics; @@ -53,7 +53,7 @@ impl fmt::Display for ConnInfo { pub(crate) fn poll_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, client: C, mut connection: tokio_postgres::Connection, diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index 61c39c32c9..fe3c422c3b 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -15,7 +15,7 @@ use super::conn_pool::ClientDataRemote; use super::http_conn_pool::ClientDataHttp; use super::local_conn_pool::ClientDataLocal; use crate::auth::backend::ComputeUserInfo; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::types::{DbName, EndpointCacheKey, RoleName}; @@ -380,7 +380,7 @@ impl GlobalConnPool { pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let mut client: Option> = None; diff --git a/proxy/src/serverless/http_conn_pool.rs b/proxy/src/serverless/http_conn_pool.rs index a1d4473b01..bc86c4b1cd 100644 --- a/proxy/src/serverless/http_conn_pool.rs +++ b/proxy/src/serverless/http_conn_pool.rs @@ -12,7 +12,7 @@ use tracing::{debug, error, info, info_span, Instrument}; use super::backend::HttpConnError; use super::conn_pool_lib::{ClientInnerExt, ConnInfo}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::types::EndpointCacheKey; @@ -212,7 +212,7 @@ impl GlobalConnPool { #[expect(unused_results)] pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let result: Result>, HttpConnError>; @@ -280,7 +280,7 @@ impl GlobalConnPool { pub(crate) fn poll_http2_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, client: Send, connection: Connect, diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs index 99d4329f88..cadcbd7530 100644 --- a/proxy/src/serverless/local_conn_pool.rs +++ b/proxy/src/serverless/local_conn_pool.rs @@ -36,7 +36,7 @@ use super::conn_pool_lib::{ Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, DbUserConn, EndpointConnPool, }; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use 
crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::Metrics; @@ -88,7 +88,7 @@ impl LocalConnPool { pub(crate) fn get( self: &Arc, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: &ConnInfo, ) -> Result>, HttpConnError> { let client = self @@ -159,7 +159,7 @@ impl LocalConnPool { #[allow(clippy::too_many_arguments)] pub(crate) fn poll_client( global_pool: Arc>, - ctx: &RequestMonitoring, + ctx: &RequestContext, conn_info: ConnInfo, client: C, mut connection: tokio_postgres::Connection, diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index cf758855fa..59247f03bf 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -45,7 +45,7 @@ use utils::http::error::ApiError; use crate::cancellation::CancellationHandlerMain; use crate::config::{ProxyConfig, ProxyProtocolV2}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::metrics::Metrics; use crate::protocol2::{read_proxy_protocol, ChainRW, ConnectHeader, ConnectionInfo}; use crate::proxy::run_until_cancelled; @@ -423,7 +423,7 @@ async fn request_handler( if config.http_config.accept_websockets && framed_websockets::upgrade::is_upgrade_request(&request) { - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Ws, @@ -458,7 +458,7 @@ async fn request_handler( // Return the response so the spawned future can continue. Ok(response.map(|b| b.map_err(|x| match x {}).boxed())) } else if request.uri().path() == "/sql" && *request.method() == Method::POST { - let ctx = RequestMonitoring::new( + let ctx = RequestContext::new( session_id, conn_info, crate::metrics::Protocol::Http, diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index f0975617d4..36d8595902 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -34,7 +34,7 @@ use super::json::{json_to_pg_text, pg_text_row_to_json, JsonConversionError}; use crate::auth::backend::{ComputeCredentialKeys, ComputeUserInfo}; use crate::auth::{endpoint_sni, ComputeUserInfoParseError}; use crate::config::{AuthenticationConfig, HttpConfig, ProxyConfig, TlsConfig}; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{ErrorKind, ReportableError, UserFacingError}; use crate::metrics::{HttpDirection, Metrics}; use crate::proxy::{run_until_cancelled, NeonOptions}; @@ -133,7 +133,7 @@ impl UserFacingError for ConnInfoError { fn get_conn_info( config: &'static AuthenticationConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, headers: &HeaderMap, tls: Option<&TlsConfig>, ) -> Result { @@ -240,7 +240,7 @@ fn get_conn_info( pub(crate) async fn handle( config: &'static ProxyConfig, - ctx: RequestMonitoring, + ctx: RequestContext, request: Request, backend: Arc, cancel: CancellationToken, @@ -516,7 +516,7 @@ fn map_isolation_level_to_headers(level: IsolationLevel) -> Option async fn handle_inner( cancel: CancellationToken, config: &'static ProxyConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, backend: Arc, ) -> Result>, SqlOverHttpError> { @@ -562,7 +562,7 @@ async fn handle_inner( async fn handle_db_inner( cancel: CancellationToken, config: &'static ProxyConfig, - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, conn_info: ConnInfo, auth: AuthData, @@ -733,7 +733,7 @@ pub(crate) fn uuid_to_header_value(id: Uuid) -> HeaderValue { } async fn 
handle_auth_broker_inner( - ctx: &RequestMonitoring, + ctx: &RequestContext, request: Request, conn_info: ConnInfo, jwt: String, diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs index ba36116c2c..4088fea835 100644 --- a/proxy/src/serverless/websocket.rs +++ b/proxy/src/serverless/websocket.rs @@ -14,7 +14,7 @@ use tracing::warn; use crate::cancellation::CancellationHandlerMain; use crate::config::ProxyConfig; -use crate::context::RequestMonitoring; +use crate::context::RequestContext; use crate::error::{io_error, ReportableError}; use crate::metrics::Metrics; use crate::proxy::{handle_client, ClientMode, ErrorSource}; @@ -126,7 +126,7 @@ impl AsyncBufRead for WebSocketRw { pub(crate) async fn serve_websocket( config: &'static ProxyConfig, auth_backend: &'static crate::auth::Backend<'static, ()>, - ctx: RequestMonitoring, + ctx: RequestContext, websocket: OnUpgrade, cancellation_handler: Arc, endpoint_rate_limiter: Arc, diff --git a/proxy/src/signals.rs b/proxy/src/signals.rs index 514a83d5eb..0b675683c0 100644 --- a/proxy/src/signals.rs +++ b/proxy/src/signals.rs @@ -2,7 +2,7 @@ use std::convert::Infallible; use anyhow::bail; use tokio_util::sync::CancellationToken; -use tracing::warn; +use tracing::{info, warn}; /// Handle unix signals appropriately. pub async fn handle( @@ -22,7 +22,7 @@ where tokio::select! { // Hangup is commonly used for config reload. _ = hangup.recv() => { - warn!("received SIGHUP"); + info!("received SIGHUP"); refresh_config(); } // Shut down the whole application. diff --git a/proxy/src/stream.rs b/proxy/src/stream.rs index 89df48c5d3..11f426819d 100644 --- a/proxy/src/stream.rs +++ b/proxy/src/stream.rs @@ -133,6 +133,7 @@ impl PqStream { msg: &'static str, error_kind: ErrorKind, ) -> Result { + // TODO: only log this for actually interesting errors tracing::info!( kind = error_kind.to_metric_label(), msg, diff --git a/pyproject.toml b/pyproject.toml index 9ea42bf46f..197946fff8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ psutil = "^5.9.4" types-psutil = "^5.9.5.12" types-toml = "^0.10.8.6" pytest-httpserver = "^1.0.8" -aiohttp = "3.10.2" +aiohttp = "3.10.11" pytest-rerunfailures = "^13.0" types-pytest-lazy-fixture = "^0.6.3.3" pytest-split = "^0.8.1" diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index a0a96c6e99..bfa1764abf 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -239,6 +239,10 @@ impl SafekeeperPostgresHandler { pgb: &mut PostgresBackend, tli: &mut Option, ) -> Result<(), CopyStreamHandlerEnd> { + // The `tli` parameter is only used for passing _out_ a timeline, one should + // not have been passed in. + assert!(tli.is_none()); + // Notify the libpq client that it's allowed to send `CopyData` messages pgb.write_message(&BeMessage::CopyBothResponse).await?; @@ -256,6 +260,7 @@ impl SafekeeperPostgresHandler { // sends, so this avoids deadlocks. let mut pgb_reader = pgb.split().context("START_WAL_PUSH split")?; let peer_addr = *pgb.get_peer_addr(); + let mut network_reader = NetworkReader { ttid: self.ttid, conn_id: self.conn_id, @@ -275,10 +280,14 @@ impl SafekeeperPostgresHandler { .subscribe(); *tli = Some(timeline.wal_residence_guard().await?); + let timeline_cancel = timeline.cancel.clone(); tokio::select! 
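The `signals.rs` hunk above only changes a log level: a SIGHUP-driven config reload is routine, so it is now logged at info rather than warn. For context, a minimal self-contained sketch of such a signal loop; the termination branch is an assumption standing in for the real handler's shutdown path:

```rust
use tokio::signal::unix::{signal, SignalKind};

async fn handle_signals(refresh_config: impl Fn()) -> anyhow::Result<()> {
    let mut hangup = signal(SignalKind::hangup())?;
    let mut terminate = signal(SignalKind::terminate())?;
    loop {
        tokio::select! {
            // Hangup is commonly used for config reload.
            _ = hangup.recv() => {
                tracing::info!("received SIGHUP");
                refresh_config();
            }
            // Shut down the whole application (assumed branch).
            _ = terminate.recv() => anyhow::bail!("received SIGTERM"),
        }
    }
}
```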
{ // todo: add read|write .context to these errors r = network_reader.run(msg_tx, msg_rx, reply_tx, timeline, next_msg) => r, r = network_write(pgb, reply_rx, pageserver_feedback_rx) => r, + _ = timeline_cancel.cancelled() => { + return Err(CopyStreamHandlerEnd::Cancelled); + } } } else { res.map(|_| ()) @@ -303,7 +312,7 @@ impl SafekeeperPostgresHandler { // Otherwise, WalAcceptor thread must have errored. match wal_acceptor_res { - Ok(Ok(_)) => Ok(()), // can't happen currently; would be if we add graceful termination + Ok(Ok(_)) => Ok(()), // Clean shutdown Ok(Err(e)) => Err(CopyStreamHandlerEnd::Other(e.context("WAL acceptor"))), Err(_) => Err(CopyStreamHandlerEnd::Other(anyhow!( "WalAcceptor task panicked", @@ -356,6 +365,7 @@ impl<'a, IO: AsyncRead + AsyncWrite + Unpin> NetworkReader<'a, IO> { Ok((tli, next_msg)) } + /// This function is cancellation-safe (only does network I/O and channel read/writes). async fn run( self, msg_tx: Sender, @@ -397,6 +407,7 @@ async fn read_network_loop( loop { let started = Instant::now(); let size = next_msg.size(); + match msg_tx.send_timeout(next_msg, SLOW_THRESHOLD).await { Ok(()) => {} // Slow send, log a message and keep trying. Log context has timeline ID. @@ -428,6 +439,8 @@ async fn read_network_loop( /// Read replies from WalAcceptor and pass them back to socket. Returns Ok(()) /// if reply_rx closed; it must mean WalAcceptor terminated, joining it should /// tell the error. +/// +/// This function is cancellation-safe (only does network I/O and channel read/writes). async fn network_write( pgb_writer: &mut PostgresBackend, mut reply_rx: Receiver, @@ -461,7 +474,7 @@ async fn network_write( Some(AcceptorProposerMessage::AppendResponse(append_response)) } _ => None, - } + }, }; let Some(msg) = msg else { @@ -527,6 +540,10 @@ impl WalAcceptor { /// The main loop. Returns Ok(()) if either msg_rx or reply_tx got closed; /// it must mean that network thread terminated. + /// + /// This function is *not* cancellation safe, it does local disk I/O: it should always + /// be allowed to run to completion. It respects Timeline::cancel and shuts down cleanly + /// when that gets triggered. async fn run(&mut self) -> anyhow::Result<()> { let walreceiver_guard = self.tli.get_walreceivers().register(self.conn_id); @@ -541,7 +558,7 @@ impl WalAcceptor { // Tracks whether we have unflushed appends. let mut dirty = false; - loop { + while !self.tli.is_cancelled() { let reply = tokio::select! { // Process inbound message. msg = self.msg_rx.recv() => { @@ -562,6 +579,9 @@ impl WalAcceptor { // Don't flush the WAL on every append, only periodically via flush_ticker. // This batches multiple appends per fsync. If the channel is empty after // sending the reply, we'll schedule an immediate flush. + // + // Note that a flush can still happen on segment bounds, which will result + // in an AppendResponse. if let ProposerAcceptorMessage::AppendRequest(append_request) = msg { msg = ProposerAcceptorMessage::NoFlushAppendRequest(append_request); dirty = true; @@ -596,6 +616,10 @@ impl WalAcceptor { WAL_RECEIVER_QUEUE_DEPTH.observe(self.msg_rx.len() as f64); None // no reply } + + _ = self.tli.cancel.cancelled() => { + break; + } }; // Send reply, if any. @@ -607,7 +631,7 @@ impl WalAcceptor { } // Flush WAL on disconnect, see https://github.com/neondatabase/neon/issues/9259. 
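The `receive_wal.rs` changes above follow one rule: futures that only do network and channel I/O (`NetworkReader::run`, `network_write`) may be raced against the timeline's cancellation token in a `select!`, while `WalAcceptor::run`, which writes to local disk, is not cancellation-safe and instead checks the token itself so it can exit its loop and flush cleanly. A minimal sketch of the select side of that split:

```rust
use tokio_util::sync::CancellationToken;

// Drive the read and write halves of a connection, but let timeline
// cancellation win and force a prompt, graceful exit.
async fn drive_connection(
    read_side: impl std::future::Future<Output = anyhow::Result<()>>,
    write_side: impl std::future::Future<Output = anyhow::Result<()>>,
    cancel: CancellationToken,
) -> anyhow::Result<()> {
    tokio::select! {
        r = read_side => r,
        r = write_side => r,
        _ = cancel.cancelled() => anyhow::bail!("timeline cancelled"),
    }
}
```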
- if dirty { + if dirty && !self.tli.cancel.is_cancelled() { self.tli .process_msg(&ProposerAcceptorMessage::FlushWAL) .await?; diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index f4983d44d0..6eb69f0b7c 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -947,6 +947,7 @@ where // while first connection still gets some packets later. It might be // better to not log this as error! above. let write_lsn = self.wal_store.write_lsn(); + let flush_lsn = self.wal_store.flush_lsn(); if write_lsn > msg.h.begin_lsn { bail!( "append request rewrites WAL written before, write_lsn={}, msg lsn={}", @@ -1004,7 +1005,9 @@ where ); // If flush_lsn hasn't updated, AppendResponse is not very useful. - if !require_flush { + // This is the common case for !require_flush, but a flush can still + // happen on segment bounds. + if !require_flush && flush_lsn == self.flush_lsn() { return Ok(None); } diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 6d94ff98b1..aa65ec851b 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -456,6 +456,8 @@ impl SafekeeperPostgresHandler { // not synchronized with sends, so this avoids deadlocks. let reader = pgb.split().context("START_REPLICATION split")?; + let tli_cancel = tli.cancel.clone(); + let mut sender = WalSender { pgb, // should succeed since we're already holding another guard @@ -479,6 +481,9 @@ impl SafekeeperPostgresHandler { // todo: add read|write .context to these errors r = sender.run() => r, r = reply_reader.run() => r, + _ = tli_cancel.cancelled() => { + return Err(CopyStreamHandlerEnd::Cancelled); + } }; let ws_state = ws_guard @@ -557,6 +562,7 @@ impl WalSender<'_, IO> { /// Send WAL until /// - an error occurs /// - receiver is caughtup and there is no computes (if streaming up to commit_lsn) + /// - timeline's cancellation token fires /// /// Err(CopyStreamHandlerEnd) is always returned; Result is used only for ? /// convenience. 
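The `safekeeper.rs` hunk above captures `flush_lsn` before applying the append so the reply decision can notice a flush that happened as a side effect (for example on a segment boundary) even when the proposer did not request one. Reduced to its decision logic, with `u64` standing in for `Lsn` and the names chosen for illustration:

```rust
// Send an AppendResponse if a flush was explicitly required, or if the
// flush LSN moved anyway (e.g. a segment was closed and fsynced).
fn should_send_append_response(
    require_flush: bool,
    flush_lsn_before: u64,
    flush_lsn_after: u64,
) -> bool {
    require_flush || flush_lsn_after != flush_lsn_before
}
```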
@@ -601,15 +607,14 @@ impl WalSender<'_, IO> { }; let send_buf = &send_buf[..send_size]; - // and send it - self.pgb - .write_message(&BeMessage::XLogData(XLogDataBody { - wal_start: self.start_pos.0, - wal_end: self.end_pos.0, - timestamp: get_current_timestamp(), - data: send_buf, - })) - .await?; + // and send it, while respecting Timeline::cancel + let msg = BeMessage::XLogData(XLogDataBody { + wal_start: self.start_pos.0, + wal_end: self.end_pos.0, + timestamp: get_current_timestamp(), + data: send_buf, + }); + self.pgb.write_message(&msg).await?; if let Some(appname) = &self.appname { if appname == "replica" { @@ -674,13 +679,13 @@ impl WalSender<'_, IO> { } } - self.pgb - .write_message(&BeMessage::KeepAlive(WalSndKeepAlive { - wal_end: self.end_pos.0, - timestamp: get_current_timestamp(), - request_reply: true, - })) - .await?; + let msg = BeMessage::KeepAlive(WalSndKeepAlive { + wal_end: self.end_pos.0, + timestamp: get_current_timestamp(), + request_reply: true, + }); + + self.pgb.write_message(&msg).await?; } } diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 85add6bfea..ef928f7633 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize}; use tokio::fs::{self}; use tokio_util::sync::CancellationToken; use utils::id::TenantId; +use utils::sync::gate::Gate; use std::cmp::max; use std::ops::{Deref, DerefMut}; @@ -467,6 +468,10 @@ pub struct Timeline { timeline_dir: Utf8PathBuf, manager_ctl: ManagerCtl, + /// Hold this gate from code that depends on the Timeline's non-shut-down state. While holding + /// this gate, you must respect [`Timeline::cancel`] + pub(crate) gate: Gate, + /// Delete/cancel will trigger this, background tasks should drop out as soon as it fires pub(crate) cancel: CancellationToken, @@ -508,6 +513,7 @@ impl Timeline { mutex: RwLock::new(shared_state), walsenders: WalSenders::new(walreceivers.clone()), walreceivers, + gate: Default::default(), cancel: CancellationToken::default(), manager_ctl: ManagerCtl::new(), broker_active: AtomicBool::new(false), @@ -533,56 +539,6 @@ impl Timeline { )) } - /// Initialize fresh timeline on disk and start background tasks. If init - /// fails, timeline is cancelled and cannot be used anymore. - /// - /// Init is transactional, so if it fails, created files will be deleted, - /// and state on disk should remain unchanged. - pub async fn init_new( - self: &Arc, - shared_state: &mut WriteGuardSharedState<'_>, - conf: &SafeKeeperConf, - broker_active_set: Arc, - partial_backup_rate_limiter: RateLimiter, - ) -> Result<()> { - match fs::metadata(&self.timeline_dir).await { - Ok(_) => { - // Timeline directory exists on disk, we should leave state unchanged - // and return error. - bail!(TimelineError::Invalid(self.ttid)); - } - Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} - Err(e) => { - return Err(e.into()); - } - } - - // Create timeline directory. - fs::create_dir_all(&self.timeline_dir).await?; - - // Write timeline to disk and start background tasks. - if let Err(e) = shared_state.sk.state_mut().flush().await { - // Bootstrap failed, cancel timeline and remove timeline directory. 
- self.cancel(shared_state); - - if let Err(fs_err) = fs::remove_dir_all(&self.timeline_dir).await { - warn!( - "failed to remove timeline {} directory after bootstrap failure: {}", - self.ttid, fs_err - ); - } - - return Err(e); - } - self.bootstrap( - shared_state, - conf, - broker_active_set, - partial_backup_rate_limiter, - ); - Ok(()) - } - /// Bootstrap new or existing timeline starting background tasks. pub fn bootstrap( self: &Arc, @@ -593,33 +549,61 @@ impl Timeline { ) { let (tx, rx) = self.manager_ctl.bootstrap_manager(); + let Ok(gate_guard) = self.gate.enter() else { + // Init raced with shutdown + return; + }; + // Start manager task which will monitor timeline state and update // background tasks. - tokio::spawn(timeline_manager::main_task( - ManagerTimeline { tli: self.clone() }, - conf.clone(), - broker_active_set, - tx, - rx, - partial_backup_rate_limiter, - )); + tokio::spawn({ + let this = self.clone(); + let conf = conf.clone(); + async move { + let _gate_guard = gate_guard; + timeline_manager::main_task( + ManagerTimeline { tli: this }, + conf, + broker_active_set, + tx, + rx, + partial_backup_rate_limiter, + ) + .await + } + }); + } + + /// Background timeline activities (which hold Timeline::gate) will no + /// longer run once this function completes. + pub async fn shutdown(&self) { + info!("timeline {} shutting down", self.ttid); + self.cancel.cancel(); + + // Wait for any concurrent tasks to stop using this timeline, to avoid e.g. attempts + // to read deleted files. + self.gate.close().await; } /// Delete timeline from disk completely, by removing timeline directory. - /// Background timeline activities will stop eventually. /// /// Also deletes WAL in s3. Might fail if e.g. s3 is unavailable, but /// deletion API endpoint is retriable. + /// + /// Timeline must be in shut-down state (i.e. call [`Self::shutdown`] first) pub async fn delete( &self, shared_state: &mut WriteGuardSharedState<'_>, only_local: bool, ) -> Result { - self.cancel(shared_state); + // Assert that [`Self::shutdown`] was already called + assert!(self.cancel.is_cancelled()); + assert!(self.gate.close_complete()); + + // Close associated FDs. Nobody will be able to touch timeline data once + // it is cancelled, so WAL storage won't be opened again. + shared_state.sk.close_wal_store(); - // TODO: It's better to wait for s3 offloader termination before - // removing data from s3. Though since s3 doesn't have transactions it - // still wouldn't guarantee absense of data after removal. let conf = GlobalTimelines::get_global_config(); if !only_local && conf.is_wal_backup_enabled() { // Note: we concurrently delete remote storage data from multiple @@ -631,16 +615,6 @@ impl Timeline { Ok(dir_existed) } - /// Cancel timeline to prevent further usage. Background tasks will stop - /// eventually after receiving cancellation signal. - fn cancel(&self, shared_state: &mut WriteGuardSharedState<'_>) { - info!("timeline {} is cancelled", self.ttid); - self.cancel.cancel(); - // Close associated FDs. Nobody will be able to touch timeline data once - // it is cancelled, so WAL storage won't be opened again. - shared_state.sk.close_wal_store(); - } - /// Returns if timeline is cancelled. 
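The `timeline.rs` changes above introduce the shutdown protocol the rest of this PR builds on: background tasks hold a `GateGuard`, `shutdown()` fires the cancellation token and then waits for the gate to close, and only a fully shut-down timeline may be deleted. A condensed sketch using the same crate-internal `utils::sync::gate::Gate` API that appears above; the fields and the spawned body are placeholders:

```rust
use std::sync::Arc;
use tokio_util::sync::CancellationToken;
use utils::sync::gate::Gate; // crate-internal gate, as used above

struct Timeline {
    cancel: CancellationToken,
    gate: Gate,
}

impl Timeline {
    /// After this returns, no task holding a gate guard is still running.
    async fn shutdown(&self) {
        self.cancel.cancel();
        self.gate.close().await;
    }

    fn spawn_background_task(self: &Arc<Self>) {
        // Entering the gate fails once shutdown has begun; bail out quietly.
        let Ok(gate_guard) = self.gate.enter() else {
            return;
        };
        let this = self.clone();
        tokio::spawn(async move {
            let _gate_guard = gate_guard; // held for the task's whole lifetime
            // Placeholder for real work; it must watch `this.cancel` and
            // return promptly once the token fires.
            this.cancel.cancelled().await;
        });
    }
}
```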
pub fn is_cancelled(&self) -> bool { self.cancel.is_cancelled() diff --git a/safekeeper/src/timeline_guard.rs b/safekeeper/src/timeline_guard.rs index 1ddac573d2..9102a40df8 100644 --- a/safekeeper/src/timeline_guard.rs +++ b/safekeeper/src/timeline_guard.rs @@ -7,6 +7,7 @@ use std::collections::HashSet; use tracing::debug; +use utils::sync::gate::GateGuard; use crate::timeline_manager::ManagerCtlMessage; @@ -16,6 +17,12 @@ pub struct GuardId(u64); pub struct ResidenceGuard { manager_tx: tokio::sync::mpsc::UnboundedSender, guard_id: GuardId, + + /// [`ResidenceGuard`] represents a guarantee that a timeline's data remains resident, + /// which by extension also means the timeline is not shut down (since after shut down + /// our data may be deleted). Therefore everyone holding a residence guard must also + /// hold a guard on [`crate::timeline::Timeline::gate`] + _gate_guard: GateGuard, } impl Drop for ResidenceGuard { @@ -52,7 +59,8 @@ impl AccessService { self.guards.is_empty() } - pub(crate) fn create_guard(&mut self) -> ResidenceGuard { + /// `timeline_gate_guard` is a guarantee that the timeline is not shut down + pub(crate) fn create_guard(&mut self, timeline_gate_guard: GateGuard) -> ResidenceGuard { let guard_id = self.next_guard_id; self.next_guard_id += 1; self.guards.insert(guard_id); @@ -63,6 +71,7 @@ impl AccessService { ResidenceGuard { manager_tx: self.manager_tx.clone(), guard_id, + _gate_guard: timeline_gate_guard, } } diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index e9fed21bf5..c02fb904cf 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -266,8 +266,10 @@ pub async fn main_task( // Start recovery task which always runs on the timeline. if !mgr.is_offloaded && mgr.conf.peer_recovery_enabled { - let tli = mgr.wal_resident_timeline(); - mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone()))); + // Recovery task is only spawned if we can get a residence guard (i.e. timeline is not already shutting down) + if let Ok(tli) = mgr.wal_resident_timeline() { + mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone()))); + } } // If timeline is evicted, reflect that in the metric. @@ -375,6 +377,13 @@ pub async fn main_task( // shutdown background tasks if mgr.conf.is_wal_backup_enabled() { + if let Some(backup_task) = mgr.backup_task.take() { + // If we fell through here, then the timeline is shutting down. This is important + // because otherwise joining on the wal_backup handle might hang. + assert!(mgr.tli.cancel.is_cancelled()); + + backup_task.join().await; + } wal_backup::update_task(&mut mgr, false, &last_state).await; } @@ -442,10 +451,18 @@ impl Manager { /// Get a WalResidentTimeline. /// Manager code must use this function instead of one from `Timeline` /// directly, because it will deadlock. - pub(crate) fn wal_resident_timeline(&mut self) -> WalResidentTimeline { + /// + /// This function is fallible because the guard may not be created if the timeline is + /// shutting down. + pub(crate) fn wal_resident_timeline(&mut self) -> anyhow::Result { assert!(!self.is_offloaded); - let guard = self.access_service.create_guard(); - WalResidentTimeline::new(self.tli.clone(), guard) + let guard = self.access_service.create_guard( + self.tli + .gate + .enter() + .map_err(|_| anyhow::anyhow!("Timeline shutting down"))?, + ); + Ok(WalResidentTimeline::new(self.tli.clone(), guard)) } /// Get a snapshot of the timeline state. 
@@ -559,6 +576,11 @@ impl Manager { if removal_horizon_segno > self.last_removed_segno { // we need to remove WAL + let Ok(timeline_gate_guard) = self.tli.gate.enter() else { + tracing::info!("Timeline shutdown, not spawning WAL removal task"); + return; + }; + let remover = match self.tli.read_shared_state().await.sk { StateSK::Loaded(ref sk) => { crate::wal_storage::Storage::remove_up_to(&sk.wal_store, removal_horizon_segno) @@ -573,6 +595,8 @@ impl Manager { self.wal_removal_task = Some(tokio::spawn( async move { + let _timeline_gate_guard = timeline_gate_guard; + remover.await?; Ok(removal_horizon_segno) } @@ -619,10 +643,15 @@ impl Manager { return; } + let Ok(resident) = self.wal_resident_timeline() else { + // Shutting down + return; + }; + // Get WalResidentTimeline and start partial backup task. let cancel = CancellationToken::new(); let handle = tokio::spawn(wal_backup_partial::main_task( - self.wal_resident_timeline(), + resident, self.conf.clone(), self.global_rate_limiter.clone(), cancel.clone(), @@ -664,7 +693,7 @@ impl Manager { self.partial_backup_task = None; } - let tli = self.wal_resident_timeline(); + let tli = self.wal_resident_timeline()?; let mut partial_backup = PartialBackup::new(tli, self.conf.clone()).await; // Reset might fail e.g. when cfile is already reset but s3 removal // failed, so set manager state to None beforehand. In any case caller @@ -688,7 +717,12 @@ impl Manager { let guard = if self.is_offloaded { Err(anyhow::anyhow!("timeline is offloaded, can't get a guard")) } else { - Ok(self.access_service.create_guard()) + match self.tli.gate.enter() { + Ok(gate_guard) => Ok(self.access_service.create_guard(gate_guard)), + Err(_) => Err(anyhow::anyhow!( + "timeline is shutting down, can't get a guard" + )), + } }; if tx.send(guard).is_err() { @@ -699,7 +733,10 @@ impl Manager { let result = if self.is_offloaded { None } else { - Some(self.access_service.create_guard()) + match self.tli.gate.enter() { + Ok(gate_guard) => Some(self.access_service.create_guard(gate_guard)), + Err(_) => None, + } }; if tx.send(result).is_err() { diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 33d94da034..067945fd5f 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -457,10 +457,12 @@ impl GlobalTimelines { Ok(timeline) => { let was_active = timeline.broker_active.load(Ordering::Relaxed); + info!("deleting timeline {}, only_local={}", ttid, only_local); + timeline.shutdown().await; + // Take a lock and finish the deletion holding this mutex. let mut shared_state = timeline.write_shared_state().await; - info!("deleting timeline {}, only_local={}", ttid, only_local); let dir_existed = timeline.delete(&mut shared_state, only_local).await?; Ok(TimelineDeleteForceResult { diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 6c87e5a926..34b5dbeaa1 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -25,7 +25,6 @@ use tokio::fs::File; use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use tokio::sync::{watch, OnceCell}; -use tokio::time::sleep; use tracing::*; use utils::{id::TenantTimelineId, lsn::Lsn}; @@ -46,6 +45,14 @@ pub struct WalBackupTaskHandle { handle: JoinHandle<()>, } +impl WalBackupTaskHandle { + pub(crate) async fn join(self) { + if let Err(e) = self.handle.await { + error!("WAL backup task panicked: {}", e); + } + } +} + /// Do we have anything to upload to S3, i.e. 
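`timelines_global_map.rs` above reorders deletion so that `shutdown().await` finishes before the shared-state lock is taken, which lets `Timeline::delete` assert that cancellation fired and the gate is fully closed. The sequence in isolation, expressed with the internal APIs shown above (a sketch, not the exact function):

```rust
// Order matters: wait for background tasks to drain via the gate first,
// then take the shared-state lock and delete on-disk data.
async fn delete_timeline(
    timeline: std::sync::Arc<Timeline>,
    only_local: bool,
) -> anyhow::Result<bool> {
    timeline.shutdown().await;
    let mut shared_state = timeline.write_shared_state().await;
    timeline.delete(&mut shared_state, only_local).await
}
```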
should safekeepers run backup activity? pub(crate) fn is_wal_backup_required( wal_seg_size: usize, @@ -74,11 +81,12 @@ pub(crate) async fn update_task(mgr: &mut Manager, need_backup: bool, state: &St let (shutdown_tx, shutdown_rx) = mpsc::channel(1); - let async_task = backup_task_main( - mgr.wal_resident_timeline(), - mgr.conf.backup_parallel_jobs, - shutdown_rx, - ); + let Ok(resident) = mgr.wal_resident_timeline() else { + info!("Timeline shut down"); + return; + }; + + let async_task = backup_task_main(resident, mgr.conf.backup_parallel_jobs, shutdown_rx); let handle = if mgr.conf.current_thread_runtime { tokio::spawn(async_task) @@ -108,9 +116,7 @@ async fn shut_down_task(entry: &mut Option) { // Tell the task to shutdown. Error means task exited earlier, that's ok. let _ = wb_handle.shutdown_tx.send(()).await; // Await the task itself. TODO: restart panicked tasks earlier. - if let Err(e) = wb_handle.handle.await { - warn!("WAL backup task panicked: {}", e); - } + wb_handle.join().await; } } @@ -214,6 +220,7 @@ async fn backup_task_main( let _guard = WAL_BACKUP_TASKS.guard(); info!("started"); + let cancel = tli.tli.cancel.clone(); let mut wb = WalBackupTask { wal_seg_size: tli.get_wal_seg_size().await, commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(), @@ -230,25 +237,34 @@ async fn backup_task_main( _ = wb.run() => {} _ = shutdown_rx.recv() => { canceled = true; + }, + _ = cancel.cancelled() => { + canceled = true; } } info!("task {}", if canceled { "canceled" } else { "terminated" }); } impl WalBackupTask { + /// This function must be called from a select! that also respects self.timeline's + /// cancellation token. This is done in [`backup_task_main`]. + /// + /// The future returned by this function is safe to drop at any time because it + /// does not write to local disk. async fn run(&mut self) { let mut backup_lsn = Lsn(0); let mut retry_attempt = 0u32; // offload loop - loop { + while !self.timeline.cancel.is_cancelled() { if retry_attempt == 0 { // wait for new WAL to arrive if let Err(e) = self.commit_lsn_watch_rx.changed().await { - // should never happen, as we hold Arc to timeline. + // should never happen, as we hold Arc to timeline and transmitter's lifetime + // is within Timeline's error!("commit_lsn watch shut down: {:?}", e); return; - } + }; } else { // or just sleep if we errored previously let mut retry_delay = UPLOAD_FAILURE_RETRY_MAX_MS; @@ -256,7 +272,7 @@ impl WalBackupTask { { retry_delay = min(retry_delay, backoff_delay); } - sleep(Duration::from_millis(retry_delay)).await; + tokio::time::sleep(Duration::from_millis(retry_delay)).await; } let commit_lsn = *self.commit_lsn_watch_rx.borrow(); diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index c3bb6cd12c..e338d70731 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -113,6 +113,13 @@ pub struct PhysicalStorage { /// non-aligned chunks of data. write_record_lsn: Lsn, + /// The last LSN flushed to disk. May be in the middle of a record. + /// + /// NB: when the rest of the system refers to `flush_lsn`, it usually + /// actually refers to `flush_record_lsn`. This ambiguity can be dangerous + /// and should be resolved. + flush_lsn: Lsn, + /// The LSN of the last WAL record flushed to disk. 
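`wal_backup.rs` above makes the backup task watch the timeline's cancellation token next to its explicit shutdown channel, and documents `WalBackupTask::run` as safe to drop because it never writes to local disk. Between failed upload attempts it sleeps with a capped backoff; the constants and the per-attempt growth are partly elided above, so the sketch below only assumes the visible cap-and-clamp shape:

```rust
use std::cmp::min;
use std::time::Duration;

// Stand-ins for UPLOAD_FAILURE_RETRY_{MIN,MAX}_MS; real values are not shown above.
const RETRY_MIN_MS: u64 = 10;
const RETRY_MAX_MS: u64 = 5_000;

// Capped backoff: grow the delay with the attempt count, clamp at the maximum.
async fn backoff_sleep(retry_attempt: u32) {
    let mut retry_delay = RETRY_MAX_MS;
    if let Some(backoff_delay) = RETRY_MIN_MS.checked_shl(retry_attempt) {
        retry_delay = min(retry_delay, backoff_delay);
    }
    tokio::time::sleep(Duration::from_millis(retry_delay)).await;
}
```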
flush_record_lsn: Lsn, @@ -211,6 +218,7 @@ impl PhysicalStorage { system_id: state.server.system_id, write_lsn, write_record_lsn: write_lsn, + flush_lsn, flush_record_lsn: flush_lsn, decoder: WalStreamDecoder::new(write_lsn, state.server.pg_version / 10000), file: None, @@ -295,8 +303,9 @@ impl PhysicalStorage { } } - /// Write WAL bytes, which are known to be located in a single WAL segment. - async fn write_in_segment(&mut self, segno: u64, xlogoff: usize, buf: &[u8]) -> Result<()> { + /// Write WAL bytes, which are known to be located in a single WAL segment. Returns true if the + /// segment was completed, closed, and flushed to disk. + async fn write_in_segment(&mut self, segno: u64, xlogoff: usize, buf: &[u8]) -> Result { let mut file = if let Some(file) = self.file.take() { file } else { @@ -320,20 +329,24 @@ impl PhysicalStorage { let (wal_file_path, wal_file_partial_path) = wal_file_paths(&self.timeline_dir, segno, self.wal_seg_size); fs::rename(wal_file_partial_path, wal_file_path).await?; + Ok(true) } else { // otherwise, file can be reused later self.file = Some(file); + Ok(false) } - - Ok(()) } /// Writes WAL to the segment files, until everything is writed. If some segments /// are fully written, they are flushed to disk. The last (partial) segment can /// be flushed separately later. /// - /// Updates `write_lsn`. + /// Updates `write_lsn` and `flush_lsn`. async fn write_exact(&mut self, pos: Lsn, mut buf: &[u8]) -> Result<()> { + // TODO: this shouldn't be possible, except possibly with write_lsn == 0. + // Rename this method to `append_exact`, and make it append-only, removing + // the `pos` parameter and this check. For this reason, we don't update + // `flush_lsn` here. if self.write_lsn != pos { // need to flush the file before discarding it if let Some(file) = self.file.take() { @@ -355,9 +368,13 @@ impl PhysicalStorage { buf.len() }; - self.write_in_segment(segno, xlogoff, &buf[..bytes_write]) + let flushed = self + .write_in_segment(segno, xlogoff, &buf[..bytes_write]) .await?; self.write_lsn += bytes_write as u64; + if flushed { + self.flush_lsn = self.write_lsn; + } buf = &buf[bytes_write..]; } @@ -371,6 +388,9 @@ impl Storage for PhysicalStorage { self.write_lsn } /// flush_lsn returns LSN of last durably stored WAL record. + /// + /// TODO: flush_lsn() returns flush_record_lsn, but write_lsn() returns write_lsn: confusing. + #[allow(clippy::misnamed_getters)] fn flush_lsn(&self) -> Lsn { self.flush_record_lsn } @@ -424,8 +444,9 @@ impl Storage for PhysicalStorage { self.metrics.observe_write_seconds(write_seconds); self.metrics.observe_write_bytes(buf.len()); - // figure out last record's end lsn for reporting (if we got the - // whole record) + // Figure out the last record's end LSN and update `write_record_lsn` + // (if we got a whole record). The write may also have closed and + // flushed a segment, so update `flush_record_lsn` as well. if self.decoder.available() != startpos { info!( "restart decoder from {} to {}", @@ -436,12 +457,15 @@ impl Storage for PhysicalStorage { self.decoder = WalStreamDecoder::new(startpos, pg_version); } self.decoder.feed_bytes(buf); - loop { - match self.decoder.poll_decode()? { - None => break, // no full record yet - Some((lsn, _rec)) => { - self.write_record_lsn = lsn; - } + + if self.write_record_lsn <= self.flush_lsn { + // We may have flushed a previously written record. + self.flush_record_lsn = self.write_record_lsn; + } + while let Some((lsn, _rec)) = self.decoder.poll_decode()? 
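`wal_storage.rs` above splits the flush bookkeeping in two: the new `flush_lsn` tracks durably written bytes and may sit in the middle of a record, while the pre-existing `flush_record_lsn` only advances to record boundaries already covered by `flush_lsn`. The update rules, condensed (with `u64` standing in for `Lsn` and method names chosen for illustration):

```rust
struct WalWatermarks {
    write_lsn: u64,        // last byte written
    write_record_lsn: u64, // end of the last complete record written
    flush_lsn: u64,        // last byte flushed, may be mid-record
    flush_record_lsn: u64, // end of the last complete record flushed
}

impl WalWatermarks {
    /// A full segment was closed and fsynced as a side effect of writing.
    fn on_segment_flush(&mut self) {
        self.flush_lsn = self.write_lsn;
        if self.write_record_lsn <= self.flush_lsn {
            self.flush_record_lsn = self.write_record_lsn;
        }
    }

    /// An explicit flush(): everything written so far is now durable.
    fn on_explicit_flush(&mut self) {
        self.flush_lsn = self.write_lsn;
        self.flush_record_lsn = self.write_record_lsn;
    }
}
```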
{ + self.write_record_lsn = lsn; + if lsn <= self.flush_lsn { + self.flush_record_lsn = lsn; } } @@ -458,19 +482,17 @@ impl Storage for PhysicalStorage { self.fdatasync_file(&unflushed_file).await?; self.file = Some(unflushed_file); } else { - // We have unflushed data (write_lsn != flush_lsn), but no file. - // This should only happen if last file was fully written and flushed, - // but haven't updated flush_lsn yet. - if self.write_lsn.segment_offset(self.wal_seg_size) != 0 { - bail!( - "unexpected unflushed data with no open file, write_lsn={}, flush_lsn={}", - self.write_lsn, - self.flush_record_lsn - ); - } + // We have unflushed data (write_lsn != flush_lsn), but no file. This + // shouldn't happen, since the segment is flushed on close. + bail!( + "unexpected unflushed data with no open file, write_lsn={}, flush_lsn={}", + self.write_lsn, + self.flush_record_lsn + ); } // everything is flushed now, let's update flush_lsn + self.flush_lsn = self.write_lsn; self.flush_record_lsn = self.write_record_lsn; Ok(()) } @@ -517,6 +539,7 @@ impl Storage for PhysicalStorage { self.pending_wal_truncation = true; self.write_lsn = end_pos; + self.flush_lsn = end_pos; self.write_record_lsn = end_pos; self.flush_record_lsn = end_pos; diff --git a/scripts/ingest_perf_test_result.py b/scripts/ingest_perf_test_result.py index 40071c01b0..804f8a3cde 100644 --- a/scripts/ingest_perf_test_result.py +++ b/scripts/ingest_perf_test_result.py @@ -25,7 +25,8 @@ CREATE TABLE IF NOT EXISTS perf_test_results ( metric_value NUMERIC, metric_unit VARCHAR(10), metric_report_type TEXT, - recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW() + recorded_at_timestamp TIMESTAMP WITH TIME ZONE DEFAULT NOW(), + labels JSONB with default '{}' ) """ @@ -91,6 +92,7 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) "metric_unit": metric["unit"], "metric_report_type": metric["report"], "recorded_at_timestamp": datetime.utcfromtimestamp(recorded_at_timestamp), + "labels": json.dumps(metric.get("labels")), } args_list.append(values) @@ -105,7 +107,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) metric_value, metric_unit, metric_report_type, - recorded_at_timestamp + recorded_at_timestamp, + labels ) VALUES %s """, args_list, @@ -117,7 +120,8 @@ def ingest_perf_test_result(cursor, data_file: Path, recorded_at_timestamp: int) %(metric_value)s, %(metric_unit)s, %(metric_report_type)s, - %(recorded_at_timestamp)s + %(recorded_at_timestamp)s, + %(labels)s )""", ) return len(args_list) diff --git a/storage_scrubber/src/find_large_objects.rs b/storage_scrubber/src/find_large_objects.rs index 88e36af560..95d3af1453 100644 --- a/storage_scrubber/src/find_large_objects.rs +++ b/storage_scrubber/src/find_large_objects.rs @@ -106,9 +106,9 @@ pub async fn find_large_objects( } } - let bucket_name = target.bucket_name(); + let desc_str = target.desc_str(); tracing::info!( - "Scan of {bucket_name} finished. Scanned {tenant_ctr} shards. objects={object_ctr}, found={}.", + "Scan of {desc_str} finished. Scanned {tenant_ctr} shards. 
objects={object_ctr}, found={}.", objects.len() ); Ok(LargeObjectListing { objects }) diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index 863dbf960d..91668a42a7 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -177,7 +177,7 @@ async fn find_garbage_inner( })); // Enumerate Tenants in S3, and check if each one exists in Console - tracing::info!("Finding all tenants in bucket {}...", bucket_config.bucket); + tracing::info!("Finding all tenants in {}...", bucket_config.desc_str()); let tenants = stream_tenants(&remote_client, &target); let tenants_checked = tenants.map_ok(|t| { let api_client = cloud_admin_api_client.clone(); @@ -524,7 +524,7 @@ pub async fn purge_garbage( init_remote(garbage_list.bucket_config.clone(), garbage_list.node_kind).await?; assert_eq!( - &garbage_list.bucket_config.bucket, + garbage_list.bucket_config.bucket_name().unwrap(), remote_client.bucket_name().unwrap() ); diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index de0857cb5f..1fe4fc58cd 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -29,8 +29,7 @@ use pageserver::tenant::TENANTS_SEGMENT_NAME; use pageserver_api::shard::TenantShardId; use remote_storage::{ DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig, - RemoteStorageKind, S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, - DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, + RemoteStorageKind, S3Config, }; use reqwest::Url; use serde::{Deserialize, Serialize}; @@ -48,7 +47,7 @@ const CLOUD_ADMIN_API_TOKEN_ENV_VAR: &str = "CLOUD_ADMIN_API_TOKEN"; #[derive(Debug, Clone)] pub struct S3Target { - pub bucket_name: String, + pub desc_str: String, /// This `prefix_in_bucket` is only equal to the PS/SK config of the same /// name for the RootTarget: other instances of S3Target will have prefix_in_bucket /// with extra parts. 
@@ -172,7 +171,7 @@ impl RootTarget { }; S3Target { - bucket_name: root.bucket_name.clone(), + desc_str: root.desc_str.clone(), prefix_in_bucket: format!( "{}/{TENANTS_SEGMENT_NAME}/{tenant_id}", root.prefix_in_bucket @@ -209,10 +208,10 @@ impl RootTarget { } } - pub fn bucket_name(&self) -> &str { + pub fn desc_str(&self) -> &str { match self { - Self::Pageserver(root) => &root.bucket_name, - Self::Safekeeper(root) => &root.bucket_name, + Self::Pageserver(root) => &root.desc_str, + Self::Safekeeper(root) => &root.desc_str, } } @@ -230,24 +229,61 @@ pub fn remote_timeline_path_id(id: &TenantShardTimelineId) -> RemotePath { #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(deny_unknown_fields)] -pub struct BucketConfig { - pub region: String, - pub bucket: String, - pub prefix_in_bucket: Option, -} +pub struct BucketConfig(RemoteStorageConfig); impl BucketConfig { pub fn from_env() -> anyhow::Result { - let region = env::var("REGION").context("'REGION' param retrieval")?; - let bucket = env::var("BUCKET").context("'BUCKET' param retrieval")?; - let prefix_in_bucket = env::var("BUCKET_PREFIX").ok(); - - Ok(Self { - region, - bucket, - prefix_in_bucket, - }) + if let Ok(legacy) = Self::from_env_legacy() { + return Ok(legacy); + } + let config_toml = + env::var("REMOTE_STORAGE_CONFIG").context("'REMOTE_STORAGE_CONFIG' retrieval")?; + let remote_config = RemoteStorageConfig::from_toml_str(&config_toml)?; + Ok(BucketConfig(remote_config)) } + + fn from_env_legacy() -> anyhow::Result { + let bucket_region = env::var("REGION").context("'REGION' param retrieval")?; + let bucket_name = env::var("BUCKET").context("'BUCKET' param retrieval")?; + let prefix_in_bucket = env::var("BUCKET_PREFIX").ok(); + let endpoint = env::var("AWS_ENDPOINT_URL").ok(); + // Create a json object which we then deserialize so that we don't + // have to repeat all of the S3Config fields. 
+ let s3_config_json = serde_json::json!({ + "bucket_name": bucket_name, + "bucket_region": bucket_region, + "prefix_in_bucket": prefix_in_bucket, + "endpoint": endpoint, + }); + let config: RemoteStorageConfig = serde_json::from_value(s3_config_json)?; + Ok(BucketConfig(config)) + } + pub fn desc_str(&self) -> String { + match &self.0.storage { + RemoteStorageKind::LocalFs { local_path } => { + format!("local path {local_path}") + } + RemoteStorageKind::AwsS3(config) => format!( + "bucket {}, region {}", + config.bucket_name, config.bucket_region + ), + RemoteStorageKind::AzureContainer(config) => format!( + "bucket {}, storage account {:?}, region {}", + config.container_name, config.storage_account, config.container_region + ), + } + } + pub fn bucket_name(&self) -> Option<&str> { + self.0.storage.bucket_name() + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct BucketConfigLegacy { + pub region: String, + pub bucket: String, + pub prefix_in_bucket: Option, } pub struct ControllerClientConfig { @@ -337,13 +373,9 @@ fn default_prefix_in_bucket(node_kind: NodeKind) -> &'static str { } } -fn make_root_target( - bucket_name: String, - prefix_in_bucket: String, - node_kind: NodeKind, -) -> RootTarget { +fn make_root_target(desc_str: String, prefix_in_bucket: String, node_kind: NodeKind) -> RootTarget { let s3_target = S3Target { - bucket_name, + desc_str, prefix_in_bucket, delimiter: "/".to_string(), }; @@ -354,15 +386,15 @@ fn make_root_target( } async fn init_remote_s3( - bucket_config: BucketConfig, + bucket_config: S3Config, node_kind: NodeKind, ) -> anyhow::Result<(Arc, RootTarget)> { - let bucket_region = Region::new(bucket_config.region); + let bucket_region = Region::new(bucket_config.bucket_region); let s3_client = Arc::new(init_s3_client(bucket_region).await); let default_prefix = default_prefix_in_bucket(node_kind).to_string(); let s3_root = make_root_target( - bucket_config.bucket, + bucket_config.bucket_name, bucket_config.prefix_in_bucket.unwrap_or(default_prefix), node_kind, ); @@ -371,33 +403,28 @@ async fn init_remote_s3( } async fn init_remote( - bucket_config: BucketConfig, + mut storage_config: BucketConfig, node_kind: NodeKind, ) -> anyhow::Result<(GenericRemoteStorage, RootTarget)> { - let endpoint = env::var("AWS_ENDPOINT_URL").ok(); + let desc_str = storage_config.desc_str(); + let default_prefix = default_prefix_in_bucket(node_kind).to_string(); - let prefix_in_bucket = Some(bucket_config.prefix_in_bucket.unwrap_or(default_prefix)); - let storage = S3Config { - bucket_name: bucket_config.bucket.clone(), - bucket_region: bucket_config.region, - prefix_in_bucket, - endpoint, - concurrency_limit: DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT - .try_into() - .unwrap(), - max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, - upload_storage_class: None, - }; - let storage_config = RemoteStorageConfig { - storage: RemoteStorageKind::AwsS3(storage), - timeout: RemoteStorageConfig::DEFAULT_TIMEOUT, - }; + + match &mut storage_config.0.storage { + RemoteStorageKind::AwsS3(ref mut config) => { + config.prefix_in_bucket.get_or_insert(default_prefix); + } + RemoteStorageKind::AzureContainer(ref mut config) => { + config.prefix_in_container.get_or_insert(default_prefix); + } + RemoteStorageKind::LocalFs { .. 
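The legacy environment path above builds a small JSON object and deserializes it into `RemoteStorageConfig` instead of filling in every `S3Config` field by hand. The same trick, shown generically against a hypothetical struct (the env variable names match the ones read above):

```rust
use serde::Deserialize;

// Hypothetical target struct; mirrors the S3 fields populated above.
#[derive(Deserialize, Debug)]
struct S3Like {
    bucket_name: String,
    bucket_region: String,
    prefix_in_bucket: Option<String>,
    endpoint: Option<String>,
}

fn from_legacy_env() -> anyhow::Result<S3Like> {
    // Build a JSON value so optional fields are handled by the Deserialize
    // impl rather than being repeated here field by field.
    let v = serde_json::json!({
        "bucket_name": std::env::var("BUCKET")?,
        "bucket_region": std::env::var("REGION")?,
        "prefix_in_bucket": std::env::var("BUCKET_PREFIX").ok(),
        "endpoint": std::env::var("AWS_ENDPOINT_URL").ok(),
    });
    Ok(serde_json::from_value(v)?)
}
```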
} => (), + } // We already pass the prefix to the remote client above let prefix_in_root_target = String::new(); - let root_target = make_root_target(bucket_config.bucket, prefix_in_root_target, node_kind); + let root_target = make_root_target(desc_str, prefix_in_root_target, node_kind); - let client = GenericRemoteStorage::from_config(&storage_config).await?; + let client = GenericRemoteStorage::from_config(&storage_config.0).await?; Ok((client, root_target)) } @@ -469,7 +496,7 @@ async fn list_objects_with_retries( } warn!( "list_objects_v2 query failed: bucket_name={}, prefix={}, delimiter={}, error={}", - s3_target.bucket_name, + remote_client.bucket_name().unwrap_or_default(), s3_target.prefix_in_bucket, s3_target.delimiter, DisplayErrorContext(e), diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs index ee816534c6..0ffb570984 100644 --- a/storage_scrubber/src/main.rs +++ b/storage_scrubber/src/main.rs @@ -140,7 +140,7 @@ async fn main() -> anyhow::Result<()> { "{}_{}_{}_{}.log", std::env::args().next().unwrap(), command_log_name, - bucket_config.bucket, + bucket_config.bucket_name().unwrap_or("nobucket"), chrono::Utc::now().format("%Y_%m_%d__%H_%M_%S") )); @@ -191,13 +191,7 @@ async fn main() -> anyhow::Result<()> { // Strictly speaking an empty bucket is a valid bucket, but if someone ran the // scrubber they were likely expecting to scan something, and if we see no timelines // at all then it's likely due to some configuration issues like a bad prefix - bail!( - "No timelines found in bucket {} prefix {}", - bucket_config.bucket, - bucket_config - .prefix_in_bucket - .unwrap_or("".to_string()) - ); + bail!("No timelines found in {}", bucket_config.desc_str()); } Ok(()) } else { @@ -396,13 +390,7 @@ pub async fn scan_pageserver_metadata_cmd( // Strictly speaking an empty bucket is a valid bucket, but if someone ran the // scrubber they were likely expecting to scan something, and if we see no timelines // at all then it's likely due to some configuration issues like a bad prefix - tracing::error!( - "No timelines found in bucket {} prefix {}", - bucket_config.bucket, - bucket_config - .prefix_in_bucket - .unwrap_or("".to_string()) - ); + tracing::error!("No timelines found in {}", bucket_config.desc_str()); if exit_code { std::process::exit(1); } diff --git a/storage_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs index f896cff2d5..efda7c213d 100644 --- a/storage_scrubber/src/metadata_stream.rs +++ b/storage_scrubber/src/metadata_stream.rs @@ -60,7 +60,7 @@ pub async fn stream_tenant_shards<'a>( first_part .parse::() - .with_context(|| format!("Incorrect entry id str: {first_part}")) + .with_context(|| format!("Incorrect tenant entry id str: {first_part}")) }) .collect::>(); @@ -114,9 +114,10 @@ pub async fn stream_tenant_timelines<'a>( prefix.get_path().as_str().strip_prefix(prefix_str) }) .map(|entry_id_str| { - entry_id_str + let first_part = entry_id_str.split('/').next().unwrap(); + first_part .parse::() - .with_context(|| format!("Incorrect entry id str: {entry_id_str}")) + .with_context(|| format!("Incorrect timeline entry id str: {entry_id_str}")) }); for i in new_entry_ids { diff --git a/storage_scrubber/src/scan_safekeeper_metadata.rs b/storage_scrubber/src/scan_safekeeper_metadata.rs index 403b4590a8..0a4d4266a0 100644 --- a/storage_scrubber/src/scan_safekeeper_metadata.rs +++ b/storage_scrubber/src/scan_safekeeper_metadata.rs @@ -84,10 +84,7 @@ pub async fn scan_safekeeper_metadata( bucket_config: BucketConfig, db_or_list: 
DatabaseOrList, ) -> anyhow::Result { - info!( - "checking bucket {}, region {}", - bucket_config.bucket, bucket_config.region - ); + info!("checking {}", bucket_config.desc_str()); let (remote_client, target) = init_remote(bucket_config, NodeKind::Safekeeper).await?; let console_config = ConsoleConfig::from_env()?; diff --git a/storage_scrubber/src/tenant_snapshot.rs b/storage_scrubber/src/tenant_snapshot.rs index bb4079b5f4..39e0b5c9b4 100644 --- a/storage_scrubber/src/tenant_snapshot.rs +++ b/storage_scrubber/src/tenant_snapshot.rs @@ -16,7 +16,7 @@ use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::IndexPart; use pageserver_api::shard::TenantShardId; -use remote_storage::GenericRemoteStorage; +use remote_storage::{GenericRemoteStorage, S3Config}; use utils::generation::Generation; use utils::id::TenantId; @@ -24,6 +24,7 @@ pub struct SnapshotDownloader { s3_client: Arc, s3_root: RootTarget, bucket_config: BucketConfig, + bucket_config_s3: S3Config, tenant_id: TenantId, output_path: Utf8PathBuf, concurrency: usize, @@ -36,12 +37,17 @@ impl SnapshotDownloader { output_path: Utf8PathBuf, concurrency: usize, ) -> anyhow::Result { + let bucket_config_s3 = match &bucket_config.0.storage { + remote_storage::RemoteStorageKind::AwsS3(config) => config.clone(), + _ => panic!("only S3 configuration is supported for snapshot downloading"), + }; let (s3_client, s3_root) = - init_remote_s3(bucket_config.clone(), NodeKind::Pageserver).await?; + init_remote_s3(bucket_config_s3.clone(), NodeKind::Pageserver).await?; Ok(Self { s3_client, s3_root, bucket_config, + bucket_config_s3, tenant_id, output_path, concurrency, @@ -87,7 +93,7 @@ impl SnapshotDownloader { let versions = self .s3_client .list_object_versions() - .bucket(self.bucket_config.bucket.clone()) + .bucket(self.bucket_config_s3.bucket_name.clone()) .prefix(&remote_layer_path) .send() .await?; @@ -96,7 +102,7 @@ impl SnapshotDownloader { }; download_object_to_file_s3( &self.s3_client, - &self.bucket_config.bucket, + &self.bucket_config_s3.bucket_name, &remote_layer_path, version.version_id.as_deref(), &local_path, diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index d3419bd8b1..8e68775471 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -256,12 +256,17 @@ class NeonBenchmarker: metric_value: float, unit: str, report: MetricReport, + labels: Optional[ + dict[str, str] + ] = None, # use this to associate additional key/value pairs in json format for associated Neon object IDs like project ID with the metric ): """ Record a benchmark result. """ # just to namespace the value name = f"{self.PROPERTY_PREFIX}_{metric_name}" + if labels is None: + labels = {} self.property_recorder( name, { @@ -269,6 +274,7 @@ class NeonBenchmarker: "value": metric_value, "unit": unit, "report": report, + "labels": labels, }, ) diff --git a/test_runner/fixtures/pageserver/allowed_errors.py b/test_runner/fixtures/pageserver/allowed_errors.py index d05704c8e0..5059039678 100755 --- a/test_runner/fixtures/pageserver/allowed_errors.py +++ b/test_runner/fixtures/pageserver/allowed_errors.py @@ -25,8 +25,14 @@ def scan_pageserver_log_for_errors( # It's an ERROR or WARN. Is it in the allow-list? 
for a in allowed_errors: - if re.match(a, line): - break + try: + if re.match(a, line): + break + # We can switch `re.error` with `re.PatternError` after 3.13 + # https://docs.python.org/3/library/re.html#re.PatternError + except re.error: + print(f"Invalid regex: '{a}'", file=sys.stderr) + raise else: errors.append((lineno, line)) return errors diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index d1a9b5921a..01583757fa 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -665,6 +665,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): force_l0_compaction=False, wait_until_uploaded=False, enhanced_gc_bottom_most_compaction=False, + body: Optional[dict[str, Any]] = None, ): self.is_testing_enabled_or_skip() query = {} @@ -683,6 +684,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact", params=query, + json=body, ) log.info(f"Got compact request response code: {res.status_code}") self.verbose_error(res) diff --git a/test_runner/fixtures/pg_version.py b/test_runner/fixtures/pg_version.py index 4feab52c43..798db1e8d9 100644 --- a/test_runner/fixtures/pg_version.py +++ b/test_runner/fixtures/pg_version.py @@ -22,8 +22,8 @@ class PgVersion(str, enum.Enum): V16 = "16" V17 = "17" - # Default Postgres Version for tests that don't really depend on Postgres itself - DEFAULT = V16 + # Postgres Version for tests that uses `fixtures.utils.run_only_on_default_postgres` + DEFAULT = V17 # Instead of making version an optional parameter in methods, we can use this fake entry # to explicitly rely on the default server version (could be different from pg_version fixture value) diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 96a651f0db..bb45385ea6 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -495,8 +495,14 @@ def scan_log_for_errors(input: Iterable[str], allowed_errors: list[str]) -> list # It's an ERROR or WARN. Is it in the allow-list? for a in allowed_errors: - if re.match(a, line): - break + try: + if re.match(a, line): + break + # We can switch `re.error` with `re.PatternError` after 3.13 + # https://docs.python.org/3/library/re.html#re.PatternError + except re.error: + log.error(f"Invalid regex: '{a}'") + raise else: errors.append((lineno, line)) return errors diff --git a/test_runner/performance/test_perf_ingest_using_pgcopydb.py b/test_runner/performance/test_perf_ingest_using_pgcopydb.py new file mode 100644 index 0000000000..2f4574ba88 --- /dev/null +++ b/test_runner/performance/test_perf_ingest_using_pgcopydb.py @@ -0,0 +1,267 @@ +import os +import re +import subprocess +import sys +import textwrap +from pathlib import Path +from typing import cast +from urllib.parse import urlparse + +import pytest +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.utils import humantime_to_ms + + +def setup_environment(): + """Set up necessary environment variables for pgcopydb execution. + + Expects the following variables to be set in the environment: + - PG_CONFIG: e.g. /tmp/neon/pg_install/v16/bin/pg_config + - PSQL: e.g. /tmp/neon/pg_install/v16/bin/psql + - PG_16_LIB_PATH: e.g. /tmp/neon/pg_install/v16/lib + - PGCOPYDB: e.g. /pgcopydb/bin/pgcopydb + - PGCOPYDB_LIB_PATH: e.g. 
/pgcopydb/lib + - BENCHMARK_INGEST_SOURCE_CONNSTR + - BENCHMARK_INGEST_TARGET_CONNSTR + - PERF_TEST_RESULT_CONNSTR + - TARGET_PROJECT_TYPE + + """ + # Ensure required environment variables are set + required_env_vars = [ + "PGCOPYDB", + "PGCOPYDB_LIB_PATH", + "PG_CONFIG", + "PSQL", + "PG_16_LIB_PATH", + "BENCHMARK_INGEST_SOURCE_CONNSTR", + "BENCHMARK_INGEST_TARGET_CONNSTR", + "PERF_TEST_RESULT_CONNSTR", + "TARGET_PROJECT_TYPE", + ] + for var in required_env_vars: + if not os.getenv(var): + raise OSError(f"Required environment variable '{var}' is not set.") + + +def build_pgcopydb_command(pgcopydb_filter_file: Path, test_output_dir: Path): + """Builds the pgcopydb command to execute using existing environment variables.""" + pgcopydb_executable = os.getenv("PGCOPYDB") + if not pgcopydb_executable: + raise OSError("PGCOPYDB environment variable is not set.") + + return [ + pgcopydb_executable, + "clone", + "--dir", + str(test_output_dir), + "--skip-vacuum", + "--no-owner", + "--no-acl", + "--skip-db-properties", + "--table-jobs", + "4", + "--index-jobs", + "4", + "--restore-jobs", + "4", + "--split-tables-larger-than", + "10GB", + "--skip-extensions", + "--use-copy-binary", + "--filters", + str(pgcopydb_filter_file), + ] + + +@pytest.fixture() # must be function scoped because test_output_dir is function scoped +def pgcopydb_filter_file(test_output_dir: Path) -> Path: + """Creates the pgcopydb_filter.txt file required by pgcopydb.""" + filter_content = textwrap.dedent("""\ + [include-only-table] + public.events + public.emails + public.email_transmissions + public.payments + public.editions + public.edition_modules + public.sp_content + public.email_broadcasts + public.user_collections + public.devices + public.user_accounts + public.lessons + public.lesson_users + public.payment_methods + public.orders + public.course_emails + public.modules + public.users + public.module_users + public.courses + public.payment_gateway_keys + public.accounts + public.roles + public.payment_gateways + public.management + public.event_names + """) + filter_path = test_output_dir / "pgcopydb_filter.txt" + filter_path.write_text(filter_content) + return filter_path + + +def get_backpressure_time(connstr): + """Executes a query to get the backpressure throttling time in seconds.""" + query = "select backpressure_throttling_time()/1000000;" + psql_path = os.getenv("PSQL") + if psql_path is None: + raise OSError("The PSQL environment variable is not set.") + result = subprocess.run( + [psql_path, connstr, "-t", "-c", query], capture_output=True, text=True, check=True + ) + return float(result.stdout.strip()) + + +def run_command_and_log_output(command, log_file_path: Path): + """ + Runs a command and logs output to both a file and GitHub Actions console. + + Args: + command (list): The command to execute. + log_file_path (Path): Path object for the log file where output is written. 
+ """ + # Define a list of necessary environment variables for pgcopydb + custom_env_vars = { + "LD_LIBRARY_PATH": f"{os.getenv('PGCOPYDB_LIB_PATH')}:{os.getenv('PG_16_LIB_PATH')}", + "PGCOPYDB_SOURCE_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_SOURCE_CONNSTR")), + "PGCOPYDB_TARGET_PGURI": cast(str, os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")), + "PGOPTIONS": "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7", + } + # Combine the current environment with custom variables + env = os.environ.copy() + env.update(custom_env_vars) + + with log_file_path.open("w") as log_file: + process = subprocess.Popen( + command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, env=env + ) + + assert process.stdout is not None, "process.stdout should not be None" + + # Stream output to both log file and console + for line in process.stdout: + print(line, end="") # Stream to GitHub Actions log + sys.stdout.flush() + log_file.write(line) # Write to log file + + process.wait() # Wait for the process to finish + if process.returncode != 0: + raise subprocess.CalledProcessError(process.returncode, command) + + +def parse_log_and_report_metrics( + zenbenchmark: NeonBenchmarker, log_file_path: Path, backpressure_time_diff: float +): + """Parses the pgcopydb log file for performance metrics and reports them to the database.""" + metrics = {"backpressure_time": backpressure_time_diff} + + # Define regex patterns to capture metrics + metric_patterns = { + "COPY_INDEX_CONSTRAINTS_VACUUM": re.compile( + r"COPY, INDEX, CONSTRAINTS, VACUUM \(wall clock\).*" + ), + "COPY_CUMULATIVE": re.compile(r"COPY \(cumulative\).*"), + "CREATE_INDEX_CUMULATIVE": re.compile(r"CREATE INDEX \(cumulative\).*"), + "CONSTRAINTS_CUMULATIVE": re.compile(r"CONSTRAINTS \(cumulative\).*"), + "FINALIZE_SCHEMA": re.compile(r"Finalize Schema.*"), + "TOTAL_DURATION": re.compile(r"Total Wall Clock Duration.*"), + } + + # Parse log file + with log_file_path.open("r") as log_file: + for line in log_file: + for metric_name, pattern in metric_patterns.items(): + if pattern.search(line): + # Extract duration and convert it to seconds + duration_match = re.search(r"\d+h\d+m|\d+s|\d+ms|\d+\.\d+s", line) + if duration_match: + duration_str = duration_match.group(0) + parts = re.findall(r"\d+[a-zA-Z]+", duration_str) + rust_like_humantime = " ".join(parts) + duration_seconds = humantime_to_ms(rust_like_humantime) / 1000.0 + metrics[metric_name] = duration_seconds + + endpoint_id = {"endpoint_id": get_endpoint_id()} + for metric_name, duration_seconds in metrics.items(): + zenbenchmark.record( + metric_name, duration_seconds, "s", MetricReport.LOWER_IS_BETTER, endpoint_id + ) + + +def get_endpoint_id(): + """Extracts and returns the first segment of the hostname from the PostgreSQL URI stored in BENCHMARK_INGEST_TARGET_CONNSTR.""" + connstr = os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR") + if connstr is None: + raise OSError("BENCHMARK_INGEST_TARGET_CONNSTR environment variable is not set.") + + # Parse the URI + parsed_url = urlparse(connstr) + + # Extract the hostname and split to get the first segment + hostname = parsed_url.hostname + if hostname is None: + raise ValueError("Unable to parse hostname from BENCHMARK_INGEST_TARGET_CONNSTR") + + # Split the hostname by dots and take the first segment + endpoint_id = hostname.split(".")[0] + + return endpoint_id + + +@pytest.fixture() # must be function scoped because test_output_dir is function scoped +def log_file_path(test_output_dir): + """Fixture to provide a temporary log 
file path.""" + if not os.getenv("TARGET_PROJECT_TYPE"): + raise OSError("Required environment variable 'TARGET_PROJECT_TYPE' is not set.") + return (test_output_dir / os.getenv("TARGET_PROJECT_TYPE")).with_suffix(".log") + + +@pytest.mark.remote_cluster +def test_ingest_performance_using_pgcopydb( + zenbenchmark: NeonBenchmarker, + log_file_path: Path, + pgcopydb_filter_file: Path, + test_output_dir: Path, +): + """ + Simulate project migration from another PostgreSQL provider to Neon. + + Measure performance for Neon ingest steps + - COPY + - CREATE INDEX + - CREATE CONSTRAINT + - VACUUM ANALYZE + - create foreign keys + + Use pgcopydb to copy data from the source database to the destination database. + """ + # Set up environment and create filter file + setup_environment() + + # Get backpressure time before ingest + backpressure_time_before = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")) + + # Build and run the pgcopydb command + command = build_pgcopydb_command(pgcopydb_filter_file, test_output_dir) + try: + run_command_and_log_output(command, log_file_path) + except subprocess.CalledProcessError as e: + pytest.fail(f"pgcopydb command failed with error: {e}") + + # Get backpressure time after ingest and calculate the difference + backpressure_time_after = get_backpressure_time(os.getenv("BENCHMARK_INGEST_TARGET_CONNSTR")) + backpressure_time_diff = backpressure_time_after - backpressure_time_before + + # Parse log file and report metrics, including backpressure time difference + parse_log_and_report_metrics(zenbenchmark, log_file_path, backpressure_time_diff) diff --git a/test_runner/performance/test_storage_controller_scale.py b/test_runner/performance/test_storage_controller_scale.py index d2eba751f8..dc051483f8 100644 --- a/test_runner/performance/test_storage_controller_scale.py +++ b/test_runner/performance/test_storage_controller_scale.py @@ -16,7 +16,7 @@ from fixtures.neon_fixtures import ( PageserverAvailability, PageserverSchedulingPolicy, ) -from fixtures.pageserver.http import PageserverHttpClient +from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient from fixtures.pg_version import PgVersion @@ -273,7 +273,17 @@ def test_storage_controller_many_tenants( archival_state = rng.choice( [TimelineArchivalState.ARCHIVED, TimelineArchivalState.UNARCHIVED] ) - virtual_ps_http.timeline_archival_config(tenant_id, timeline_id, archival_state) + try: + virtual_ps_http.timeline_archival_config(tenant_id, timeline_id, archival_state) + except PageserverApiException as e: + if e.status_code == 404: + # FIXME: there is an edge case where timeline ops can encounter a 404 during + # a very short time window between generating a new generation number and + # attaching this tenant to its new pageserver. 
+ # See https://github.com/neondatabase/neon/issues/9471 + pass + else: + raise # Generate a mixture of operations and dispatch them all concurrently futs = [] diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index 370df3c379..48950a5a50 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -116,16 +116,64 @@ page_cache_size=10 assert vectored_average < 8 +def test_pageserver_gc_compaction_smoke(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start(initial_tenant_conf=AGGRESIVE_COMPACTION_TENANT_CONF) + + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + row_count = 1000 + churn_rounds = 10 + + ps_http = env.pageserver.http_client() + + workload = Workload(env, tenant_id, timeline_id) + workload.init(env.pageserver.id) + + log.info("Writing initial data ...") + workload.write_rows(row_count, env.pageserver.id) + + for i in range(1, churn_rounds + 1): + if i % 10 == 0: + log.info(f"Running churn round {i}/{churn_rounds} ...") + + workload.churn_rows(row_count, env.pageserver.id) + # Force L0 compaction to ensure the number of layers is within bounds, so that gc-compaction can run. + ps_http.timeline_compact(tenant_id, timeline_id, force_l0_compaction=True) + assert ps_http.perf_info(tenant_id, timeline_id)[0]["num_of_l0"] <= 1 + ps_http.timeline_compact( + tenant_id, + timeline_id, + enhanced_gc_bottom_most_compaction=True, + body={ + "start": "000000000000000000000000000000000000", + "end": "030000000000000000000000000000000000", + }, + ) + + log.info("Validating at workload end ...") + workload.validate(env.pageserver.id) + + # Stripe sizes in number of pages. TINY_STRIPES = 16 LARGE_STRIPES = 32768 @pytest.mark.parametrize( - "shard_count,stripe_size", [(None, None), (4, TINY_STRIPES), (4, LARGE_STRIPES)] + "shard_count,stripe_size,gc_compaction", + [ + (None, None, False), + (4, TINY_STRIPES, False), + (4, LARGE_STRIPES, False), + (4, LARGE_STRIPES, True), + ], ) def test_sharding_compaction( - neon_env_builder: NeonEnvBuilder, stripe_size: int, shard_count: Optional[int] + neon_env_builder: NeonEnvBuilder, + stripe_size: int, + shard_count: Optional[int], + gc_compaction: bool, ): """ Use small stripes, small layers, and small compaction thresholds to exercise how compaction @@ -217,6 +265,17 @@ def test_sharding_compaction( # Assert that everything is still readable workload.validate() + if gc_compaction: + # trigger gc compaction to get more coverage for that, piggyback on the existing workload + for shard in env.storage_controller.locate(tenant_id): + pageserver = env.get_pageserver(shard["node_id"]) + tenant_shard_id = shard["shard_id"] + pageserver.http_client().timeline_compact( + tenant_shard_id, + timeline_id, + enhanced_gc_bottom_most_compaction=True, + ) + class CompactionAlgorithm(str, enum.Enum): LEGACY = "legacy" diff --git a/test_runner/regress/test_ondemand_wal_download.py b/test_runner/regress/test_ondemand_wal_download.py new file mode 100644 index 0000000000..a7eb3e6625 --- /dev/null +++ b/test_runner/regress/test_ondemand_wal_download.py @@ -0,0 +1,27 @@ +from fixtures.neon_fixtures import NeonEnv + + +def test_on_demand_wal_download(neon_simple_env: NeonEnv): + env = neon_simple_env + ep = env.endpoints.create_start( + branch_name="main", + endpoint_id="primary", + config_lines=[ + "max_wal_size=32MB", + "min_wal_size=32MB", + "neon.logical_replication_max_snap_files=10000", + ], + ) + + con = ep.connect() + cur = con.cursor() + 
cur.execute("CREATE TABLE t(pk bigint primary key, payload text)") + cur.execute("ALTER TABLE t ALTER payload SET STORAGE external") + cur.execute("select pg_create_logical_replication_slot('myslot', 'test_decoding', false, true)") + cur.execute("insert into t values (generate_series(1,100000),repeat('?',10000))") + + ep.stop("fast") + ep.start() + con = ep.connect() + cur = con.cursor() + cur.execute("select pg_replication_slot_advance('myslot', pg_current_wal_insert_lsn())") diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py index 5a499ea98b..158c3fddb0 100644 --- a/test_runner/regress/test_tenants.py +++ b/test_runner/regress/test_tenants.py @@ -369,12 +369,16 @@ def test_create_churn_during_restart(neon_env_builder: NeonEnvBuilder): - Bad response codes during shutdown (e.g. returning 500 instead of 503) - Issues where a tenant is still starting up while we receive a request for it - Issues with interrupting/resuming tenant/timeline creation in shutdown + - Issues with a timeline is not created successfully because of restart. """ env = neon_env_builder.init_configs() env.start() tenant_id: TenantId = env.initial_tenant timeline_id = env.initial_timeline + # At this point, the initial tenant/timeline might not have been created successfully, + # and this is the case we want to test. + # Multiple creation requests which race will generate this error on the pageserver # and storage controller respectively env.pageserver.allowed_errors.append(".*Conflict: Tenant is already being modified.*") diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index c447535e10..0650f12cd1 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -15,13 +15,19 @@ from fixtures.neon_fixtures import ( last_flush_lsn_upload, ) from fixtures.pageserver.http import PageserverApiException -from fixtures.pageserver.utils import assert_prefix_empty, assert_prefix_not_empty, list_prefix +from fixtures.pageserver.utils import ( + assert_prefix_empty, + assert_prefix_not_empty, + list_prefix, + wait_until_tenant_active, +) from fixtures.pg_version import PgVersion from fixtures.remote_storage import S3Storage, s3_storage -from fixtures.utils import run_only_on_default_postgres, wait_until +from fixtures.utils import run_only_on_default_postgres, skip_in_debug_build, wait_until from mypy_boto3_s3.type_defs import ( ObjectTypeDef, ) +from psycopg2.errors import IoError, UndefinedTable @pytest.mark.parametrize("shard_count", [0, 4]) @@ -384,6 +390,7 @@ def test_timeline_offload_persist(neon_env_builder: NeonEnvBuilder, delete_timel @run_only_on_default_postgres("this test isn't sensitive to the contents of timelines") +@skip_in_debug_build("times out in debug builds") def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): """ A general consistency check on archival/offload timeline state, and its intersection @@ -406,7 +413,13 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): tenant_shard_id = TenantShardId(tenant_id, 0, 0) # Unavailable pageservers during timeline CRUD operations can be logged as errors on the storage controller - env.storage_controller.allowed_errors.append(".*error sending request.*") + env.storage_controller.allowed_errors.extend( + [ + ".*error sending request.*", + # FIXME: the pageserver should not return 500s on cancellation (https://github.com/neondatabase/neon/issues/97680) + ".*InternalServerError\\(Error deleting 
timeline .* on .* on .*: pageserver API: error: Cancelled", + ] + ) for ps in env.pageservers: # We will do unclean restarts, which results in these messages when cleaning up files @@ -415,10 +428,10 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): ".*removing local file.*because it has unexpected length.*", ".*__temp.*", # FIXME: there are still anyhow::Error paths in timeline creation/deletion which - # generate 500 results when called during shutdown + # generate 500 results when called during shutdown (https://github.com/neondatabase/neon/issues/9768) ".*InternalServerError.*", # FIXME: there are still anyhow::Error paths in timeline deletion that generate - # log lines at error severity + # log lines at error severity (https://github.com/neondatabase/neon/issues/9768) ".*delete_timeline.*Error", ] ) @@ -635,8 +648,21 @@ def test_timeline_archival_chaos(neon_env_builder: NeonEnvBuilder): assert violations == [] -@pytest.mark.parametrize("offload_child", ["offload", "offload-corrupt", "archive", None]) -def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Optional[str]): +@pytest.mark.parametrize("with_intermediary", [False, True]) +@pytest.mark.parametrize( + "offload_child", + [ + "offload", + "offload-corrupt", + "offload-no-restart", + "offload-parent", + "archive", + None, + ], +) +def test_timeline_retain_lsn( + neon_env_builder: NeonEnvBuilder, with_intermediary: bool, offload_child: Optional[str] +): """ Ensure that retain_lsn functionality for timelines works, both for offloaded and non-offloaded ones """ @@ -644,6 +670,7 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op # Our corruption code only works with S3 compatible storage neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + neon_env_builder.rust_log_override = "info,[gc_timeline]=debug" env = neon_env_builder.init_start() ps_http = env.pageserver.http_client() @@ -651,22 +678,30 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op tenant_id, root_timeline_id = env.create_tenant( conf={ # small checkpointing and compaction targets to ensure we generate many upload operations - "checkpoint_distance": 128 * 1024, + "checkpoint_distance": 32 * 1024, "compaction_threshold": 1, - "compaction_target_size": 128 * 1024, + "compaction_target_size": 32 * 1024, # set small image creation thresholds so that gc deletes data - "image_creation_threshold": 2, + "image_creation_threshold": 1, # disable background compaction and GC. We invoke it manually when we want it to happen. 
"gc_period": "0s", "compaction_period": "0s", # Disable pitr, we only want the latest lsn "pitr_interval": "0s", + "gc_horizon": 0, # Don't rely on endpoint lsn leases "lsn_lease_length": "0s", } ) - with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: + if with_intermediary: + parent_branch_name = "test_archived_parent" + parent_timeline_id = env.create_branch("test_archived_parent", tenant_id) + else: + parent_branch_name = "main" + parent_timeline_id = root_timeline_id + + with env.endpoints.create_start(parent_branch_name, tenant_id=tenant_id) as endpoint: endpoint.safe_psql_many( [ "CREATE TABLE foo(v int, key serial primary key, t text default 'data_content')", @@ -676,14 +711,16 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op ) pre_branch_sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") log.info(f"Pre branch sum: {pre_branch_sum}") - last_flush_lsn_upload(env, endpoint, tenant_id, root_timeline_id) + last_flush_lsn_upload(env, endpoint, tenant_id, parent_timeline_id) # Create a branch and write some additional data to the parent - child_timeline_id = env.create_branch("test_archived_branch", tenant_id) + child_timeline_id = env.create_branch( + "test_archived_branch", tenant_id, ancestor_branch_name=parent_branch_name + ) - with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: - # Do some churn of the data. This is important so that we can overwrite image layers. - for i in range(10): + with env.endpoints.create_start(parent_branch_name, tenant_id=tenant_id) as endpoint: + # Do some overwriting churn with compactions in between. This is important so that we can overwrite image layers. + for i in range(5): endpoint.safe_psql_many( [ f"SELECT setseed(0.23{i})", @@ -692,9 +729,9 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op "UPDATE foo SET v=(random() * 409600)::int WHERE v % 3 = 0", ] ) + last_flush_lsn_upload(env, endpoint, tenant_id, parent_timeline_id) post_branch_sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") log.info(f"Post branch sum: {post_branch_sum}") - last_flush_lsn_upload(env, endpoint, tenant_id, root_timeline_id) if offload_child is not None: ps_http.timeline_archival_config( @@ -709,9 +746,19 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op assert leaf_detail["is_archived"] is True if "offload" in offload_child: ps_http.timeline_offload(tenant_id, child_timeline_id) + if "offload-parent" in offload_child: + # Also offload the parent to ensure the retain_lsn of the child + # is entered in the parent at unoffloading + ps_http.timeline_archival_config( + tenant_id, + parent_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + ps_http.timeline_offload(tenant_id, parent_timeline_id) # Do a restart to get rid of any in-memory objects (we only init gc info once, at attach) - env.pageserver.stop() + if offload_child is None or "no-restart" not in offload_child: + env.pageserver.stop() if offload_child == "offload-corrupt": assert isinstance(env.pageserver_remote_storage, S3Storage) listing = list_prefix( @@ -746,13 +793,21 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op ".*page_service_conn_main.*could not find data for key.*", ] ) - env.pageserver.start() + if offload_child is None or "no-restart" not in offload_child: + env.pageserver.start() + if offload_child == "offload-parent": + wait_until_tenant_active(ps_http, tenant_id=tenant_id) + 
ps_http.timeline_archival_config( + tenant_id, + parent_timeline_id, + state=TimelineArchivalState.UNARCHIVED, + ) # Do an agressive gc and compaction of the parent branch - ps_http.timeline_gc(tenant_id=tenant_id, timeline_id=root_timeline_id, gc_horizon=0) + ps_http.timeline_gc(tenant_id=tenant_id, timeline_id=parent_timeline_id, gc_horizon=0) ps_http.timeline_checkpoint( tenant_id, - root_timeline_id, + parent_timeline_id, force_l0_compaction=True, force_repartition=True, wait_until_uploaded=True, @@ -768,10 +823,15 @@ def test_timeline_retain_lsn(neon_env_builder: NeonEnvBuilder, offload_child: Op # Now, after unarchival, the child timeline should still have its data accessible (or corrupted) if offload_child == "offload-corrupt": - with pytest.raises(RuntimeError, match=".*failed to get basebackup.*"): - env.endpoints.create_start( + if with_intermediary: + error_regex = "(.*could not read .* from page server.*|.*relation .* does not exist)" + else: + error_regex = ".*failed to get basebackup.*" + with pytest.raises((RuntimeError, IoError, UndefinedTable), match=error_regex): + with env.endpoints.create_start( "test_archived_branch", tenant_id=tenant_id, basebackup_request_tries=1 - ) + ) as endpoint: + endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") else: with env.endpoints.create_start("test_archived_branch", tenant_id=tenant_id) as endpoint: sum = endpoint.safe_psql("SELECT sum(key) from foo where v < 51200") diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 0676b3dd9a..6eaaa3c37f 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1784,6 +1784,89 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): cur.execute("INSERT INTO t (key) VALUES (123)") +def test_delete_timeline_under_load(neon_env_builder: NeonEnvBuilder): + """ + Test deleting timelines on a safekeeper while they're under load. + + This should not happen under normal operation, but it can happen if + there is some rogue compute/pageserver that is writing/reading to a + safekeeper that we're migrating a timeline away from, or if the timeline + is being deleted while such a rogue client is running. + """ + neon_env_builder.auth_enabled = True + env = neon_env_builder.init_start() + + # Create two endpoints that will generate load + timeline_id_a = env.create_branch("deleteme_a") + timeline_id_b = env.create_branch("deleteme_b") + + endpoint_a = env.endpoints.create("deleteme_a") + endpoint_a.start() + endpoint_b = env.endpoints.create("deleteme_b") + endpoint_b.start() + + # Get tenant and timeline IDs + tenant_id = env.initial_tenant + + # Start generating load on both timelines + def generate_load(endpoint: Endpoint): + with closing(endpoint.connect()) as conn: + with conn.cursor() as cur: + cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)") + while True: + try: + cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'data'") + except: # noqa + # Ignore errors since timeline may be deleted + break + + t_a = threading.Thread(target=generate_load, args=(endpoint_a,)) + t_b = threading.Thread(target=generate_load, args=(endpoint_b,)) + try: + t_a.start() + t_b.start() + + # Let the load run for a bit + log.info("Warming up...") + time.sleep(2) + + # Safekeeper errors will propagate to the pageserver: it is correct that these are + # logged at error severity because they indicate the pageserver is trying to read + # a timeline that it shouldn't. 
+ env.pageserver.allowed_errors.extend( + [ + ".*Timeline.*was cancelled.*", + ".*Timeline.*was not found.*", + ] + ) + + # Try deleting timelines while under load + sk = env.safekeepers[0] + sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) + + # Delete first timeline + log.info(f"Deleting {timeline_id_a}...") + assert sk_http.timeline_delete(tenant_id, timeline_id_a, only_local=True)["dir_existed"] + + # Delete second timeline + log.info(f"Deleting {timeline_id_b}...") + assert sk_http.timeline_delete(tenant_id, timeline_id_b, only_local=True)["dir_existed"] + + # Verify timelines are gone from disk + sk_data_dir = sk.data_dir + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_a)).exists() + # assert not (sk_data_dir / str(tenant_id) / str(timeline_id_b)).exists() + + finally: + log.info("Stopping endpoints...") + # Stop endpoints with immediate mode because we deleted the timeline out from under the compute, which may cause it to hang + endpoint_a.stop(mode="immediate") + endpoint_b.stop(mode="immediate") + log.info("Joining threads...") + t_a.join() + t_b.join() + + # Basic pull_timeline test. # When live_sk_change is False, compute is restarted to change set of # safekeepers; otherwise it is live reload. diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index c5e0d642ef..aeecd27b1f 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit c5e0d642efb02e4bfedc283b0a7707fe6c79cc89 +Subproject commit aeecd27b1f0775b606409d1cbb9c8aa9853a82af diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 1feff6b60f..544620db4c 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 1feff6b60f07cb71b665d0f5ead71a4320a71743 +Subproject commit 544620db4ca6945be4f1f686a7fbd2cdfb0bf96f diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index b0b693ea29..3cc152ae2d 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit b0b693ea298454e95e6b154780d1fd586a244dfd +Subproject commit 3cc152ae2d17b19679c7102486bdb94677705c02 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index aa2e29f2b6..e5d795a1a0 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit aa2e29f2b6952140dfe51876bbd11054acae776f +Subproject commit e5d795a1a0c25da907176d37c905badab70e00c0 diff --git a/vendor/revisions.json b/vendor/revisions.json index a1f2bc5dd1..a13ef29e45 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ - "17.1", - "aa2e29f2b6952140dfe51876bbd11054acae776f" + "17.2", + "e5d795a1a0c25da907176d37c905badab70e00c0" ], "v16": [ - "16.5", - "b0b693ea298454e95e6b154780d1fd586a244dfd" + "16.6", + "3cc152ae2d17b19679c7102486bdb94677705c02" ], "v15": [ - "15.9", - "1feff6b60f07cb71b665d0f5ead71a4320a71743" + "15.10", + "544620db4ca6945be4f1f686a7fbd2cdfb0bf96f" ], "v14": [ - "14.14", - "c5e0d642efb02e4bfedc283b0a7707fe6c79cc89" + "14.15", + "aeecd27b1f0775b606409d1cbb9c8aa9853a82af" ] } diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index d6773987ea..53d3a7364b 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -75,7 +75,7 @@ smallvec = { version = "1", default-features = false, features = ["const_new", " spki = { version = "0.7", default-features = false, features = ["pem", "std"] } subtle = { version = "2" } sync_wrapper = { version = "0.1", default-features = false, features = ["futures"] } -tikv-jemalloc-sys = { version = "0.5" } +tikv-jemalloc-sys 
= { version = "0.6", features = ["stats"] } time = { version = "0.3", features = ["macros", "serde-well-known"] } tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] } tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon", features = ["with-serde_json-1"] }