mirror of
https://github.com/neondatabase/neon.git
synced 2026-03-03 16:30:38 +00:00
Compare commits
113 Commits
conrad/pro
...
jcsp/contr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e09b33b49b | ||
|
|
b3cd883f93 | ||
|
|
38c7a2abfc | ||
|
|
f94248a594 | ||
|
|
9c53b41245 | ||
|
|
197a89ab3d | ||
|
|
b89e02f3e8 | ||
|
|
04517c6ff3 | ||
|
|
628451d68e | ||
|
|
502d512fe2 | ||
|
|
afda6d4700 | ||
|
|
65042cbadd | ||
|
|
b135194090 | ||
|
|
43dc03459d | ||
|
|
a1b0558493 | ||
|
|
cc138b56f9 | ||
|
|
61fcf64c22 | ||
|
|
6d3e8096fc | ||
|
|
3d1c3a80ae | ||
|
|
835287ba3a | ||
|
|
d63602cc78 | ||
|
|
1668d39b7c | ||
|
|
1d12efc428 | ||
|
|
85696297c5 | ||
|
|
aaf980f70d | ||
|
|
c52514ab02 | ||
|
|
2ee6bc5ec4 | ||
|
|
fd230227f2 | ||
|
|
93e958341f | ||
|
|
7dddbb9570 | ||
|
|
a55853f67f | ||
|
|
007b13b79a | ||
|
|
2dfd3cab8c | ||
|
|
b5833ef259 | ||
|
|
b0e43c2f88 | ||
|
|
e226d7a3d1 | ||
|
|
aa7ab9b3ac | ||
|
|
28ccda0a63 | ||
|
|
59b7ff8988 | ||
|
|
2e4c9c5704 | ||
|
|
3d30a7a934 | ||
|
|
6565fd4056 | ||
|
|
c5e3314c6e | ||
|
|
1ed0e52bc8 | ||
|
|
24d6587914 | ||
|
|
ebcbc1a482 | ||
|
|
117c1b5dde | ||
|
|
f3ecd5d76a | ||
|
|
cf161e1556 | ||
|
|
2521eba674 | ||
|
|
d56fea680e | ||
|
|
7ee5dca752 | ||
|
|
07d1db54b3 | ||
|
|
eeabecd89f | ||
|
|
fcff752851 | ||
|
|
2c91062828 | ||
|
|
ce8eb089f3 | ||
|
|
7dc382601c | ||
|
|
2451969d5c | ||
|
|
59ef701925 | ||
|
|
ac04bad457 | ||
|
|
2f3f98a319 | ||
|
|
5ff4b991c7 | ||
|
|
a93e3d31cc | ||
|
|
6d5687521b | ||
|
|
53721266f1 | ||
|
|
2f3433876f | ||
|
|
58d45c6e86 | ||
|
|
e502e880b5 | ||
|
|
c9a773af37 | ||
|
|
ec0ce06c16 | ||
|
|
0bd8eca9ca | ||
|
|
739f627b96 | ||
|
|
342cbea255 | ||
|
|
b391b29bdc | ||
|
|
5126ebbfed | ||
|
|
7fa986bc92 | ||
|
|
e8395807a5 | ||
|
|
a3e80448e8 | ||
|
|
ef233e91ef | ||
|
|
dee2041cd3 | ||
|
|
e4bb1ca7d8 | ||
|
|
b987648e71 | ||
|
|
c79c1dd8e9 | ||
|
|
a53db73851 | ||
|
|
9ae980bf4f | ||
|
|
665369c439 | ||
|
|
d7aeca2f34 | ||
|
|
38415a9816 | ||
|
|
597125e124 | ||
|
|
e71d20d392 | ||
|
|
aa0554fd1e | ||
|
|
b853f78136 | ||
|
|
6ad99826c1 | ||
|
|
311ee793b9 | ||
|
|
ad472bd4a1 | ||
|
|
c51db1db61 | ||
|
|
34c1295594 | ||
|
|
b593e51eae | ||
|
|
4c4cb80186 | ||
|
|
92273b6d5e | ||
|
|
e74e7aac93 | ||
|
|
4cca5cdb12 | ||
|
|
9d425b54f7 | ||
|
|
ec790870d5 | ||
|
|
4d7111f240 | ||
|
|
b1fd086c0c | ||
|
|
b6eea65597 | ||
|
|
c42c28b339 | ||
|
|
e4837b0a5a | ||
|
|
14c4fae64a | ||
|
|
cc70fc802d | ||
|
|
fa07097f2f |
4
.github/actionlint.yml
vendored
4
.github/actionlint.yml
vendored
@@ -21,3 +21,7 @@ config-variables:
|
||||
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
|
||||
- DEV_AWS_OIDC_ROLE_ARN
|
||||
- BENCHMARK_INGEST_TARGET_PROJECTID
|
||||
- PGREGRESS_PG16_PROJECT_ID
|
||||
- PGREGRESS_PG17_PROJECT_ID
|
||||
- SLACK_ON_CALL_QA_STAGING_STREAM
|
||||
- DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN
|
||||
|
||||
@@ -7,10 +7,9 @@ inputs:
|
||||
type: boolean
|
||||
required: false
|
||||
default: false
|
||||
aws_oicd_role_arn:
|
||||
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
|
||||
required: false
|
||||
default: ''
|
||||
aws-oicd-role-arn:
|
||||
description: 'OIDC role arn to interract with S3'
|
||||
required: true
|
||||
|
||||
outputs:
|
||||
base-url:
|
||||
@@ -84,12 +83,11 @@ runs:
|
||||
ALLURE_VERSION: 2.27.0
|
||||
ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777
|
||||
|
||||
- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
|
||||
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
|
||||
|
||||
# Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
|
||||
|
||||
14
.github/actions/allure-report-store/action.yml
vendored
14
.github/actions/allure-report-store/action.yml
vendored
@@ -8,10 +8,9 @@ inputs:
|
||||
unique-key:
|
||||
description: 'string to distinguish different results in the same run'
|
||||
required: true
|
||||
aws_oicd_role_arn:
|
||||
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
|
||||
required: false
|
||||
default: ''
|
||||
aws-oicd-role-arn:
|
||||
description: 'OIDC role arn to interract with S3'
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
@@ -36,12 +35,11 @@ runs:
|
||||
env:
|
||||
REPORT_DIR: ${{ inputs.report-dir }}
|
||||
|
||||
- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
|
||||
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
|
||||
|
||||
- name: Upload test results
|
||||
|
||||
9
.github/actions/download/action.yml
vendored
9
.github/actions/download/action.yml
vendored
@@ -15,10 +15,19 @@ inputs:
|
||||
prefix:
|
||||
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
|
||||
required: false
|
||||
aws-oicd-role-arn:
|
||||
description: 'OIDC role arn to interract with S3'
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Download artifact
|
||||
id: download-artifact
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
20
.github/actions/run-python-test-set/action.yml
vendored
20
.github/actions/run-python-test-set/action.yml
vendored
@@ -48,10 +48,9 @@ inputs:
|
||||
description: 'benchmark durations JSON'
|
||||
required: false
|
||||
default: '{}'
|
||||
aws_oicd_role_arn:
|
||||
description: 'the OIDC role arn to (re-)acquire for allure report upload - if not set call must acquire OIDC role'
|
||||
required: false
|
||||
default: ''
|
||||
aws-oicd-role-arn:
|
||||
description: 'OIDC role arn to interract with S3'
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
@@ -62,6 +61,7 @@ runs:
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
|
||||
path: /tmp/neon
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: Download Neon binaries for the previous release
|
||||
if: inputs.build_type != 'remote'
|
||||
@@ -70,6 +70,7 @@ runs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
|
||||
path: /tmp/neon-previous
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: Download compatibility snapshot
|
||||
if: inputs.build_type != 'remote'
|
||||
@@ -81,6 +82,7 @@ runs:
|
||||
# The lack of compatibility snapshot (for example, for the new Postgres version)
|
||||
# shouldn't fail the whole job. Only relevant test should fail.
|
||||
skip-if-does-not-exist: true
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: Checkout
|
||||
if: inputs.needs_postgres_source == 'true'
|
||||
@@ -218,17 +220,19 @@ runs:
|
||||
# The lack of compatibility snapshot shouldn't fail the job
|
||||
# (for example if we didn't run the test for non build-and-test workflow)
|
||||
skip-if-does-not-exist: true
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: (Re-)configure AWS credentials # necessary to upload reports to S3 after a long-running test
|
||||
if: ${{ !cancelled() && (inputs.aws_oicd_role_arn != '') }}
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws_oicd_role_arn }}
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
|
||||
|
||||
- name: Upload test results
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-store
|
||||
with:
|
||||
report-dir: /tmp/test_output/allure/results
|
||||
unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
@@ -14,9 +14,11 @@ runs:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage
|
||||
skip-if-does-not-exist: true # skip if there's no previous coverage to download
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: Upload coverage data
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
11
.github/actions/upload/action.yml
vendored
11
.github/actions/upload/action.yml
vendored
@@ -14,6 +14,10 @@ inputs:
|
||||
prefix:
|
||||
description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
|
||||
required: false
|
||||
aws-oicd-role-arn:
|
||||
description: "the OIDC role arn for aws auth"
|
||||
required: false
|
||||
default: ""
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
@@ -53,6 +57,13 @@ runs:
|
||||
|
||||
echo 'SKIPPED=false' >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Upload artifact
|
||||
if: ${{ steps.prepare-artifact.outputs.SKIPPED == 'false' }}
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
@@ -70,6 +70,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
# we create a table that has one row for each database that we want to restore with the status whether the restore is done
|
||||
- name: Create benchmark_restore_status table if it does not exist
|
||||
|
||||
20
.github/workflows/_build-and-test-locally.yml
vendored
20
.github/workflows/_build-and-test-locally.yml
vendored
@@ -31,12 +31,13 @@ defaults:
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
|
||||
jobs:
|
||||
build-neon:
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
contents: read
|
||||
container:
|
||||
image: ${{ inputs.build-tools-image }}
|
||||
credentials:
|
||||
@@ -205,6 +206,13 @@ jobs:
|
||||
done
|
||||
fi
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours
|
||||
|
||||
- name: Run rust tests
|
||||
env:
|
||||
NEXTEST_RETRIES: 3
|
||||
@@ -256,6 +264,7 @@ jobs:
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
|
||||
path: /tmp/neon
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
|
||||
- name: Merge and upload coverage data
|
||||
@@ -265,6 +274,10 @@ jobs:
|
||||
regress-tests:
|
||||
# Don't run regression tests on debug arm64 builds
|
||||
if: inputs.build-type != 'debug' || inputs.arch != 'arm64'
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
contents: read
|
||||
statuses: write
|
||||
needs: [ build-neon ]
|
||||
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
|
||||
container:
|
||||
@@ -283,7 +296,7 @@ jobs:
|
||||
submodules: true
|
||||
|
||||
- name: Pytest regression tests
|
||||
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' }}
|
||||
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
timeout-minutes: 60
|
||||
with:
|
||||
@@ -295,6 +308,7 @@ jobs:
|
||||
real_s3_region: eu-central-1
|
||||
rerun_failed: true
|
||||
pg_version: ${{ matrix.pg_version }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
|
||||
|
||||
67
.github/workflows/benchmarking.yml
vendored
67
.github/workflows/benchmarking.yml
vendored
@@ -105,6 +105,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
@@ -122,7 +123,7 @@ jobs:
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
# Set --sparse-ordering option of pytest-order plugin
|
||||
# to ensure tests are running in order of appears in the file.
|
||||
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
|
||||
@@ -152,7 +153,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -204,6 +205,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Run Logical Replication benchmarks
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
@@ -214,7 +216,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 5400
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -231,7 +233,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 5400
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -243,7 +245,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
@@ -306,6 +308,7 @@ jobs:
|
||||
"image": [ "'"$image_default"'" ],
|
||||
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
|
||||
@@ -405,9 +408,10 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
|
||||
if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
@@ -426,7 +430,7 @@ jobs:
|
||||
neonvm-captest-sharding-reuse)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
|
||||
;;
|
||||
neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
|
||||
neonvm-captest-new | neonvm-captest-new-many-tables | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
|
||||
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
|
||||
;;
|
||||
rds-aurora)
|
||||
@@ -443,6 +447,26 @@ jobs:
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
# we want to compare Neon project OLTP throughput and latency at scale factor 10 GB
|
||||
# without (neonvm-captest-new)
|
||||
# and with (neonvm-captest-new-many-tables) many relations in the database
|
||||
- name: Create many relations before the run
|
||||
if: contains(fromJson('["neonvm-captest-new-many-tables"]'), matrix.platform)
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
TEST_NUM_RELATIONS: 10000
|
||||
|
||||
- name: Benchmark init
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
@@ -452,7 +476,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -467,7 +491,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -482,7 +506,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -500,7 +524,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -611,7 +635,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -626,7 +650,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -637,7 +661,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -708,6 +732,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
@@ -739,7 +764,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -753,7 +778,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -818,6 +843,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Get Connstring Secret Name
|
||||
run: |
|
||||
@@ -856,7 +882,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -868,7 +894,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -926,6 +952,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
@@ -957,7 +984,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -968,7 +995,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
|
||||
221
.github/workflows/build_and_test.yml
vendored
221
.github/workflows/build_and_test.yml
vendored
@@ -21,8 +21,6 @@ concurrency:
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
# A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
|
||||
E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
|
||||
|
||||
@@ -255,15 +253,15 @@ jobs:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
build-tag: ${{ needs.tag.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds
|
||||
# run without LFC on v17 release only
|
||||
# Run tests on all Postgres versions in release builds and only on the latest version in debug builds.
|
||||
# Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled.
|
||||
test-cfg: |
|
||||
${{ matrix.build-type == 'release' && '[{"pg_version":"v14", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v15", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v16", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "with-lfc"}]'
|
||||
|| '[{"pg_version":"v17", "lfc_state": "without-lfc"}]' }}
|
||||
${{ matrix.build-type == 'release' && '[{"pg_version":"v14", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v15", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v16", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "without-lfc"}]'
|
||||
|| '[{"pg_version":"v17", "lfc_state": "without-lfc" }]' }}
|
||||
secrets: inherit
|
||||
|
||||
# Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
|
||||
@@ -305,6 +303,11 @@ jobs:
|
||||
benchmarks:
|
||||
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
|
||||
needs: [ check-permissions, build-and-test-locally, build-build-tools-image, get-benchmarks-durations ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
@@ -333,6 +336,7 @@ jobs:
|
||||
extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
|
||||
benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
|
||||
pg_version: v16
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -345,6 +349,11 @@ jobs:
|
||||
report-benchmarks-failures:
|
||||
needs: [ benchmarks, create-test-report ]
|
||||
if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure'
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
@@ -360,6 +369,11 @@ jobs:
|
||||
create-test-report:
|
||||
needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ]
|
||||
if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
outputs:
|
||||
report-url: ${{ steps.create-allure-report.outputs.report-url }}
|
||||
|
||||
@@ -380,6 +394,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
@@ -411,6 +426,10 @@ jobs:
|
||||
coverage-report:
|
||||
if: ${{ !startsWith(github.ref_name, 'release') }}
|
||||
needs: [ check-permissions, build-build-tools-image, build-and-test-locally ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
@@ -437,12 +456,14 @@ jobs:
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact
|
||||
path: /tmp/neon
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Get coverage artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Merge coverage data
|
||||
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
|
||||
@@ -573,6 +594,10 @@ jobs:
|
||||
neon-image:
|
||||
needs: [ neon-image-arch, tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
@@ -587,11 +612,15 @@ jobs:
|
||||
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \
|
||||
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Login to Amazon Dev ECR
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Push multi-arch image to ECR
|
||||
run: |
|
||||
@@ -600,6 +629,10 @@ jobs:
|
||||
|
||||
compute-node-image-arch:
|
||||
needs: [ check-permissions, build-build-tools-image, tag ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -640,11 +673,15 @@ jobs:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Login to Amazon Dev ECR
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
with:
|
||||
@@ -717,6 +754,10 @@ jobs:
|
||||
|
||||
compute-node-image:
|
||||
needs: [ compute-node-image-arch, tag ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
@@ -761,11 +802,15 @@ jobs:
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
|
||||
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Login to Amazon Dev ECR
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Push multi-arch compute-node-${{ matrix.version.pg }} image to ECR
|
||||
run: |
|
||||
@@ -795,7 +840,7 @@ jobs:
|
||||
- pg: v17
|
||||
debian: bookworm
|
||||
env:
|
||||
VM_BUILDER_VERSION: v0.35.0
|
||||
VM_BUILDER_VERSION: v0.37.1
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -890,7 +935,9 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
permissions:
|
||||
id-token: write # for `aws-actions/configure-aws-credentials`
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
|
||||
env:
|
||||
VERSIONS: v14 v15 v16 v17
|
||||
@@ -901,12 +948,15 @@ jobs:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- name: Login to dev ECR
|
||||
uses: docker/login-action@v3
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Login to Amazon Dev ECR
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Copy vm-compute-node images to ECR
|
||||
run: |
|
||||
@@ -985,6 +1035,11 @@ jobs:
|
||||
trigger-custom-extensions-build-and-wait:
|
||||
needs: [ check-permissions, tag ]
|
||||
runs-on: ubuntu-22.04
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
steps:
|
||||
- name: Set PR's status to pending and request a remote CI test
|
||||
run: |
|
||||
@@ -1060,12 +1115,79 @@ jobs:
|
||||
needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
|
||||
# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
|
||||
if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
|
||||
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
runs-on: [ self-hosted, small ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Create git tag and GitHub release
|
||||
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
retries: 5
|
||||
script: |
|
||||
const tag = "${{ needs.tag.outputs.build-tag }}";
|
||||
|
||||
try {
|
||||
const existingRef = await github.rest.git.getRef({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
ref: `tags/${tag}`,
|
||||
});
|
||||
|
||||
if (existingRef.data.object.sha !== context.sha) {
|
||||
throw new Error(`Tag ${tag} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);
|
||||
}
|
||||
|
||||
console.log(`Tag ${tag} already exists and points to ${context.sha} as expected.`);
|
||||
} catch (error) {
|
||||
if (error.status !== 404) {
|
||||
throw error;
|
||||
}
|
||||
|
||||
console.log(`Tag ${tag} does not exist. Creating it...`);
|
||||
await github.rest.git.createRef({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
ref: `refs/tags/${tag}`,
|
||||
sha: context.sha,
|
||||
});
|
||||
console.log(`Tag ${tag} created successfully.`);
|
||||
}
|
||||
|
||||
// TODO: check how GitHub releases looks for proxy/compute releases and enable them if they're ok
|
||||
if (context.ref !== 'refs/heads/release') {
|
||||
console.log(`GitHub release skipped for ${context.ref}.`);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const existingRelease = await github.rest.repos.getReleaseByTag({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
tag: tag,
|
||||
});
|
||||
|
||||
console.log(`Release for tag ${tag} already exists (ID: ${existingRelease.data.id}).`);
|
||||
} catch (error) {
|
||||
if (error.status !== 404) {
|
||||
throw error;
|
||||
}
|
||||
|
||||
console.log(`Release for tag ${tag} does not exist. Creating it...`);
|
||||
await github.rest.repos.createRelease({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
tag_name: tag,
|
||||
generate_release_notes: true,
|
||||
});
|
||||
console.log(`Release for tag ${tag} created successfully.`);
|
||||
}
|
||||
|
||||
- name: Trigger deploy workflow
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
@@ -1115,38 +1237,13 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Create git tag
|
||||
if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||
retries: 5
|
||||
script: |
|
||||
await github.rest.git.createRef({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
ref: "refs/tags/${{ needs.tag.outputs.build-tag }}",
|
||||
sha: context.sha,
|
||||
})
|
||||
|
||||
# TODO: check how GitHub releases looks for proxy releases and enable it if it's ok
|
||||
- name: Create GitHub release
|
||||
if: github.ref_name == 'release'
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||
retries: 5
|
||||
script: |
|
||||
await github.rest.repos.createRelease({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
tag_name: "${{ needs.tag.outputs.build-tag }}",
|
||||
generate_release_notes: true,
|
||||
})
|
||||
|
||||
# The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
|
||||
promote-compatibility-data:
|
||||
needs: [ deploy ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: read
|
||||
# `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
|
||||
if: github.ref_name == 'release' && !failure() && !cancelled()
|
||||
|
||||
@@ -1183,6 +1280,12 @@ jobs:
|
||||
echo "run-id=${run_id}" | tee -a ${GITHUB_OUTPUT}
|
||||
echo "commit-sha=${last_commit_sha}" | tee -a ${GITHUB_OUTPUT}
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Promote compatibility snapshot and Neon artifact
|
||||
env:
|
||||
BUCKET: neon-github-public-dev
|
||||
|
||||
46
.github/workflows/cloud-regress.yml
vendored
46
.github/workflows/cloud-regress.yml
vendored
@@ -19,15 +19,21 @@ concurrency:
|
||||
group: ${{ github.workflow }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
|
||||
jobs:
|
||||
regress:
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 16
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
pg-version: [16, 17]
|
||||
|
||||
runs-on: us-east-2
|
||||
container:
|
||||
@@ -40,9 +46,11 @@ jobs:
|
||||
submodules: true
|
||||
|
||||
- name: Patch the test
|
||||
env:
|
||||
PG_VERSION: ${{matrix.pg-version}}
|
||||
run: |
|
||||
cd "vendor/postgres-v${DEFAULT_PG_VERSION}"
|
||||
patch -p1 < "../../compute/patches/cloud_regress_pg${DEFAULT_PG_VERSION}.patch"
|
||||
cd "vendor/postgres-v${PG_VERSION}"
|
||||
patch -p1 < "../../compute/patches/cloud_regress_pg${PG_VERSION}.patch"
|
||||
|
||||
- name: Generate a random password
|
||||
id: pwgen
|
||||
@@ -55,8 +63,9 @@ jobs:
|
||||
- name: Change tests according to the generated password
|
||||
env:
|
||||
DBPASS: ${{ steps.pwgen.outputs.DBPASS }}
|
||||
PG_VERSION: ${{matrix.pg-version}}
|
||||
run: |
|
||||
cd vendor/postgres-v"${DEFAULT_PG_VERSION}"/src/test/regress
|
||||
cd vendor/postgres-v"${PG_VERSION}"/src/test/regress
|
||||
for fname in sql/*.sql expected/*.out; do
|
||||
sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/"'${DBPASS}'"/ "${fname}"
|
||||
done
|
||||
@@ -72,27 +81,46 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create a new branch
|
||||
id: create-branch
|
||||
uses: ./.github/actions/neon-branch-create
|
||||
with:
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
project_id: ${{ vars[format('PGREGRESS_PG{0}_PROJECT_ID', matrix.pg-version)] }}
|
||||
|
||||
- name: Run the regression tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: cloud_regress
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
pg_version: ${{matrix.pg-version}}
|
||||
extra_params: -m remote_cluster
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ secrets.PG_REGRESS_CONNSTR }}
|
||||
BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}}
|
||||
|
||||
- name: Delete branch
|
||||
if: always()
|
||||
uses: ./.github/actions/neon-branch-delete
|
||||
with:
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
project_id: ${{ vars[format('PGREGRESS_PG{0}_PROJECT_ID', matrix.pg-version)] }}
|
||||
branch_id: ${{steps.create-branch.outputs.branch_id}}
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # on-call-staging-stream
|
||||
channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}
|
||||
slack-message: |
|
||||
Periodic pg_regress on staging: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
|
||||
15
.github/workflows/ingest_benchmark.yml
vendored
15
.github/workflows/ingest_benchmark.yml
vendored
@@ -13,7 +13,7 @@ on:
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 9 * * *' # run once a day, timezone is utc
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
@@ -28,7 +28,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false # allow other variants to continue even if one fails
|
||||
matrix:
|
||||
target_project: [new_empty_project, large_existing_project]
|
||||
target_project: [new_empty_project, large_existing_project]
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
|
||||
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
@@ -64,6 +64,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
if: ${{ matrix.target_project == 'new_empty_project' }}
|
||||
@@ -94,7 +95,7 @@ jobs:
|
||||
project_id: ${{ vars.BENCHMARK_INGEST_TARGET_PROJECTID }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Initialize Neon project
|
||||
- name: Initialize Neon project
|
||||
if: ${{ matrix.target_project == 'large_existing_project' }}
|
||||
env:
|
||||
BENCHMARK_INGEST_TARGET_CONNSTR: ${{ steps.create-neon-branch-ingest-target.outputs.dsn }}
|
||||
@@ -122,7 +123,7 @@ jobs:
|
||||
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "CREATE EXTENSION IF NOT EXISTS neon; CREATE EXTENSION IF NOT EXISTS neon_utils;"
|
||||
echo "BENCHMARK_INGEST_TARGET_CONNSTR=${BENCHMARK_INGEST_TARGET_CONNSTR}" >> $GITHUB_ENV
|
||||
|
||||
- name: Invoke pgcopydb
|
||||
- name: Invoke pgcopydb
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: remote
|
||||
@@ -131,7 +132,7 @@ jobs:
|
||||
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
|
||||
pg_version: v16
|
||||
save_perf_report: true
|
||||
aws_oicd_role_arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
|
||||
TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
|
||||
@@ -143,7 +144,7 @@ jobs:
|
||||
run: |
|
||||
export LD_LIBRARY_PATH=${PG_16_LIB_PATH}
|
||||
${PSQL} "${BENCHMARK_INGEST_TARGET_CONNSTR}" -c "\dt+"
|
||||
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ always() && matrix.target_project == 'new_empty_project' }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
|
||||
13
.github/workflows/neon_extra_builds.yml
vendored
13
.github/workflows/neon_extra_builds.yml
vendored
@@ -143,6 +143,10 @@ jobs:
|
||||
|
||||
gather-rust-build-stats:
|
||||
needs: [ check-permissions, build-build-tools-image ]
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
@@ -177,13 +181,18 @@ jobs:
|
||||
- name: Produce the build stats
|
||||
run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
|
||||
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Upload the build stats
|
||||
id: upload-stats
|
||||
env:
|
||||
BUCKET: neon-github-public-dev
|
||||
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
run: |
|
||||
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html
|
||||
aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/"
|
||||
|
||||
27
.github/workflows/periodic_pagebench.yml
vendored
27
.github/workflows/periodic_pagebench.yml
vendored
@@ -27,6 +27,11 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
trigger_bench_on_ec2_machine_in_eu_central_1:
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: neondatabase/build-tools:pinned-bookworm
|
||||
@@ -38,8 +43,6 @@ jobs:
|
||||
env:
|
||||
API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
|
||||
RUN_ID: ${{ github.run_id }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_SECRET }}
|
||||
AWS_DEFAULT_REGION : "eu-central-1"
|
||||
AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
|
||||
steps:
|
||||
@@ -50,6 +53,13 @@ jobs:
|
||||
- name: Show my own (github runner) external IP address - usefull for IP allowlisting
|
||||
run: curl https://ifconfig.me
|
||||
|
||||
- name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC machine)
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Start EC2 instance and wait for the instance to boot up
|
||||
run: |
|
||||
aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
|
||||
@@ -124,11 +134,10 @@ jobs:
|
||||
cat "test_log_${GITHUB_RUN_ID}"
|
||||
|
||||
- name: Create Allure report
|
||||
env:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -148,6 +157,14 @@ jobs:
|
||||
-H "Authorization: Bearer $API_KEY" \
|
||||
-d ''
|
||||
|
||||
- name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC machine)
|
||||
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Stop EC2 instance and wait for the instance to be stopped
|
||||
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
|
||||
run: |
|
||||
|
||||
12
.github/workflows/pg-clients.yml
vendored
12
.github/workflows/pg-clients.yml
vendored
@@ -25,11 +25,13 @@ defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write # require for posting a status update
|
||||
|
||||
env:
|
||||
DEFAULT_PG_VERSION: 16
|
||||
PLATFORM: neon-captest-new
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
AWS_DEFAULT_REGION: eu-central-1
|
||||
|
||||
jobs:
|
||||
@@ -94,6 +96,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
@@ -110,6 +113,7 @@ jobs:
|
||||
run_in_parallel: false
|
||||
extra_params: -m remote_cluster
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
|
||||
@@ -126,6 +130,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
@@ -159,6 +164,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
@@ -175,6 +181,7 @@ jobs:
|
||||
run_in_parallel: false
|
||||
extra_params: -m remote_cluster
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
|
||||
@@ -191,6 +198,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
|
||||
14
.github/workflows/pin-build-tools-image.yml
vendored
14
.github/workflows/pin-build-tools-image.yml
vendored
@@ -67,7 +67,7 @@ jobs:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
permissions:
|
||||
id-token: write # for `azure/login`
|
||||
id-token: write # for `azure/login` and aws auth
|
||||
|
||||
steps:
|
||||
- uses: docker/login-action@v3
|
||||
@@ -75,11 +75,15 @@ jobs:
|
||||
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
|
||||
|
||||
- uses: docker/login-action@v3
|
||||
- name: Configure AWS credentials
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
|
||||
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
|
||||
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Login to Amazon Dev ECR
|
||||
uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Azure login
|
||||
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1
|
||||
|
||||
1
.github/workflows/pre-merge-checks.yml
vendored
1
.github/workflows/pre-merge-checks.yml
vendored
@@ -63,6 +63,7 @@ jobs:
|
||||
if: always()
|
||||
permissions:
|
||||
statuses: write # for `github.repos.createCommitStatus(...)`
|
||||
contents: write
|
||||
needs:
|
||||
- get-changed-files
|
||||
- check-codestyle-python
|
||||
|
||||
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@@ -3,7 +3,7 @@ name: Create Release Branch
|
||||
on:
|
||||
schedule:
|
||||
# It should be kept in sync with if-condition in jobs
|
||||
- cron: '0 6 * * MON' # Storage release
|
||||
- cron: '0 6 * * FRI' # Storage release
|
||||
- cron: '0 6 * * THU' # Proxy release
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
@@ -29,7 +29,7 @@ defaults:
|
||||
|
||||
jobs:
|
||||
create-storage-release-branch:
|
||||
if: ${{ github.event.schedule == '0 6 * * MON' || inputs.create-storage-release-branch }}
|
||||
if: ${{ github.event.schedule == '0 6 * * FRI' || inputs.create-storage-release-branch }}
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
33
CODEOWNERS
33
CODEOWNERS
@@ -1,16 +1,29 @@
|
||||
/.github/ @neondatabase/developer-productivity
|
||||
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
|
||||
/libs/pageserver_api/ @neondatabase/storage
|
||||
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
|
||||
/libs/proxy/ @neondatabase/proxy
|
||||
/libs/remote_storage/ @neondatabase/storage
|
||||
/libs/safekeeper_api/ @neondatabase/storage
|
||||
# Autoscaling
|
||||
/libs/vm_monitor/ @neondatabase/autoscaling
|
||||
/pageserver/ @neondatabase/storage
|
||||
|
||||
# DevProd
|
||||
/.github/ @neondatabase/developer-productivity
|
||||
|
||||
# Compute
|
||||
/pgxn/ @neondatabase/compute
|
||||
/pgxn/neon/ @neondatabase/compute @neondatabase/storage
|
||||
/vendor/ @neondatabase/compute
|
||||
/compute/ @neondatabase/compute
|
||||
/compute_tools/ @neondatabase/compute
|
||||
|
||||
# Proxy
|
||||
/libs/proxy/ @neondatabase/proxy
|
||||
/proxy/ @neondatabase/proxy
|
||||
|
||||
# Storage
|
||||
/pageserver/ @neondatabase/storage
|
||||
/safekeeper/ @neondatabase/storage
|
||||
/storage_controller @neondatabase/storage
|
||||
/storage_scrubber @neondatabase/storage
|
||||
/vendor/ @neondatabase/compute
|
||||
/libs/pageserver_api/ @neondatabase/storage
|
||||
/libs/remote_storage/ @neondatabase/storage
|
||||
/libs/safekeeper_api/ @neondatabase/storage
|
||||
|
||||
# Shared
|
||||
/pgxn/neon/ @neondatabase/compute @neondatabase/storage
|
||||
/libs/compute_api/ @neondatabase/compute @neondatabase/control-plane
|
||||
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
|
||||
|
||||
639
Cargo.lock
generated
639
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
13
Cargo.toml
13
Cargo.toml
@@ -11,6 +11,7 @@ members = [
|
||||
"pageserver/pagebench",
|
||||
"proxy",
|
||||
"safekeeper",
|
||||
"safekeeper/client",
|
||||
"storage_broker",
|
||||
"storage_controller",
|
||||
"storage_controller/client",
|
||||
@@ -51,10 +52,7 @@ anyhow = { version = "1.0", features = ["backtrace"] }
|
||||
arc-swap = "1.6"
|
||||
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
|
||||
atomic-take = "1.1.0"
|
||||
azure_core = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] }
|
||||
azure_identity = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_storage = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_storage_blobs = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
backtrace = "0.3.74"
|
||||
flate2 = "1.0.26"
|
||||
async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
@@ -216,6 +214,12 @@ postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git",
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
|
||||
|
||||
## Azure SDK crates
|
||||
azure_core = { git = "https://github.com/neondatabase/azure-sdk-for-rust.git", branch = "neon", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] }
|
||||
azure_identity = { git = "https://github.com/neondatabase/azure-sdk-for-rust.git", branch = "neon", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_storage = { git = "https://github.com/neondatabase/azure-sdk-for-rust.git", branch = "neon", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
azure_storage_blobs = { git = "https://github.com/neondatabase/azure-sdk-for-rust.git", branch = "neon", default-features = false, features = ["enable_reqwest_rustls"] }
|
||||
|
||||
## Local libraries
|
||||
compute_api = { version = "0.1", path = "./libs/compute_api/" }
|
||||
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
|
||||
@@ -231,6 +235,7 @@ postgres_initdb = { path = "./libs/postgres_initdb" }
|
||||
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
|
||||
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
|
||||
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
|
||||
safekeeper_client = { path = "./safekeeper/client" }
|
||||
desim = { version = "0.1", path = "./libs/desim" }
|
||||
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
|
||||
storage_controller_client = { path = "./storage_controller/client" }
|
||||
|
||||
@@ -69,6 +69,8 @@ RUN set -e \
|
||||
libreadline-dev \
|
||||
libseccomp-dev \
|
||||
ca-certificates \
|
||||
# System postgres for use with client libraries (e.g. in storage controller)
|
||||
postgresql-15 \
|
||||
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
|
||||
&& useradd -d /data neon \
|
||||
&& chown -R neon:neon /data
|
||||
|
||||
2
Makefile
2
Makefile
@@ -67,8 +67,6 @@ CARGO_BUILD_FLAGS += $(filter -j1,$(MAKEFLAGS))
|
||||
CARGO_CMD_PREFIX += $(if $(filter n,$(MAKEFLAGS)),,+)
|
||||
# Force cargo not to print progress bar
|
||||
CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
|
||||
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
|
||||
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
|
||||
|
||||
CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
|
||||
|
||||
|
||||
@@ -115,7 +115,7 @@ RUN set -e \
|
||||
|
||||
# Keep the version the same as in compute/compute-node.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
ENV SQL_EXPORTER_VERSION=0.13.1
|
||||
ENV SQL_EXPORTER_VERSION=0.16.0
|
||||
RUN curl -fsSL \
|
||||
"https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
|
||||
--output sql_exporter.tar.gz \
|
||||
|
||||
@@ -35,10 +35,12 @@ RUN case $DEBIAN_VERSION in \
|
||||
;; \
|
||||
esac && \
|
||||
apt update && \
|
||||
apt install --no-install-recommends -y git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
|
||||
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd \
|
||||
$VERSION_INSTALLS
|
||||
$VERSION_INSTALLS \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -113,10 +115,12 @@ ARG DEBIAN_VERSION
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends -y gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
|
||||
libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \
|
||||
libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
|
||||
protobuf-c-compiler xsltproc
|
||||
protobuf-c-compiler xsltproc \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
# Postgis 3.5.0 requires SFCGAL 1.4+
|
||||
@@ -143,9 +147,9 @@ RUN case "${DEBIAN_VERSION}" in \
|
||||
wget https://gitlab.com/sfcgal/SFCGAL/-/archive/v${SFCGAL_VERSION}/SFCGAL-v${SFCGAL_VERSION}.tar.gz -O SFCGAL.tar.gz && \
|
||||
echo "${SFCGAL_CHECKSUM} SFCGAL.tar.gz" | sha256sum --check && \
|
||||
mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make clean && cp -R /sfcgal/* /
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -GNinja . && ninja -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
DESTDIR=/sfcgal ninja install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
ninja clean && cp -R /sfcgal/* /
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
@@ -213,9 +217,9 @@ RUN case "${PG_VERSION}" in \
|
||||
echo "${PGROUTING_CHECKSUM} pgrouting.tar.gz" | sha256sum --check && \
|
||||
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
|
||||
mkdir build && cd build && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release .. && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. && \
|
||||
ninja -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
ninja -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
|
||||
cp /usr/local/pgsql/share/extension/pgrouting.control /extensions/postgis && \
|
||||
@@ -235,7 +239,9 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY compute/patches/plv8-3.1.10.patch /plv8-3.1.10.patch
|
||||
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends -y ninja-build python3-dev libncurses5 binutils clang
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
ninja-build python3-dev libncurses5 binutils clang \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# plv8 3.2.3 supports v17
|
||||
# last release v3.2.3 - Sep 7, 2024
|
||||
@@ -301,9 +307,10 @@ RUN mkdir -p /h3/usr/ && \
|
||||
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
|
||||
mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
|
||||
mkdir build && cd build && \
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
DESTDIR=/h3 make install && \
|
||||
cmake .. -GNinja -DBUILD_BENCHMARKS=0 -DCMAKE_BUILD_TYPE=Release \
|
||||
-DBUILD_FUZZERS=0 -DBUILD_FILTERS=0 -DBUILD_GENERATORS=0 -DBUILD_TESTING=0 \
|
||||
&& ninja -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
DESTDIR=/h3 ninja install && \
|
||||
cp -R /h3/usr / && \
|
||||
rm -rf build
|
||||
|
||||
@@ -650,14 +657,15 @@ FROM build-deps AS rdkit-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y \
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y \
|
||||
libboost-iostreams1.74-dev \
|
||||
libboost-regex1.74-dev \
|
||||
libboost-serialization1.74-dev \
|
||||
libboost-system1.74-dev \
|
||||
libeigen3-dev \
|
||||
libboost-all-dev
|
||||
libboost-all-dev \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# rdkit Release_2024_09_1 supports v17
|
||||
# last release Release_2024_09_1 - Sep 27, 2024
|
||||
@@ -693,6 +701,8 @@ RUN case "${PG_VERSION}" in \
|
||||
-D RDK_BUILD_MOLINTERCHANGE_SUPPORT=OFF \
|
||||
-D RDK_BUILD_YAEHMOP_SUPPORT=OFF \
|
||||
-D RDK_BUILD_STRUCTCHECKER_SUPPORT=OFF \
|
||||
-D RDK_TEST_MULTITHREADED=OFF \
|
||||
-D RDK_BUILD_CPP_TESTS=OFF \
|
||||
-D RDK_USE_URF=OFF \
|
||||
-D RDK_BUILD_PGSQL=ON \
|
||||
-D RDK_PGSQL_STATIC=ON \
|
||||
@@ -704,9 +714,10 @@ RUN case "${PG_VERSION}" in \
|
||||
-D RDK_INSTALL_COMIC_FONTS=OFF \
|
||||
-D RDK_BUILD_FREETYPE_SUPPORT=OFF \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-GNinja \
|
||||
. && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
ninja -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
ninja -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -849,8 +860,9 @@ FROM build-deps AS rust-extensions-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y curl libclang-dev && \
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
@@ -885,8 +897,9 @@ FROM build-deps AS rust-extensions-build-pgrx12
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install --no-install-recommends -y curl libclang-dev && \
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
@@ -914,18 +927,22 @@ FROM rust-extensions-build-pgrx12 AS pg-onnx-build
|
||||
|
||||
# cmake 3.26 or higher is required, so installing it using pip (bullseye-backports has cmake 3.25).
|
||||
# Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise
|
||||
RUN apt-get update && apt-get install -y python3 python3-pip python3-venv && \
|
||||
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
|
||||
python3 python3-pip python3-venv && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
python3 -m venv venv && \
|
||||
. venv/bin/activate && \
|
||||
python3 -m pip install cmake==3.30.5 && \
|
||||
wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \
|
||||
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
|
||||
./build.sh --config Release --parallel --skip_submodule_sync --skip_tests --allow_running_as_root
|
||||
./build.sh --config Release --parallel --cmake_generator Ninja \
|
||||
--skip_submodule_sync --skip_tests --allow_running_as_root
|
||||
|
||||
|
||||
FROM pg-onnx-build AS pgrag-pg-build
|
||||
|
||||
RUN apt-get install -y protobuf-compiler && \
|
||||
RUN apt update && apt install --no-install-recommends --no-install-suggests -y protobuf-compiler \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \
|
||||
echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \
|
||||
mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . && \
|
||||
@@ -1168,6 +1185,25 @@ RUN case "${PG_VERSION}" in \
|
||||
make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg_repack"
|
||||
# compile pg_repack extension
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM build-deps AS pg-repack-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
|
||||
RUN wget https://github.com/reorg/pg_repack/archive/refs/tags/ver_1.5.2.tar.gz -O pg_repack.tar.gz && \
|
||||
echo '4516cad42251ed3ad53ff619733004db47d5755acac83f75924cd94d1c4fb681 pg_repack.tar.gz' | sha256sum --check && \
|
||||
mkdir pg_repack-src && cd pg_repack-src && tar xzf ../pg_repack.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "neon-pg-ext-build"
|
||||
@@ -1213,6 +1249,7 @@ COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-partman-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg-repack-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY pgxn/ pgxn/
|
||||
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
@@ -1279,8 +1316,8 @@ COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_
|
||||
|
||||
FROM debian:$DEBIAN_FLAVOR AS pgbouncer
|
||||
RUN set -e \
|
||||
&& apt-get update \
|
||||
&& apt-get install --no-install-recommends -y \
|
||||
&& apt update \
|
||||
&& apt install --no-install-suggests --no-install-recommends -y \
|
||||
build-essential \
|
||||
git \
|
||||
ca-certificates \
|
||||
@@ -1288,7 +1325,8 @@ RUN set -e \
|
||||
automake \
|
||||
libevent-dev \
|
||||
libtool \
|
||||
pkg-config
|
||||
pkg-config \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
|
||||
ENV PGBOUNCER_TAG=pgbouncer_1_22_1
|
||||
@@ -1324,7 +1362,7 @@ FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter
|
||||
|
||||
# Keep the version the same as in build-tools.Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py.
|
||||
FROM burningalchemist/sql_exporter:0.13.1 AS sql-exporter
|
||||
FROM burningalchemist/sql_exporter:0.16.0 AS sql-exporter
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1518,28 +1556,30 @@ RUN apt update && \
|
||||
locales \
|
||||
procps \
|
||||
ca-certificates \
|
||||
curl \
|
||||
unzip \
|
||||
$VERSION_INSTALLS && \
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
|
||||
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
|
||||
|
||||
# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2
|
||||
# used by fast_import
|
||||
# aws cli is used by fast_import (curl and unzip above are at this time only used for this installation step)
|
||||
ARG TARGETARCH
|
||||
ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb
|
||||
RUN set -ex; \
|
||||
\
|
||||
# Determine the expected checksum based on TARGETARCH
|
||||
if [ "${TARGETARCH}" = "amd64" ]; then \
|
||||
CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \
|
||||
TARGETARCH_ALT="x86_64"; \
|
||||
CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \
|
||||
elif [ "${TARGETARCH}" = "arm64" ]; then \
|
||||
CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \
|
||||
TARGETARCH_ALT="aarch64"; \
|
||||
CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \
|
||||
else \
|
||||
echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
|
||||
fi; \
|
||||
\
|
||||
# Compute and validate the checksum
|
||||
echo "${CHECKSUM} /tmp/s5cmd.deb" | sha256sum -c -
|
||||
RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb
|
||||
curl -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \
|
||||
echo "${CHECKSUM} /tmp/awscliv2.zip" | sha256sum -c -; \
|
||||
unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \
|
||||
/tmp/awscliv2/aws/install; \
|
||||
rm -rf /tmp/awscliv2.zip /tmp/awscliv2; \
|
||||
true
|
||||
|
||||
ENV LANG=en_US.utf8
|
||||
USER postgres
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
metrics: [
|
||||
import 'sql_exporter/checkpoints_req.libsonnet',
|
||||
import 'sql_exporter/checkpoints_timed.libsonnet',
|
||||
import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet',
|
||||
import 'sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet',
|
||||
import 'sql_exporter/compute_current_lsn.libsonnet',
|
||||
import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
|
||||
import 'sql_exporter/compute_logical_snapshots_bytes.libsonnet',
|
||||
|
||||
@@ -19,3 +19,10 @@ max_prepared_statements=0
|
||||
admin_users=postgres
|
||||
unix_socket_dir=/tmp/
|
||||
unix_socket_mode=0777
|
||||
|
||||
;; Disable connection logging. It produces a lot of logs that no one looks at,
|
||||
;; and we can get similar log entries from the proxy too. We had incidents in
|
||||
;; the past where the logging significantly stressed the log device or pgbouncer
|
||||
;; itself.
|
||||
log_connections=0
|
||||
log_disconnections=0
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
{
|
||||
metric_name: 'compute_backpressure_throttling_seconds',
|
||||
type: 'gauge',
|
||||
metric_name: 'compute_backpressure_throttling_seconds_total',
|
||||
type: 'counter',
|
||||
help: 'Time compute has spent throttled',
|
||||
key_labels: null,
|
||||
values: [
|
||||
'throttled',
|
||||
],
|
||||
query: importstr 'sql_exporter/compute_backpressure_throttling_seconds.sql',
|
||||
query: importstr 'sql_exporter/compute_backpressure_throttling_seconds_total.sql',
|
||||
}
|
||||
@@ -981,7 +981,7 @@ index fc42d418bf..e38f517574 100644
|
||||
CREATE SCHEMA addr_nsp;
|
||||
SET search_path TO 'addr_nsp';
|
||||
diff --git a/src/test/regress/expected/password.out b/src/test/regress/expected/password.out
|
||||
index 8475231735..1afae5395f 100644
|
||||
index 8475231735..0653946337 100644
|
||||
--- a/src/test/regress/expected/password.out
|
||||
+++ b/src/test/regress/expected/password.out
|
||||
@@ -12,11 +12,11 @@ SET password_encryption = 'md5'; -- ok
|
||||
@@ -1006,65 +1006,63 @@ index 8475231735..1afae5395f 100644
|
||||
-----------------+---------------------------------------------------
|
||||
- regress_passwd1 | md5783277baca28003b33453252be4dbb34
|
||||
- regress_passwd2 | md54044304ba511dd062133eb5b4b84a2a3
|
||||
+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1
|
||||
+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2
|
||||
+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1
|
||||
+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2
|
||||
regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
- regress_passwd4 |
|
||||
+ regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
(4 rows)
|
||||
|
||||
-- Rename a role
|
||||
@@ -54,24 +54,30 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
|
||||
@@ -54,24 +54,16 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
|
||||
-- passwords.
|
||||
SET password_encryption = 'md5';
|
||||
-- encrypt with MD5
|
||||
-ALTER ROLE regress_passwd2 PASSWORD 'foo';
|
||||
--- already encrypted, use as they are
|
||||
-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
|
||||
-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
|
||||
+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
-- already encrypted, use as they are
|
||||
ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
SET password_encryption = 'scram-sha-256';
|
||||
-- create SCRAM secret
|
||||
-ALTER ROLE regress_passwd4 PASSWORD 'foo';
|
||||
--- already encrypted with MD5, use as it is
|
||||
-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
|
||||
--- This looks like a valid SCRAM-SHA-256 secret, but it is not
|
||||
--- so it should be hashed with SCRAM-SHA-256.
|
||||
-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
|
||||
--- These may look like valid MD5 secrets, but they are not, so they
|
||||
--- should be hashed with SCRAM-SHA-256.
|
||||
--- trailing garbage at the end
|
||||
-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
|
||||
--- invalid length
|
||||
-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
|
||||
+ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
-- already encrypted with MD5, use as it is
|
||||
CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
-- This looks like a valid SCRAM-SHA-256 secret, but it is not
|
||||
-- so it should be hashed with SCRAM-SHA-256.
|
||||
CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
-- These may look like valid MD5 secrets, but they are not, so they
|
||||
-- should be hashed with SCRAM-SHA-256.
|
||||
-- trailing garbage at the end
|
||||
CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
-- invalid length
|
||||
CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
+-- Neon does not support encrypted passwords, use unencrypted instead
|
||||
+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+-- Neon does not support encrypted passwords, use unencrypted instead
|
||||
+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
-- Changing the SCRAM iteration count
|
||||
SET scram_iterations = 1024;
|
||||
CREATE ROLE regress_passwd9 PASSWORD 'alterediterationcount';
|
||||
@@ -81,63 +87,67 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
|
||||
@@ -81,11 +73,11 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
|
||||
ORDER BY rolname, rolpassword;
|
||||
rolname | rolpassword_masked
|
||||
-----------------+---------------------------------------------------
|
||||
- regress_passwd1 | md5cd3578025fe2c3d7ed1b9a9b26238b70
|
||||
- regress_passwd2 | md5dfa155cadd5f4ad57860162f3fab9cdb
|
||||
+ regress_passwd1 | NEON_MD5_PLACEHOLDER_regress_passwd1
|
||||
+ regress_passwd2 | NEON_MD5_PLACEHOLDER_regress_passwd2
|
||||
+ regress_passwd1 | NEON_MD5_PLACEHOLDER:regress_passwd1
|
||||
+ regress_passwd2 | NEON_MD5_PLACEHOLDER:regress_passwd2
|
||||
regress_passwd3 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
regress_passwd4 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
- regress_passwd5 | md5e73a4b11df52a6068f8b39f90be36023
|
||||
- regress_passwd6 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
- regress_passwd7 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
- regress_passwd8 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
regress_passwd9 | SCRAM-SHA-256$1024:<salt>$<storedkey>:<serverkey>
|
||||
-(9 rows)
|
||||
+(5 rows)
|
||||
|
||||
+ regress_passwd5 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
regress_passwd6 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
regress_passwd7 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
regress_passwd8 | SCRAM-SHA-256$4096:<salt>$<storedkey>:<serverkey>
|
||||
@@ -95,23 +87,20 @@ SELECT rolname, regexp_replace(rolpassword, '(SCRAM-SHA-256)\$(\d+):([a-zA-Z0-9+
|
||||
-- An empty password is not allowed, in any form
|
||||
CREATE ROLE regress_passwd_empty PASSWORD '';
|
||||
NOTICE: empty string is not a valid password, clearing password
|
||||
@@ -1082,56 +1080,37 @@ index 8475231735..1afae5395f 100644
|
||||
-(1 row)
|
||||
+(0 rows)
|
||||
|
||||
-- Test with invalid stored and server keys.
|
||||
--
|
||||
-- The first is valid, to act as a control. The others have too long
|
||||
-- stored/server keys. They will be re-hashed.
|
||||
CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
|
||||
+ERROR: Received HTTP code 400 from control plane: {"error":"Neon only supports being given plaintext passwords"}
|
||||
--- Test with invalid stored and server keys.
|
||||
---
|
||||
--- The first is valid, to act as a control. The others have too long
|
||||
--- stored/server keys. They will be re-hashed.
|
||||
-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
|
||||
-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
|
||||
-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
|
||||
+-- Neon does not support encrypted passwords, use unencrypted instead
|
||||
+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
-- Check that the invalid secrets were re-hashed. A re-hashed secret
|
||||
-- should not contain the original salt.
|
||||
SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassword_rehashed
|
||||
FROM pg_authid
|
||||
WHERE rolname LIKE 'regress_passwd_sha_len%'
|
||||
@@ -120,7 +109,7 @@ SELECT rolname, rolpassword not like '%A6xHKoH/494E941doaPOYg==%' as is_rolpassw
|
||||
ORDER BY rolname;
|
||||
- rolname | is_rolpassword_rehashed
|
||||
--------------------------+-------------------------
|
||||
rolname | is_rolpassword_rehashed
|
||||
-------------------------+-------------------------
|
||||
- regress_passwd_sha_len0 | f
|
||||
- regress_passwd_sha_len1 | t
|
||||
- regress_passwd_sha_len2 | t
|
||||
-(3 rows)
|
||||
+ rolname | is_rolpassword_rehashed
|
||||
+---------+-------------------------
|
||||
+(0 rows)
|
||||
|
||||
DROP ROLE regress_passwd1;
|
||||
DROP ROLE regress_passwd2;
|
||||
DROP ROLE regress_passwd3;
|
||||
DROP ROLE regress_passwd4;
|
||||
DROP ROLE regress_passwd5;
|
||||
+ERROR: role "regress_passwd5" does not exist
|
||||
DROP ROLE regress_passwd6;
|
||||
+ERROR: role "regress_passwd6" does not exist
|
||||
DROP ROLE regress_passwd7;
|
||||
+ERROR: role "regress_passwd7" does not exist
|
||||
+ regress_passwd_sha_len0 | t
|
||||
regress_passwd_sha_len1 | t
|
||||
regress_passwd_sha_len2 | t
|
||||
(3 rows)
|
||||
@@ -135,6 +124,7 @@ DROP ROLE regress_passwd7;
|
||||
DROP ROLE regress_passwd8;
|
||||
+ERROR: role "regress_passwd8" does not exist
|
||||
DROP ROLE regress_passwd9;
|
||||
DROP ROLE regress_passwd_empty;
|
||||
+ERROR: role "regress_passwd_empty" does not exist
|
||||
DROP ROLE regress_passwd_sha_len0;
|
||||
+ERROR: role "regress_passwd_sha_len0" does not exist
|
||||
DROP ROLE regress_passwd_sha_len1;
|
||||
+ERROR: role "regress_passwd_sha_len1" does not exist
|
||||
DROP ROLE regress_passwd_sha_len2;
|
||||
+ERROR: role "regress_passwd_sha_len2" does not exist
|
||||
-- all entries should have been removed
|
||||
SELECT rolname, rolpassword
|
||||
FROM pg_authid
|
||||
diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out
|
||||
index 5b9dba7b32..cc408dad42 100644
|
||||
--- a/src/test/regress/expected/privileges.out
|
||||
@@ -3194,7 +3173,7 @@ index 1a6c61f49d..1c31ac6a53 100644
|
||||
-- Test generic object addressing/identification functions
|
||||
CREATE SCHEMA addr_nsp;
|
||||
diff --git a/src/test/regress/sql/password.sql b/src/test/regress/sql/password.sql
|
||||
index 53e86b0b6c..f07cf1ec54 100644
|
||||
index 53e86b0b6c..0303fdfe96 100644
|
||||
--- a/src/test/regress/sql/password.sql
|
||||
+++ b/src/test/regress/sql/password.sql
|
||||
@@ -10,11 +10,11 @@ SET password_encryption = 'scram-sha-256'; -- ok
|
||||
@@ -3213,23 +3192,59 @@ index 53e86b0b6c..f07cf1ec54 100644
|
||||
|
||||
-- check list of created entries
|
||||
--
|
||||
@@ -42,14 +42,14 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
|
||||
@@ -42,26 +42,18 @@ ALTER ROLE regress_passwd2_new RENAME TO regress_passwd2;
|
||||
SET password_encryption = 'md5';
|
||||
|
||||
-- encrypt with MD5
|
||||
-ALTER ROLE regress_passwd2 PASSWORD 'foo';
|
||||
--- already encrypted, use as they are
|
||||
-ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
|
||||
-ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
|
||||
+ALTER ROLE regress_passwd2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
-- already encrypted, use as they are
|
||||
ALTER ROLE regress_passwd1 PASSWORD 'md5cd3578025fe2c3d7ed1b9a9b26238b70';
|
||||
ALTER ROLE regress_passwd3 PASSWORD 'SCRAM-SHA-256$4096:VLK4RMaQLCvNtQ==$6YtlR4t69SguDiwFvbVgVZtuz6gpJQQqUMZ7IQJK5yI=:ps75jrHeYU4lXCcXI4O8oIdJ3eO8o2jirjruw9phBTo=';
|
||||
|
||||
SET password_encryption = 'scram-sha-256';
|
||||
-- create SCRAM secret
|
||||
-ALTER ROLE regress_passwd4 PASSWORD 'foo';
|
||||
--- already encrypted with MD5, use as it is
|
||||
-CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
|
||||
+ALTER ROLE regress_passwd4 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
-- already encrypted with MD5, use as it is
|
||||
CREATE ROLE regress_passwd5 PASSWORD 'md5e73a4b11df52a6068f8b39f90be36023';
|
||||
+-- Neon does not support encrypted passwords, use unencrypted instead
|
||||
+CREATE ROLE regress_passwd5 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
|
||||
--- This looks like a valid SCRAM-SHA-256 secret, but it is not
|
||||
--- so it should be hashed with SCRAM-SHA-256.
|
||||
-CREATE ROLE regress_passwd6 PASSWORD 'SCRAM-SHA-256$1234';
|
||||
--- These may look like valid MD5 secrets, but they are not, so they
|
||||
--- should be hashed with SCRAM-SHA-256.
|
||||
--- trailing garbage at the end
|
||||
-CREATE ROLE regress_passwd7 PASSWORD 'md5012345678901234567890123456789zz';
|
||||
--- invalid length
|
||||
-CREATE ROLE regress_passwd8 PASSWORD 'md501234567890123456789012345678901zz';
|
||||
+-- Neon does not support encrypted passwords, use unencrypted instead
|
||||
+CREATE ROLE regress_passwd6 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+CREATE ROLE regress_passwd7 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+CREATE ROLE regress_passwd8 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
|
||||
-- Changing the SCRAM iteration count
|
||||
SET scram_iterations = 1024;
|
||||
@@ -78,13 +70,10 @@ ALTER ROLE regress_passwd_empty PASSWORD 'md585939a5ce845f1a1b620742e3c659e0a';
|
||||
ALTER ROLE regress_passwd_empty PASSWORD 'SCRAM-SHA-256$4096:hpFyHTUsSWcR7O9P$LgZFIt6Oqdo27ZFKbZ2nV+vtnYM995pDh9ca6WSi120=:qVV5NeluNfUPkwm7Vqat25RjSPLkGeoZBQs6wVv+um4=';
|
||||
SELECT rolpassword FROM pg_authid WHERE rolname='regress_passwd_empty';
|
||||
|
||||
--- Test with invalid stored and server keys.
|
||||
---
|
||||
--- The first is valid, to act as a control. The others have too long
|
||||
--- stored/server keys. They will be re-hashed.
|
||||
-CREATE ROLE regress_passwd_sha_len0 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
|
||||
-CREATE ROLE regress_passwd_sha_len1 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96RqwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZI=';
|
||||
-CREATE ROLE regress_passwd_sha_len2 PASSWORD 'SCRAM-SHA-256$4096:A6xHKoH/494E941doaPOYg==$Ky+A30sewHIH3VHQLRN9vYsuzlgNyGNKCh37dy96Rqw=:COPdlNiIkrsacU5QoxydEuOH6e/KfiipeETb/bPw8ZIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=';
|
||||
+-- Neon does not support encrypted passwords, use unencrypted instead
|
||||
+CREATE ROLE regress_passwd_sha_len0 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+CREATE ROLE regress_passwd_sha_len1 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
+CREATE ROLE regress_passwd_sha_len2 PASSWORD NEON_PASSWORD_PLACEHOLDER;
|
||||
|
||||
-- Check that the invalid secrets were re-hashed. A re-hashed secret
|
||||
-- should not contain the original salt.
|
||||
diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql
|
||||
index 249df17a58..b258e7f26a 100644
|
||||
--- a/src/test/regress/sql/privileges.sql
|
||||
|
||||
4058
compute/patches/cloud_regress_pg17.patch
Normal file
4058
compute/patches/cloud_regress_pg17.patch
Normal file
File diff suppressed because it is too large
Load Diff
@@ -246,47 +246,48 @@ fn try_spec_from_cli(
|
||||
let compute_id = matches.get_one::<String>("compute-id");
|
||||
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
|
||||
|
||||
let spec;
|
||||
let mut live_config_allowed = false;
|
||||
match spec_json {
|
||||
// First, try to get cluster spec from the cli argument
|
||||
Some(json) => {
|
||||
info!("got spec from cli argument {}", json);
|
||||
spec = Some(serde_json::from_str(json)?);
|
||||
}
|
||||
None => {
|
||||
// Second, try to read it from the file if path is provided
|
||||
if let Some(sp) = spec_path {
|
||||
let path = Path::new(sp);
|
||||
let file = File::open(path)?;
|
||||
spec = Some(serde_json::from_reader(file)?);
|
||||
live_config_allowed = true;
|
||||
} else if let Some(id) = compute_id {
|
||||
if let Some(cp_base) = control_plane_uri {
|
||||
live_config_allowed = true;
|
||||
spec = match get_spec_from_control_plane(cp_base, id) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
error!("cannot get response from control plane: {}", e);
|
||||
panic!("neither spec nor confirmation that compute is in the Empty state was received");
|
||||
}
|
||||
};
|
||||
} else {
|
||||
panic!("must specify both --control-plane-uri and --compute-id or none");
|
||||
}
|
||||
} else {
|
||||
panic!(
|
||||
"compute spec should be provided by one of the following ways: \
|
||||
--spec OR --spec-path OR --control-plane-uri and --compute-id"
|
||||
);
|
||||
}
|
||||
}
|
||||
// First, try to get cluster spec from the cli argument
|
||||
if let Some(spec_json) = spec_json {
|
||||
info!("got spec from cli argument {}", spec_json);
|
||||
return Ok(CliSpecParams {
|
||||
spec: Some(serde_json::from_str(spec_json)?),
|
||||
live_config_allowed: false,
|
||||
});
|
||||
}
|
||||
|
||||
// Second, try to read it from the file if path is provided
|
||||
if let Some(spec_path) = spec_path {
|
||||
let file = File::open(Path::new(spec_path))?;
|
||||
return Ok(CliSpecParams {
|
||||
spec: Some(serde_json::from_reader(file)?),
|
||||
live_config_allowed: true,
|
||||
});
|
||||
}
|
||||
|
||||
let Some(compute_id) = compute_id else {
|
||||
panic!(
|
||||
"compute spec should be provided by one of the following ways: \
|
||||
--spec OR --spec-path OR --control-plane-uri and --compute-id"
|
||||
);
|
||||
};
|
||||
let Some(control_plane_uri) = control_plane_uri else {
|
||||
panic!("must specify both --control-plane-uri and --compute-id or none");
|
||||
};
|
||||
|
||||
Ok(CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
})
|
||||
match get_spec_from_control_plane(control_plane_uri, compute_id) {
|
||||
Ok(spec) => Ok(CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed: true,
|
||||
}),
|
||||
Err(e) => {
|
||||
error!(
|
||||
"cannot get response from control plane: {}\n\
|
||||
neither spec nor confirmation that compute is in the Empty state was received",
|
||||
e
|
||||
);
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct CliSpecParams {
|
||||
|
||||
@@ -34,12 +34,12 @@ use nix::unistd::Pid;
|
||||
use tracing::{info, info_span, warn, Instrument};
|
||||
use utils::fs_ext::is_directory_empty;
|
||||
|
||||
#[path = "fast_import/aws_s3_sync.rs"]
|
||||
mod aws_s3_sync;
|
||||
#[path = "fast_import/child_stdio_to_log.rs"]
|
||||
mod child_stdio_to_log;
|
||||
#[path = "fast_import/s3_uri.rs"]
|
||||
mod s3_uri;
|
||||
#[path = "fast_import/s5cmd.rs"]
|
||||
mod s5cmd;
|
||||
|
||||
#[derive(clap::Parser)]
|
||||
struct Args {
|
||||
@@ -326,7 +326,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
info!("upload pgdata");
|
||||
s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
|
||||
aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
|
||||
.await
|
||||
.context("sync dump directory to destination")?;
|
||||
|
||||
@@ -334,10 +334,10 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
{
|
||||
let status_dir = working_directory.join("status");
|
||||
std::fs::create_dir(&status_dir).context("create status directory")?;
|
||||
let status_file = status_dir.join("status");
|
||||
let status_file = status_dir.join("pgdata");
|
||||
std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
|
||||
.context("write status file")?;
|
||||
s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
|
||||
aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
|
||||
.await
|
||||
.context("sync status directory to destination")?;
|
||||
}
|
||||
|
||||
@@ -4,24 +4,21 @@ use camino::Utf8Path;
|
||||
use super::s3_uri::S3Uri;
|
||||
|
||||
pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
|
||||
let mut builder = tokio::process::Command::new("s5cmd");
|
||||
// s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
|
||||
if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
|
||||
builder.arg("--endpoint-url").arg(val);
|
||||
}
|
||||
let mut builder = tokio::process::Command::new("aws");
|
||||
builder
|
||||
.arg("s3")
|
||||
.arg("sync")
|
||||
.arg(local.as_str())
|
||||
.arg(remote.to_string());
|
||||
let st = builder
|
||||
.spawn()
|
||||
.context("spawn s5cmd")?
|
||||
.context("spawn aws s3 sync")?
|
||||
.wait()
|
||||
.await
|
||||
.context("wait for s5cmd")?;
|
||||
.context("wait for aws s3 sync")?;
|
||||
if st.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(anyhow::anyhow!("s5cmd failed"))
|
||||
Err(anyhow::anyhow!("aws s3 sync failed"))
|
||||
}
|
||||
}
|
||||
@@ -537,12 +537,14 @@ components:
|
||||
properties:
|
||||
extname:
|
||||
type: string
|
||||
versions:
|
||||
type: array
|
||||
version:
|
||||
type: string
|
||||
items:
|
||||
type: string
|
||||
n_databases:
|
||||
type: integer
|
||||
owned_by_superuser:
|
||||
type: integer
|
||||
|
||||
SetRoleGrantsRequest:
|
||||
type: object
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use compute_api::responses::{InstalledExtension, InstalledExtensions};
|
||||
use metrics::proto::MetricFamily;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use anyhow::Result;
|
||||
use postgres::{Client, NoTls};
|
||||
@@ -38,61 +37,77 @@ fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
|
||||
/// Connect to every database (see list_dbs above) and get the list of installed extensions.
|
||||
///
|
||||
/// Same extension can be installed in multiple databases with different versions,
|
||||
/// we only keep the highest and lowest version across all databases.
|
||||
/// so we report a separate metric (number of databases where it is installed)
|
||||
/// for each extension version.
|
||||
pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result<InstalledExtensions> {
|
||||
conf.application_name("compute_ctl:get_installed_extensions");
|
||||
let mut client = conf.connect(NoTls)?;
|
||||
|
||||
let databases: Vec<String> = list_dbs(&mut client)?;
|
||||
|
||||
let mut extensions_map: HashMap<String, InstalledExtension> = HashMap::new();
|
||||
let mut extensions_map: HashMap<(String, String, String), InstalledExtension> = HashMap::new();
|
||||
for db in databases.iter() {
|
||||
conf.dbname(db);
|
||||
let mut db_client = conf.connect(NoTls)?;
|
||||
let extensions: Vec<(String, String)> = db_client
|
||||
let extensions: Vec<(String, String, i32)> = db_client
|
||||
.query(
|
||||
"SELECT extname, extversion FROM pg_catalog.pg_extension;",
|
||||
"SELECT extname, extversion, extowner::integer FROM pg_catalog.pg_extension",
|
||||
&[],
|
||||
)?
|
||||
.iter()
|
||||
.map(|row| (row.get("extname"), row.get("extversion")))
|
||||
.map(|row| {
|
||||
(
|
||||
row.get("extname"),
|
||||
row.get("extversion"),
|
||||
row.get("extowner"),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
for (extname, v) in extensions.iter() {
|
||||
for (extname, v, extowner) in extensions.iter() {
|
||||
let version = v.to_string();
|
||||
|
||||
// increment the number of databases where the version of extension is installed
|
||||
INSTALLED_EXTENSIONS
|
||||
.with_label_values(&[extname, &version])
|
||||
.inc();
|
||||
// check if the extension is owned by superuser
|
||||
// 10 is the oid of superuser
|
||||
let owned_by_superuser = if *extowner == 10 { "1" } else { "0" };
|
||||
|
||||
extensions_map
|
||||
.entry(extname.to_string())
|
||||
.entry((
|
||||
extname.to_string(),
|
||||
version.clone(),
|
||||
owned_by_superuser.to_string(),
|
||||
))
|
||||
.and_modify(|e| {
|
||||
e.versions.insert(version.clone());
|
||||
// count the number of databases where the extension is installed
|
||||
e.n_databases += 1;
|
||||
})
|
||||
.or_insert(InstalledExtension {
|
||||
extname: extname.to_string(),
|
||||
versions: HashSet::from([version.clone()]),
|
||||
version: version.clone(),
|
||||
n_databases: 1,
|
||||
owned_by_superuser: owned_by_superuser.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let res = InstalledExtensions {
|
||||
extensions: extensions_map.into_values().collect(),
|
||||
};
|
||||
for (key, ext) in extensions_map.iter() {
|
||||
let (extname, version, owned_by_superuser) = key;
|
||||
let n_databases = ext.n_databases as u64;
|
||||
|
||||
Ok(res)
|
||||
INSTALLED_EXTENSIONS
|
||||
.with_label_values(&[extname, version, owned_by_superuser])
|
||||
.set(n_databases);
|
||||
}
|
||||
|
||||
Ok(InstalledExtensions {
|
||||
extensions: extensions_map.into_values().collect(),
|
||||
})
|
||||
}
|
||||
|
||||
static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
|
||||
register_uint_gauge_vec!(
|
||||
"compute_installed_extensions",
|
||||
"Number of databases where the version of extension is installed",
|
||||
&["extension_name", "version"]
|
||||
&["extension_name", "version", "owned_by_superuser"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
@@ -274,6 +274,7 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||
for env_key in [
|
||||
"AWS_ACCESS_KEY_ID",
|
||||
"AWS_SECRET_ACCESS_KEY",
|
||||
"AWS_SESSION_TOKEN",
|
||||
"AWS_PROFILE",
|
||||
// HOME is needed in combination with `AWS_PROFILE` to pick up the SSO sessions.
|
||||
"HOME",
|
||||
|
||||
@@ -19,6 +19,7 @@ use control_plane::storage_controller::{
|
||||
NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
|
||||
};
|
||||
use control_plane::{broker, local_env};
|
||||
use nix::fcntl::{flock, FlockArg};
|
||||
use pageserver_api::config::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
|
||||
@@ -36,6 +37,8 @@ use safekeeper_api::{
|
||||
};
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeSet, HashMap};
|
||||
use std::fs::File;
|
||||
use std::os::fd::AsRawFd;
|
||||
use std::path::PathBuf;
|
||||
use std::process::exit;
|
||||
use std::str::FromStr;
|
||||
@@ -689,6 +692,21 @@ struct TimelineTreeEl {
|
||||
pub children: BTreeSet<TimelineId>,
|
||||
}
|
||||
|
||||
/// A flock-based guard over the neon_local repository directory
|
||||
struct RepoLock {
|
||||
_file: File,
|
||||
}
|
||||
|
||||
impl RepoLock {
|
||||
fn new() -> Result<Self> {
|
||||
let repo_dir = File::open(local_env::base_path())?;
|
||||
let repo_dir_fd = repo_dir.as_raw_fd();
|
||||
flock(repo_dir_fd, FlockArg::LockExclusive)?;
|
||||
|
||||
Ok(Self { _file: repo_dir })
|
||||
}
|
||||
}
|
||||
|
||||
// Main entry point for the 'neon_local' CLI utility
|
||||
//
|
||||
// This utility helps to manage neon installation. That includes following:
|
||||
@@ -700,9 +718,14 @@ fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
// Check for 'neon init' command first.
|
||||
let subcommand_result = if let NeonLocalCmd::Init(args) = cli.command {
|
||||
handle_init(&args).map(|env| Some(Cow::Owned(env)))
|
||||
let (subcommand_result, _lock) = if let NeonLocalCmd::Init(args) = cli.command {
|
||||
(handle_init(&args).map(|env| Some(Cow::Owned(env))), None)
|
||||
} else {
|
||||
// This tool uses a collection of simple files to store its state, and consequently
|
||||
// it is not generally safe to run multiple commands concurrently. Rather than expect
|
||||
// all callers to know this, use a lock file to protect against concurrent execution.
|
||||
let _repo_lock = RepoLock::new().unwrap();
|
||||
|
||||
// all other commands need an existing config
|
||||
let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
|
||||
let original_env = env.clone();
|
||||
@@ -728,11 +751,12 @@ fn main() -> Result<()> {
|
||||
NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env),
|
||||
};
|
||||
|
||||
if &original_env != env {
|
||||
let subcommand_result = if &original_env != env {
|
||||
subcommand_result.map(|()| Some(Cow::Borrowed(env)))
|
||||
} else {
|
||||
subcommand_result.map(|()| None)
|
||||
}
|
||||
};
|
||||
(subcommand_result, Some(_repo_lock))
|
||||
};
|
||||
|
||||
match subcommand_result {
|
||||
@@ -922,7 +946,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
|
||||
} else {
|
||||
// User (likely interactive) did not provide a description of the environment, give them the default
|
||||
NeonLocalInitConf {
|
||||
control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
|
||||
control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()),
|
||||
broker: NeonBroker {
|
||||
listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
|
||||
},
|
||||
@@ -1718,18 +1742,15 @@ async fn handle_start_all_impl(
|
||||
broker::start_broker_process(env, &retry_timeout).await
|
||||
});
|
||||
|
||||
// Only start the storage controller if the pageserver is configured to need it
|
||||
if env.control_plane_api.is_some() {
|
||||
js.spawn(async move {
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
storage_controller
|
||||
.start(NeonStorageControllerStartArgs::with_default_instance_id(
|
||||
retry_timeout,
|
||||
))
|
||||
.await
|
||||
.map_err(|e| e.context("start storage_controller"))
|
||||
});
|
||||
}
|
||||
js.spawn(async move {
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
storage_controller
|
||||
.start(NeonStorageControllerStartArgs::with_default_instance_id(
|
||||
retry_timeout,
|
||||
))
|
||||
.await
|
||||
.map_err(|e| e.context("start storage_controller"))
|
||||
});
|
||||
|
||||
for ps_conf in &env.pageservers {
|
||||
js.spawn(async move {
|
||||
@@ -1774,10 +1795,6 @@ async fn neon_start_status_check(
|
||||
const RETRY_INTERVAL: Duration = Duration::from_millis(100);
|
||||
const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);
|
||||
|
||||
if env.control_plane_api.is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let storcon = StorageController::from_env(env);
|
||||
|
||||
let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
|
||||
|
||||
@@ -316,6 +316,10 @@ impl Endpoint {
|
||||
// and can cause errors like 'no unpinned buffers available', see
|
||||
// <https://github.com/neondatabase/neon/issues/9956>
|
||||
conf.append("shared_buffers", "1MB");
|
||||
// Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
|
||||
// batching logic. Set this to 2 so that we exercise the code a bit without letting
|
||||
// individual tests do a lot of concurrent work on underpowered test machines
|
||||
conf.append("effective_io_concurrency", "2");
|
||||
conf.append("fsync", "off");
|
||||
conf.append("max_connections", "100");
|
||||
conf.append("wal_level", "logical");
|
||||
@@ -810,7 +814,7 @@ impl Endpoint {
|
||||
}
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(30))
|
||||
.timeout(Duration::from_secs(120))
|
||||
.build()
|
||||
.unwrap();
|
||||
let response = client
|
||||
|
||||
@@ -76,7 +76,7 @@ pub struct LocalEnv {
|
||||
|
||||
// Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will
|
||||
// be propagated into each pageserver's configuration.
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_api: Url,
|
||||
|
||||
// Control plane upcall API for storage controller. If set, this will be propagated into the
|
||||
// storage controller's configuration.
|
||||
@@ -133,7 +133,7 @@ pub struct NeonLocalInitConf {
|
||||
pub storage_controller: Option<NeonStorageControllerConf>,
|
||||
pub pageservers: Vec<NeonLocalInitPageserverConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Option<Url>>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Option<Url>>,
|
||||
}
|
||||
|
||||
@@ -180,7 +180,7 @@ impl NeonStorageControllerConf {
|
||||
const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
|
||||
|
||||
// Very tight heartbeat interval to speed up tests
|
||||
const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100);
|
||||
const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000);
|
||||
}
|
||||
|
||||
impl Default for NeonStorageControllerConf {
|
||||
@@ -535,7 +535,7 @@ impl LocalEnv {
|
||||
storage_controller,
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_compute_hook_api,
|
||||
branch_name_mappings,
|
||||
}
|
||||
@@ -638,7 +638,7 @@ impl LocalEnv {
|
||||
storage_controller: self.storage_controller.clone(),
|
||||
pageservers: vec![], // it's skip_serializing anyway
|
||||
safekeepers: self.safekeepers.clone(),
|
||||
control_plane_api: self.control_plane_api.clone(),
|
||||
control_plane_api: Some(self.control_plane_api.clone()),
|
||||
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
|
||||
branch_name_mappings: self.branch_name_mappings.clone(),
|
||||
},
|
||||
@@ -768,7 +768,7 @@ impl LocalEnv {
|
||||
storage_controller: storage_controller.unwrap_or_default(),
|
||||
pageservers: pageservers.iter().map(Into::into).collect(),
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap_or_default(),
|
||||
control_plane_api: control_plane_api.unwrap(),
|
||||
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
|
||||
branch_name_mappings: Default::default(),
|
||||
};
|
||||
|
||||
@@ -95,21 +95,19 @@ impl PageServerNode {
|
||||
|
||||
let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];
|
||||
|
||||
if let Some(control_plane_api) = &self.env.control_plane_api {
|
||||
overrides.push(format!(
|
||||
"control_plane_api='{}'",
|
||||
control_plane_api.as_str()
|
||||
));
|
||||
overrides.push(format!(
|
||||
"control_plane_api='{}'",
|
||||
self.env.control_plane_api.as_str()
|
||||
));
|
||||
|
||||
// Storage controller uses the same auth as pageserver: if JWT is enabled
|
||||
// for us, we will also need it to talk to them.
|
||||
if matches!(conf.http_auth_type, AuthType::NeonJWT) {
|
||||
let jwt_token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
|
||||
.unwrap();
|
||||
overrides.push(format!("control_plane_api_token='{}'", jwt_token));
|
||||
}
|
||||
// Storage controller uses the same auth as pageserver: if JWT is enabled
|
||||
// for us, we will also need it to talk to them.
|
||||
if matches!(conf.http_auth_type, AuthType::NeonJWT) {
|
||||
let jwt_token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
|
||||
.unwrap();
|
||||
overrides.push(format!("control_plane_api_token='{}'", jwt_token));
|
||||
}
|
||||
|
||||
if !conf.other.contains_key("remote_storage") {
|
||||
@@ -435,7 +433,7 @@ impl PageServerNode {
|
||||
) -> anyhow::Result<()> {
|
||||
let config = Self::parse_config(settings)?;
|
||||
self.http_client
|
||||
.tenant_config(&models::TenantConfigRequest { tenant_id, config })
|
||||
.set_tenant_config(&models::TenantConfigRequest { tenant_id, config })
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -338,7 +338,7 @@ impl StorageController {
|
||||
.port(),
|
||||
)
|
||||
} else {
|
||||
let listen_url = self.env.control_plane_api.clone().unwrap();
|
||||
let listen_url = self.env.control_plane_api.clone();
|
||||
|
||||
let listen = format!(
|
||||
"{}:{}",
|
||||
@@ -708,7 +708,7 @@ impl StorageController {
|
||||
} else {
|
||||
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
|
||||
// for general purpose API access.
|
||||
let listen_url = self.env.control_plane_api.clone().unwrap();
|
||||
let listen_url = self.env.control_plane_api.clone();
|
||||
Url::from_str(&format!(
|
||||
"http://{}:{}/{path}",
|
||||
listen_url.host_str().unwrap(),
|
||||
|
||||
@@ -5,12 +5,13 @@ use clap::{Parser, Subcommand};
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
|
||||
ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
|
||||
SafekeeperDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
|
||||
TenantDescribeResponse, TenantPolicyRequest,
|
||||
},
|
||||
models::{
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
|
||||
ShardParameters, TenantConfig, TenantConfigRequest, TenantShardSplitRequest,
|
||||
TenantShardSplitResponse,
|
||||
ShardParameters, TenantConfig, TenantConfigPatchRequest, TenantConfigRequest,
|
||||
TenantShardSplitRequest, TenantShardSplitResponse,
|
||||
},
|
||||
shard::{ShardStripeSize, TenantShardId},
|
||||
};
|
||||
@@ -116,9 +117,19 @@ enum Command {
|
||||
#[arg(long)]
|
||||
tenant_shard_id: TenantShardId,
|
||||
},
|
||||
/// Modify the pageserver tenant configuration of a tenant: this is the configuration structure
|
||||
/// Set the pageserver tenant configuration of a tenant: this is the configuration structure
|
||||
/// that is passed through to pageservers, and does not affect storage controller behavior.
|
||||
TenantConfig {
|
||||
/// Any previous tenant configs are overwritten.
|
||||
SetTenantConfig {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
config: String,
|
||||
},
|
||||
/// Patch the pageserver tenant configuration of a tenant. Any fields with null values in the
|
||||
/// provided JSON are unset from the tenant config and all fields with non-null values are set.
|
||||
/// Unspecified fields are not changed.
|
||||
PatchTenantConfig {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
@@ -201,6 +212,8 @@ enum Command {
|
||||
#[arg(long)]
|
||||
timeout: humantime::Duration,
|
||||
},
|
||||
/// List safekeepers known to the storage controller
|
||||
Safekeepers {},
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
@@ -549,11 +562,21 @@ async fn main() -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Command::TenantConfig { tenant_id, config } => {
|
||||
Command::SetTenantConfig { tenant_id, config } => {
|
||||
let tenant_conf = serde_json::from_str(&config)?;
|
||||
|
||||
vps_client
|
||||
.tenant_config(&TenantConfigRequest {
|
||||
.set_tenant_config(&TenantConfigRequest {
|
||||
tenant_id,
|
||||
config: tenant_conf,
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
Command::PatchTenantConfig { tenant_id, config } => {
|
||||
let tenant_conf = serde_json::from_str(&config)?;
|
||||
|
||||
vps_client
|
||||
.patch_tenant_config(&TenantConfigPatchRequest {
|
||||
tenant_id,
|
||||
config: tenant_conf,
|
||||
})
|
||||
@@ -736,7 +759,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
threshold,
|
||||
} => {
|
||||
vps_client
|
||||
.tenant_config(&TenantConfigRequest {
|
||||
.set_tenant_config(&TenantConfigRequest {
|
||||
tenant_id,
|
||||
config: TenantConfig {
|
||||
eviction_policy: Some(EvictionPolicy::LayerAccessThreshold(
|
||||
@@ -1000,6 +1023,31 @@ async fn main() -> anyhow::Result<()> {
|
||||
"Fill was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
|
||||
);
|
||||
}
|
||||
Command::Safekeepers {} => {
|
||||
let mut resp = storcon_client
|
||||
.dispatch::<(), Vec<SafekeeperDescribeResponse>>(
|
||||
Method::GET,
|
||||
"control/v1/safekeeper".to_string(),
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
resp.sort_by(|a, b| a.id.cmp(&b.id));
|
||||
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.set_header(["Id", "Version", "Host", "Port", "Http Port", "AZ Id"]);
|
||||
for sk in resp {
|
||||
table.add_row([
|
||||
format!("{}", sk.id),
|
||||
format!("{}", sk.version),
|
||||
sk.host,
|
||||
format!("{}", sk.port),
|
||||
format!("{}", sk.http_port),
|
||||
sk.availability_zone_id.to_string(),
|
||||
]);
|
||||
}
|
||||
println!("{table}");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -42,6 +42,7 @@ allow = [
|
||||
"MPL-2.0",
|
||||
"OpenSSL",
|
||||
"Unicode-DFS-2016",
|
||||
"Unicode-3.0",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
exceptions = [
|
||||
|
||||
@@ -132,11 +132,6 @@
|
||||
"name": "cron.database",
|
||||
"value": "postgres",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "session_preload_libraries",
|
||||
"value": "anon",
|
||||
"vartype": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@@ -35,11 +35,11 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
echo "clean up containers if exists"
|
||||
cleanup
|
||||
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
|
||||
# The support of pg_anon not yet added to PG17, so we have to remove the corresponding option
|
||||
if [ $pg_version -eq 17 ]; then
|
||||
# The support of pg_anon not yet added to PG17, so we have to add the corresponding option for other PG versions
|
||||
if [ "${pg_version}" -ne 17 ]; then
|
||||
SPEC_PATH="compute_wrapper/var/db/postgres/specs"
|
||||
mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak
|
||||
jq 'del(.cluster.settings[] | select (.name == "session_preload_libraries"))' $SPEC_PATH/spec.bak > $SPEC_PATH/spec.json
|
||||
jq '.cluster.settings += [{"name": "session_preload_libraries","value": "anon","vartype": "string"}]' "${SPEC_PATH}/spec.bak" > "${SPEC_PATH}/spec.json"
|
||||
fi
|
||||
PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d
|
||||
|
||||
@@ -106,8 +106,8 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
fi
|
||||
fi
|
||||
cleanup
|
||||
# The support of pg_anon not yet added to PG17, so we have to remove the corresponding option
|
||||
if [ $pg_version -eq 17 ]; then
|
||||
mv $SPEC_PATH/spec.bak $SPEC_PATH/spec.json
|
||||
# Restore the original spec.json
|
||||
if [ "$pg_version" -ne 17 ]; then
|
||||
mv "$SPEC_PATH/spec.bak" "$SPEC_PATH/spec.json"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::Display;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
@@ -163,8 +162,9 @@ pub enum ControlPlaneComputeStatus {
|
||||
#[derive(Clone, Debug, Default, Serialize)]
|
||||
pub struct InstalledExtension {
|
||||
pub extname: String,
|
||||
pub versions: HashSet<String>,
|
||||
pub version: String,
|
||||
pub n_databases: u32, // Number of databases using this extension
|
||||
pub owned_by_superuser: String,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Serialize)]
|
||||
|
||||
@@ -91,7 +91,7 @@ impl Timing {
|
||||
|
||||
/// Return true if there is a ready event.
|
||||
fn is_event_ready(&self, queue: &mut BinaryHeap<Pending>) -> bool {
|
||||
queue.peek().map_or(false, |x| x.time <= self.now())
|
||||
queue.peek().is_some_and(|x| x.time <= self.now())
|
||||
}
|
||||
|
||||
/// Clear all pending events.
|
||||
|
||||
@@ -75,7 +75,7 @@ pub struct TenantPolicyRequest {
|
||||
pub scheduling: Option<ShardSchedulingPolicy>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug)]
|
||||
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
|
||||
pub struct AvailabilityZone(pub String);
|
||||
|
||||
impl Display for AvailabilityZone {
|
||||
@@ -245,6 +245,17 @@ impl From<NodeAvailability> for NodeAvailabilityWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
/// Scheduling policy enables us to selectively disable some automatic actions that the
|
||||
/// controller performs on a tenant shard. This is only set to a non-default value by
|
||||
/// human intervention, and it is reset to the default value (Active) when the tenant's
|
||||
/// placement policy is modified away from Attached.
|
||||
///
|
||||
/// The typical use of a non-Active scheduling policy is one of:
|
||||
/// - Pinnning a shard to a node (i.e. migrating it there & setting a non-Active scheduling policy)
|
||||
/// - Working around a bug (e.g. if something is flapping and we need to stop it until the bug is fixed)
|
||||
///
|
||||
/// If you're not sure which policy to use to pin a shard to its current location, you probably
|
||||
/// want Pause.
|
||||
#[derive(Serialize, Deserialize, Clone, Copy, Eq, PartialEq, Debug)]
|
||||
pub enum ShardSchedulingPolicy {
|
||||
// Normal mode: the tenant's scheduled locations may be updated at will, including
|
||||
@@ -361,6 +372,23 @@ pub struct MetadataHealthListOutdatedResponse {
|
||||
pub health_records: Vec<MetadataHealthRecord>,
|
||||
}
|
||||
|
||||
/// Publicly exposed safekeeper description
|
||||
///
|
||||
/// The `active` flag which we have in the DB is not included on purpose: it is deprecated.
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct SafekeeperDescribeResponse {
|
||||
pub id: NodeId,
|
||||
pub region_id: String,
|
||||
/// 1 is special, it means just created (not currently posted to storcon).
|
||||
/// Zero or negative is not really expected.
|
||||
/// Otherwise the number from `release-$(number_of_commits_on_branch)` tag.
|
||||
pub version: i64,
|
||||
pub host: String,
|
||||
pub port: i32,
|
||||
pub http_port: i32,
|
||||
pub availability_zone_id: String,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
@@ -24,7 +24,7 @@ pub struct Key {
|
||||
|
||||
/// When working with large numbers of Keys in-memory, it is more efficient to handle them as i128 than as
|
||||
/// a struct of fields.
|
||||
#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
|
||||
#[derive(Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize, Debug)]
|
||||
pub struct CompactKey(i128);
|
||||
|
||||
/// The storage key size.
|
||||
@@ -565,6 +565,10 @@ impl Key {
|
||||
&& self.field5 == 0
|
||||
&& self.field6 == u32::MAX
|
||||
}
|
||||
|
||||
pub fn is_slru_dir_key(&self) -> bool {
|
||||
slru_dir_kind(self).is_some()
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
|
||||
@@ -6,6 +6,7 @@ pub mod utilization;
|
||||
use camino::Utf8PathBuf;
|
||||
pub use utilization::PageserverUtilization;
|
||||
|
||||
use core::ops::Range;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fmt::Display,
|
||||
@@ -17,7 +18,7 @@ use std::{
|
||||
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_with::serde_as;
|
||||
use utils::{
|
||||
completion,
|
||||
@@ -28,6 +29,7 @@ use utils::{
|
||||
};
|
||||
|
||||
use crate::{
|
||||
key::Key,
|
||||
reltag::RelTag,
|
||||
shard::{ShardCount, ShardStripeSize, TenantShardId},
|
||||
};
|
||||
@@ -210,6 +212,68 @@ pub enum TimelineState {
|
||||
Broken { reason: String, backtrace: String },
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub struct CompactLsnRange {
|
||||
pub start: Lsn,
|
||||
pub end: Lsn,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
pub struct CompactKeyRange {
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub start: Key,
|
||||
#[serde_as(as = "serde_with::DisplayFromStr")]
|
||||
pub end: Key,
|
||||
}
|
||||
|
||||
impl From<Range<Lsn>> for CompactLsnRange {
|
||||
fn from(range: Range<Lsn>) -> Self {
|
||||
Self {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Range<Key>> for CompactKeyRange {
|
||||
fn from(range: Range<Key>) -> Self {
|
||||
Self {
|
||||
start: range.start,
|
||||
end: range.end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CompactLsnRange> for Range<Lsn> {
|
||||
fn from(range: CompactLsnRange) -> Self {
|
||||
range.start..range.end
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CompactKeyRange> for Range<Key> {
|
||||
fn from(range: CompactKeyRange) -> Self {
|
||||
range.start..range.end
|
||||
}
|
||||
}
|
||||
|
||||
impl CompactLsnRange {
|
||||
pub fn above(lsn: Lsn) -> Self {
|
||||
Self {
|
||||
start: lsn,
|
||||
end: Lsn::MAX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct CompactInfoResponse {
|
||||
pub compact_key_range: Option<CompactKeyRange>,
|
||||
pub compact_lsn_range: Option<CompactLsnRange>,
|
||||
pub sub_compaction: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
pub struct TimelineCreateRequest {
|
||||
pub new_timeline_id: TimelineId,
|
||||
@@ -325,6 +389,115 @@ impl Default for ShardParameters {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Eq, PartialEq)]
|
||||
pub enum FieldPatch<T> {
|
||||
Upsert(T),
|
||||
Remove,
|
||||
#[default]
|
||||
Noop,
|
||||
}
|
||||
|
||||
impl<T> FieldPatch<T> {
|
||||
fn is_noop(&self) -> bool {
|
||||
matches!(self, FieldPatch::Noop)
|
||||
}
|
||||
|
||||
pub fn apply(self, target: &mut Option<T>) {
|
||||
match self {
|
||||
Self::Upsert(v) => *target = Some(v),
|
||||
Self::Remove => *target = None,
|
||||
Self::Noop => {}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn map<U, E, F: FnOnce(T) -> Result<U, E>>(self, map: F) -> Result<FieldPatch<U>, E> {
|
||||
match self {
|
||||
Self::Upsert(v) => Ok(FieldPatch::<U>::Upsert(map(v)?)),
|
||||
Self::Remove => Ok(FieldPatch::<U>::Remove),
|
||||
Self::Noop => Ok(FieldPatch::<U>::Noop),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de, T: Deserialize<'de>> Deserialize<'de> for FieldPatch<T> {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
Option::deserialize(deserializer).map(|opt| match opt {
|
||||
None => FieldPatch::Remove,
|
||||
Some(val) => FieldPatch::Upsert(val),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Serialize> Serialize for FieldPatch<T> {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
match self {
|
||||
FieldPatch::Upsert(val) => serializer.serialize_some(val),
|
||||
FieldPatch::Remove => serializer.serialize_none(),
|
||||
FieldPatch::Noop => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
|
||||
#[serde(default)]
|
||||
pub struct TenantConfigPatch {
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub checkpoint_distance: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub checkpoint_timeout: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_target_size: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_period: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_threshold: FieldPatch<usize>,
|
||||
// defer parsing compaction_algorithm, like eviction_policy
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub compaction_algorithm: FieldPatch<CompactionAlgorithmSettings>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_horizon: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub gc_period: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub image_creation_threshold: FieldPatch<usize>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub pitr_interval: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub walreceiver_connect_timeout: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub lagging_wal_timeout: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub max_lsn_wal_lag: FieldPatch<NonZeroU64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub eviction_policy: FieldPatch<EvictionPolicy>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub min_resident_size_override: FieldPatch<u64>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub evictions_low_residence_duration_metric_threshold: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub heatmap_period: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub lazy_slru_download: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub timeline_get_throttle: FieldPatch<ThrottleConfig>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub image_layer_creation_check_threshold: FieldPatch<u8>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub lsn_lease_length: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub lsn_lease_length_for_ts: FieldPatch<String>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub timeline_offloading: FieldPatch<bool>,
|
||||
#[serde(skip_serializing_if = "FieldPatch::is_noop")]
|
||||
pub wal_receiver_protocol_override: FieldPatch<PostgresClientProtocol>,
|
||||
}
|
||||
|
||||
/// An alternative representation of `pageserver::tenant::TenantConf` with
|
||||
/// simpler types.
|
||||
#[derive(Serialize, Deserialize, Debug, Default, Clone, Eq, PartialEq)]
|
||||
@@ -356,6 +529,107 @@ pub struct TenantConfig {
|
||||
pub wal_receiver_protocol_override: Option<PostgresClientProtocol>,
|
||||
}
|
||||
|
||||
impl TenantConfig {
|
||||
pub fn apply_patch(self, patch: TenantConfigPatch) -> TenantConfig {
|
||||
let Self {
|
||||
mut checkpoint_distance,
|
||||
mut checkpoint_timeout,
|
||||
mut compaction_target_size,
|
||||
mut compaction_period,
|
||||
mut compaction_threshold,
|
||||
mut compaction_algorithm,
|
||||
mut gc_horizon,
|
||||
mut gc_period,
|
||||
mut image_creation_threshold,
|
||||
mut pitr_interval,
|
||||
mut walreceiver_connect_timeout,
|
||||
mut lagging_wal_timeout,
|
||||
mut max_lsn_wal_lag,
|
||||
mut eviction_policy,
|
||||
mut min_resident_size_override,
|
||||
mut evictions_low_residence_duration_metric_threshold,
|
||||
mut heatmap_period,
|
||||
mut lazy_slru_download,
|
||||
mut timeline_get_throttle,
|
||||
mut image_layer_creation_check_threshold,
|
||||
mut lsn_lease_length,
|
||||
mut lsn_lease_length_for_ts,
|
||||
mut timeline_offloading,
|
||||
mut wal_receiver_protocol_override,
|
||||
} = self;
|
||||
|
||||
patch.checkpoint_distance.apply(&mut checkpoint_distance);
|
||||
patch.checkpoint_timeout.apply(&mut checkpoint_timeout);
|
||||
patch
|
||||
.compaction_target_size
|
||||
.apply(&mut compaction_target_size);
|
||||
patch.compaction_period.apply(&mut compaction_period);
|
||||
patch.compaction_threshold.apply(&mut compaction_threshold);
|
||||
patch.compaction_algorithm.apply(&mut compaction_algorithm);
|
||||
patch.gc_horizon.apply(&mut gc_horizon);
|
||||
patch.gc_period.apply(&mut gc_period);
|
||||
patch
|
||||
.image_creation_threshold
|
||||
.apply(&mut image_creation_threshold);
|
||||
patch.pitr_interval.apply(&mut pitr_interval);
|
||||
patch
|
||||
.walreceiver_connect_timeout
|
||||
.apply(&mut walreceiver_connect_timeout);
|
||||
patch.lagging_wal_timeout.apply(&mut lagging_wal_timeout);
|
||||
patch.max_lsn_wal_lag.apply(&mut max_lsn_wal_lag);
|
||||
patch.eviction_policy.apply(&mut eviction_policy);
|
||||
patch
|
||||
.min_resident_size_override
|
||||
.apply(&mut min_resident_size_override);
|
||||
patch
|
||||
.evictions_low_residence_duration_metric_threshold
|
||||
.apply(&mut evictions_low_residence_duration_metric_threshold);
|
||||
patch.heatmap_period.apply(&mut heatmap_period);
|
||||
patch.lazy_slru_download.apply(&mut lazy_slru_download);
|
||||
patch
|
||||
.timeline_get_throttle
|
||||
.apply(&mut timeline_get_throttle);
|
||||
patch
|
||||
.image_layer_creation_check_threshold
|
||||
.apply(&mut image_layer_creation_check_threshold);
|
||||
patch.lsn_lease_length.apply(&mut lsn_lease_length);
|
||||
patch
|
||||
.lsn_lease_length_for_ts
|
||||
.apply(&mut lsn_lease_length_for_ts);
|
||||
patch.timeline_offloading.apply(&mut timeline_offloading);
|
||||
patch
|
||||
.wal_receiver_protocol_override
|
||||
.apply(&mut wal_receiver_protocol_override);
|
||||
|
||||
Self {
|
||||
checkpoint_distance,
|
||||
checkpoint_timeout,
|
||||
compaction_target_size,
|
||||
compaction_period,
|
||||
compaction_threshold,
|
||||
compaction_algorithm,
|
||||
gc_horizon,
|
||||
gc_period,
|
||||
image_creation_threshold,
|
||||
pitr_interval,
|
||||
walreceiver_connect_timeout,
|
||||
lagging_wal_timeout,
|
||||
max_lsn_wal_lag,
|
||||
eviction_policy,
|
||||
min_resident_size_override,
|
||||
evictions_low_residence_duration_metric_threshold,
|
||||
heatmap_period,
|
||||
lazy_slru_download,
|
||||
timeline_get_throttle,
|
||||
image_layer_creation_check_threshold,
|
||||
lsn_lease_length,
|
||||
lsn_lease_length_for_ts,
|
||||
timeline_offloading,
|
||||
wal_receiver_protocol_override,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The policy for the aux file storage.
|
||||
///
|
||||
/// It can be switched through `switch_aux_file_policy` tenant config.
|
||||
@@ -686,6 +960,14 @@ impl TenantConfigRequest {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct TenantConfigPatchRequest {
|
||||
pub tenant_id: TenantId,
|
||||
#[serde(flatten)]
|
||||
pub config: TenantConfigPatch, // as we have a flattened field, we should reject all unknown fields in it
|
||||
}
|
||||
|
||||
/// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[serde(tag = "slug", content = "data", rename_all = "snake_case")]
|
||||
@@ -1699,4 +1981,45 @@ mod tests {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tenant_config_patch_request_serde() {
|
||||
let patch_request = TenantConfigPatchRequest {
|
||||
tenant_id: TenantId::from_str("17c6d121946a61e5ab0fe5a2fd4d8215").unwrap(),
|
||||
config: TenantConfigPatch {
|
||||
checkpoint_distance: FieldPatch::Upsert(42),
|
||||
gc_horizon: FieldPatch::Remove,
|
||||
compaction_threshold: FieldPatch::Noop,
|
||||
..TenantConfigPatch::default()
|
||||
},
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&patch_request).unwrap();
|
||||
|
||||
let expected = r#"{"tenant_id":"17c6d121946a61e5ab0fe5a2fd4d8215","checkpoint_distance":42,"gc_horizon":null}"#;
|
||||
assert_eq!(json, expected);
|
||||
|
||||
let decoded: TenantConfigPatchRequest = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(decoded.tenant_id, patch_request.tenant_id);
|
||||
assert_eq!(decoded.config, patch_request.config);
|
||||
|
||||
// Now apply the patch to a config to demonstrate semantics
|
||||
|
||||
let base = TenantConfig {
|
||||
checkpoint_distance: Some(28),
|
||||
gc_horizon: Some(100),
|
||||
compaction_target_size: Some(1024),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let expected = TenantConfig {
|
||||
checkpoint_distance: Some(42),
|
||||
gc_horizon: None,
|
||||
..base.clone()
|
||||
};
|
||||
|
||||
let patched = base.apply_patch(decoded.config);
|
||||
|
||||
assert_eq!(patched, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,7 +173,11 @@ impl ShardIdentity {
|
||||
|
||||
/// Return true if the key should be stored on all shards, not just one.
|
||||
pub fn is_key_global(&self, key: &Key) -> bool {
|
||||
if key.is_slru_block_key() || key.is_slru_segment_size_key() || key.is_aux_file_key() {
|
||||
if key.is_slru_block_key()
|
||||
|| key.is_slru_segment_size_key()
|
||||
|| key.is_aux_file_key()
|
||||
|| key.is_slru_dir_key()
|
||||
{
|
||||
// Special keys that are only stored on shard 0
|
||||
false
|
||||
} else if key.is_rel_block_key() {
|
||||
|
||||
@@ -9,9 +9,11 @@ regex.workspace = true
|
||||
bytes.workspace = true
|
||||
anyhow.workspace = true
|
||||
crc32c.workspace = true
|
||||
criterion.workspace = true
|
||||
once_cell.workspace = true
|
||||
log.workspace = true
|
||||
memoffset.workspace = true
|
||||
pprof.workspace = true
|
||||
thiserror.workspace = true
|
||||
serde.workspace = true
|
||||
utils.workspace = true
|
||||
@@ -24,3 +26,7 @@ postgres.workspace = true
|
||||
[build-dependencies]
|
||||
anyhow.workspace = true
|
||||
bindgen.workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "waldecoder"
|
||||
harness = false
|
||||
|
||||
26
libs/postgres_ffi/benches/README.md
Normal file
26
libs/postgres_ffi/benches/README.md
Normal file
@@ -0,0 +1,26 @@
|
||||
## Benchmarks
|
||||
|
||||
To run benchmarks:
|
||||
|
||||
```sh
|
||||
# All benchmarks.
|
||||
cargo bench --package postgres_ffi
|
||||
|
||||
# Specific file.
|
||||
cargo bench --package postgres_ffi --bench waldecoder
|
||||
|
||||
# Specific benchmark.
|
||||
cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024
|
||||
|
||||
# List available benchmarks.
|
||||
cargo bench --package postgres_ffi --benches -- --list
|
||||
|
||||
# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.
|
||||
# Output in target/criterion/*/profile/flamegraph.svg.
|
||||
cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024 -- --profile-time 10
|
||||
```
|
||||
|
||||
Additional charts and statistics are available in `target/criterion/report/index.html`.
|
||||
|
||||
Benchmarks are automatically compared against the previous run. To compare against other runs, see
|
||||
`--baseline` and `--save-baseline`.
|
||||
49
libs/postgres_ffi/benches/waldecoder.rs
Normal file
49
libs/postgres_ffi/benches/waldecoder.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use std::ffi::CStr;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Bencher, Criterion};
|
||||
use postgres_ffi::v17::wal_generator::LogicalMessageGenerator;
|
||||
use postgres_ffi::v17::waldecoder_handler::WalStreamDecoderHandler;
|
||||
use postgres_ffi::waldecoder::WalStreamDecoder;
|
||||
use pprof::criterion::{Output, PProfProfiler};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
const KB: usize = 1024;
|
||||
|
||||
// Register benchmarks with Criterion.
|
||||
criterion_group!(
|
||||
name = benches;
|
||||
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
|
||||
targets = bench_complete_record,
|
||||
);
|
||||
criterion_main!(benches);
|
||||
|
||||
/// Benchmarks WalStreamDecoder::complete_record() for a logical message of varying size.
|
||||
fn bench_complete_record(c: &mut Criterion) {
|
||||
let mut g = c.benchmark_group("complete_record");
|
||||
for size in [64, KB, 8 * KB, 128 * KB] {
|
||||
// Kind of weird to change the group throughput per benchmark, but it's the only way
|
||||
// to vary it per benchmark. It works.
|
||||
g.throughput(criterion::Throughput::Bytes(size as u64));
|
||||
g.bench_function(format!("size={size}"), |b| run_bench(b, size).unwrap());
|
||||
}
|
||||
|
||||
fn run_bench(b: &mut Bencher, size: usize) -> anyhow::Result<()> {
|
||||
const PREFIX: &CStr = c"";
|
||||
let value_size = LogicalMessageGenerator::make_value_size(size, PREFIX);
|
||||
let value = vec![1; value_size];
|
||||
|
||||
let mut decoder = WalStreamDecoder::new(Lsn(0), 170000);
|
||||
let msg = LogicalMessageGenerator::new(PREFIX, &value)
|
||||
.next()
|
||||
.unwrap()
|
||||
.encode(Lsn(0));
|
||||
assert_eq!(msg.len(), size);
|
||||
|
||||
b.iter(|| {
|
||||
let msg = msg.clone(); // Bytes::clone() is cheap
|
||||
decoder.complete_record(msg).unwrap();
|
||||
});
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -106,11 +106,11 @@ impl<R: RecordGenerator> WalGenerator<R> {
|
||||
const TIMELINE_ID: u32 = 1;
|
||||
|
||||
/// Creates a new WAL generator with the given record generator.
|
||||
pub fn new(record_generator: R) -> WalGenerator<R> {
|
||||
pub fn new(record_generator: R, start_lsn: Lsn) -> WalGenerator<R> {
|
||||
Self {
|
||||
record_generator,
|
||||
lsn: Lsn(0),
|
||||
prev_lsn: Lsn(0),
|
||||
lsn: start_lsn,
|
||||
prev_lsn: start_lsn,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,6 +231,22 @@ impl LogicalMessageGenerator {
|
||||
};
|
||||
[&header.encode(), prefix, message].concat().into()
|
||||
}
|
||||
|
||||
/// Computes how large a value must be to get a record of the given size. Convenience method to
|
||||
/// construct records of pre-determined size. Panics if the record size is too small.
|
||||
pub fn make_value_size(record_size: usize, prefix: &CStr) -> usize {
|
||||
let xlog_header_size = XLOG_SIZE_OF_XLOG_RECORD;
|
||||
let lm_header_size = size_of::<XlLogicalMessage>();
|
||||
let prefix_size = prefix.to_bytes_with_nul().len();
|
||||
let data_header_size = match record_size - xlog_header_size - 2 {
|
||||
0..=255 => 2,
|
||||
256..=258 => panic!("impossible record_size {record_size}"),
|
||||
259.. => 5,
|
||||
};
|
||||
record_size
|
||||
.checked_sub(xlog_header_size + lm_header_size + prefix_size + data_header_size)
|
||||
.expect("record_size too small")
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for LogicalMessageGenerator {
|
||||
|
||||
@@ -81,7 +81,7 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
|
||||
continue;
|
||||
}
|
||||
let mut f = File::options().write(true).open(file.path()).unwrap();
|
||||
const ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE];
|
||||
static ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE];
|
||||
f.write_all(
|
||||
&ZEROS[0..min(
|
||||
WAL_SEGMENT_SIZE,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "postgres-protocol2"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -9,8 +9,7 @@
|
||||
//!
|
||||
//! This library assumes that the `client_encoding` backend parameter has been
|
||||
//! set to `UTF8`. It will most likely not behave properly if that is not the case.
|
||||
#![doc(html_root_url = "https://docs.rs/postgres-protocol/0.6")]
|
||||
#![warn(missing_docs, rust_2018_idioms, clippy::all)]
|
||||
#![warn(missing_docs, clippy::all)]
|
||||
|
||||
use byteorder::{BigEndian, ByteOrder};
|
||||
use bytes::{BufMut, BytesMut};
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
use byteorder::{BigEndian, ByteOrder};
|
||||
use bytes::{Buf, BufMut, BytesMut};
|
||||
use std::convert::TryFrom;
|
||||
use std::error::Error;
|
||||
use std::io;
|
||||
use std::marker;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "postgres-types2"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -2,8 +2,7 @@
|
||||
//!
|
||||
//! This crate is used by the `tokio-postgres` and `postgres` crates. You normally don't need to depend directly on it
|
||||
//! unless you want to define your own `ToSql` or `FromSql` definitions.
|
||||
#![doc(html_root_url = "https://docs.rs/postgres-types/0.2")]
|
||||
#![warn(clippy::all, rust_2018_idioms, missing_docs)]
|
||||
#![warn(clippy::all, missing_docs)]
|
||||
|
||||
use fallible_iterator::FallibleIterator;
|
||||
use postgres_protocol2::types;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "tokio-postgres2"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
license = "MIT/Apache-2.0"
|
||||
|
||||
[dependencies]
|
||||
|
||||
@@ -33,10 +33,14 @@ pub struct Response {
|
||||
#[derive(PartialEq, Debug)]
|
||||
enum State {
|
||||
Active,
|
||||
Terminating,
|
||||
Closing,
|
||||
}
|
||||
|
||||
enum WriteReady {
|
||||
Terminating,
|
||||
WaitingOnRead,
|
||||
}
|
||||
|
||||
/// A connection to a PostgreSQL database.
|
||||
///
|
||||
/// This is one half of what is returned when a new connection is established. It performs the actual IO with the
|
||||
@@ -51,7 +55,6 @@ pub struct Connection<S, T> {
|
||||
/// HACK: we need this in the Neon Proxy to forward params.
|
||||
pub parameters: HashMap<String, String>,
|
||||
receiver: mpsc::UnboundedReceiver<Request>,
|
||||
pending_request: Option<RequestMessages>,
|
||||
pending_responses: VecDeque<BackendMessage>,
|
||||
responses: VecDeque<Response>,
|
||||
state: State,
|
||||
@@ -72,7 +75,6 @@ where
|
||||
stream,
|
||||
parameters,
|
||||
receiver,
|
||||
pending_request: None,
|
||||
pending_responses,
|
||||
responses: VecDeque::new(),
|
||||
state: State::Active,
|
||||
@@ -93,26 +95,23 @@ where
|
||||
.map(|o| o.map(|r| r.map_err(Error::io)))
|
||||
}
|
||||
|
||||
fn poll_read(&mut self, cx: &mut Context<'_>) -> Result<Option<AsyncMessage>, Error> {
|
||||
if self.state != State::Active {
|
||||
trace!("poll_read: done");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
/// Read and process messages from the connection to postgres.
|
||||
/// client <- postgres
|
||||
fn poll_read(&mut self, cx: &mut Context<'_>) -> Poll<Result<AsyncMessage, Error>> {
|
||||
loop {
|
||||
let message = match self.poll_response(cx)? {
|
||||
Poll::Ready(Some(message)) => message,
|
||||
Poll::Ready(None) => return Err(Error::closed()),
|
||||
Poll::Ready(None) => return Poll::Ready(Err(Error::closed())),
|
||||
Poll::Pending => {
|
||||
trace!("poll_read: waiting on response");
|
||||
return Ok(None);
|
||||
return Poll::Pending;
|
||||
}
|
||||
};
|
||||
|
||||
let (mut messages, request_complete) = match message {
|
||||
BackendMessage::Async(Message::NoticeResponse(body)) => {
|
||||
let error = DbError::parse(&mut body.fields()).map_err(Error::parse)?;
|
||||
return Ok(Some(AsyncMessage::Notice(error)));
|
||||
return Poll::Ready(Ok(AsyncMessage::Notice(error)));
|
||||
}
|
||||
BackendMessage::Async(Message::NotificationResponse(body)) => {
|
||||
let notification = Notification {
|
||||
@@ -120,7 +119,7 @@ where
|
||||
channel: body.channel().map_err(Error::parse)?.to_string(),
|
||||
payload: body.message().map_err(Error::parse)?.to_string(),
|
||||
};
|
||||
return Ok(Some(AsyncMessage::Notification(notification)));
|
||||
return Poll::Ready(Ok(AsyncMessage::Notification(notification)));
|
||||
}
|
||||
BackendMessage::Async(Message::ParameterStatus(body)) => {
|
||||
self.parameters.insert(
|
||||
@@ -139,8 +138,10 @@ where
|
||||
let mut response = match self.responses.pop_front() {
|
||||
Some(response) => response,
|
||||
None => match messages.next().map_err(Error::parse)? {
|
||||
Some(Message::ErrorResponse(error)) => return Err(Error::db(error)),
|
||||
_ => return Err(Error::unexpected_message()),
|
||||
Some(Message::ErrorResponse(error)) => {
|
||||
return Poll::Ready(Err(Error::db(error)))
|
||||
}
|
||||
_ => return Poll::Ready(Err(Error::unexpected_message())),
|
||||
},
|
||||
};
|
||||
|
||||
@@ -164,18 +165,14 @@ where
|
||||
request_complete,
|
||||
});
|
||||
trace!("poll_read: waiting on sender");
|
||||
return Ok(None);
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch the next client request and enqueue the response sender.
|
||||
fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<RequestMessages>> {
|
||||
if let Some(messages) = self.pending_request.take() {
|
||||
trace!("retrying pending request");
|
||||
return Poll::Ready(Some(messages));
|
||||
}
|
||||
|
||||
if self.receiver.is_closed() {
|
||||
return Poll::Ready(None);
|
||||
}
|
||||
@@ -193,74 +190,80 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_write(&mut self, cx: &mut Context<'_>) -> Result<bool, Error> {
|
||||
/// Process client requests and write them to the postgres connection, flushing if necessary.
|
||||
/// client -> postgres
|
||||
fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<WriteReady, Error>> {
|
||||
loop {
|
||||
if self.state == State::Closing {
|
||||
trace!("poll_write: done");
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
if Pin::new(&mut self.stream)
|
||||
.poll_ready(cx)
|
||||
.map_err(Error::io)?
|
||||
.is_pending()
|
||||
{
|
||||
trace!("poll_write: waiting on socket");
|
||||
return Ok(false);
|
||||
|
||||
// poll_ready is self-flushing.
|
||||
return Poll::Pending;
|
||||
}
|
||||
|
||||
let request = match self.poll_request(cx) {
|
||||
Poll::Ready(Some(request)) => request,
|
||||
Poll::Ready(None) if self.responses.is_empty() && self.state == State::Active => {
|
||||
match self.poll_request(cx) {
|
||||
// send the message to postgres
|
||||
Poll::Ready(Some(RequestMessages::Single(request))) => {
|
||||
Pin::new(&mut self.stream)
|
||||
.start_send(request)
|
||||
.map_err(Error::io)?;
|
||||
}
|
||||
// No more messages from the client, and no more responses to wait for.
|
||||
// Send a terminate message to postgres
|
||||
Poll::Ready(None) if self.responses.is_empty() => {
|
||||
trace!("poll_write: at eof, terminating");
|
||||
self.state = State::Terminating;
|
||||
let mut request = BytesMut::new();
|
||||
frontend::terminate(&mut request);
|
||||
RequestMessages::Single(FrontendMessage::Raw(request.freeze()))
|
||||
let request = FrontendMessage::Raw(request.freeze());
|
||||
|
||||
Pin::new(&mut self.stream)
|
||||
.start_send(request)
|
||||
.map_err(Error::io)?;
|
||||
|
||||
trace!("poll_write: sent eof, closing");
|
||||
trace!("poll_write: done");
|
||||
return Poll::Ready(Ok(WriteReady::Terminating));
|
||||
}
|
||||
// No more messages from the client, but there are still some responses to wait for.
|
||||
Poll::Ready(None) => {
|
||||
trace!(
|
||||
"poll_write: at eof, pending responses {}",
|
||||
self.responses.len()
|
||||
);
|
||||
return Ok(true);
|
||||
ready!(self.poll_flush(cx))?;
|
||||
return Poll::Ready(Ok(WriteReady::WaitingOnRead));
|
||||
}
|
||||
// Still waiting for a message from the client.
|
||||
Poll::Pending => {
|
||||
trace!("poll_write: waiting on request");
|
||||
return Ok(true);
|
||||
}
|
||||
};
|
||||
|
||||
match request {
|
||||
RequestMessages::Single(request) => {
|
||||
Pin::new(&mut self.stream)
|
||||
.start_send(request)
|
||||
.map_err(Error::io)?;
|
||||
if self.state == State::Terminating {
|
||||
trace!("poll_write: sent eof, closing");
|
||||
self.state = State::Closing;
|
||||
}
|
||||
ready!(self.poll_flush(cx))?;
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn poll_flush(&mut self, cx: &mut Context<'_>) -> Result<(), Error> {
|
||||
fn poll_flush(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
|
||||
match Pin::new(&mut self.stream)
|
||||
.poll_flush(cx)
|
||||
.map_err(Error::io)?
|
||||
{
|
||||
Poll::Ready(()) => trace!("poll_flush: flushed"),
|
||||
Poll::Pending => trace!("poll_flush: waiting on socket"),
|
||||
Poll::Ready(()) => {
|
||||
trace!("poll_flush: flushed");
|
||||
Poll::Ready(Ok(()))
|
||||
}
|
||||
Poll::Pending => {
|
||||
trace!("poll_flush: waiting on socket");
|
||||
Poll::Pending
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn poll_shutdown(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
|
||||
if self.state != State::Closing {
|
||||
return Poll::Pending;
|
||||
}
|
||||
|
||||
match Pin::new(&mut self.stream)
|
||||
.poll_close(cx)
|
||||
.map_err(Error::io)?
|
||||
@@ -289,18 +292,30 @@ where
|
||||
&mut self,
|
||||
cx: &mut Context<'_>,
|
||||
) -> Poll<Option<Result<AsyncMessage, Error>>> {
|
||||
let message = self.poll_read(cx)?;
|
||||
let want_flush = self.poll_write(cx)?;
|
||||
if want_flush {
|
||||
self.poll_flush(cx)?;
|
||||
if self.state != State::Closing {
|
||||
// if the state is still active, try read from and write to postgres.
|
||||
let message = self.poll_read(cx)?;
|
||||
let closing = self.poll_write(cx)?;
|
||||
if let Poll::Ready(WriteReady::Terminating) = closing {
|
||||
self.state = State::Closing;
|
||||
}
|
||||
|
||||
if let Poll::Ready(message) = message {
|
||||
return Poll::Ready(Some(Ok(message)));
|
||||
}
|
||||
|
||||
// poll_read returned Pending.
|
||||
// poll_write returned Pending or Ready(WriteReady::WaitingOnRead).
|
||||
// if poll_write returned Ready(WriteReady::WaitingOnRead), then we are waiting to read more data from postgres.
|
||||
if self.state != State::Closing {
|
||||
return Poll::Pending;
|
||||
}
|
||||
}
|
||||
match message {
|
||||
Some(message) => Poll::Ready(Some(Ok(message))),
|
||||
None => match self.poll_shutdown(cx) {
|
||||
Poll::Ready(Ok(())) => Poll::Ready(None),
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),
|
||||
Poll::Pending => Poll::Pending,
|
||||
},
|
||||
|
||||
match self.poll_shutdown(cx) {
|
||||
Poll::Ready(Ok(())) => Poll::Ready(None),
|
||||
Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),
|
||||
Poll::Pending => Poll::Pending,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
//! An asynchronous, pipelined, PostgreSQL client.
|
||||
#![warn(rust_2018_idioms, clippy::all)]
|
||||
#![warn(clippy::all)]
|
||||
|
||||
pub use crate::cancel_token::CancelToken;
|
||||
pub use crate::client::{Client, SocketConfig};
|
||||
|
||||
@@ -11,7 +11,7 @@ mod private {
|
||||
Query(&'a str),
|
||||
}
|
||||
|
||||
impl<'a> ToStatementType<'a> {
|
||||
impl ToStatementType<'_> {
|
||||
pub async fn into_statement(self, client: &Client) -> Result<Statement, Error> {
|
||||
match self {
|
||||
ToStatementType::Statement(s) => Ok(s.clone()),
|
||||
|
||||
@@ -18,6 +18,7 @@ camino = { workspace = true, features = ["serde1"] }
|
||||
humantime-serde.workspace = true
|
||||
hyper = { workspace = true, features = ["client"] }
|
||||
futures.workspace = true
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
tokio = { workspace = true, features = ["sync", "fs", "io-util"] }
|
||||
|
||||
@@ -13,10 +13,12 @@ use std::time::Duration;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
|
||||
use azure_core::HttpClient;
|
||||
use azure_core::TransportOptions;
|
||||
use azure_core::{Continuable, RetryOptions};
|
||||
use azure_identity::DefaultAzureCredential;
|
||||
use azure_storage::StorageCredentials;
|
||||
use azure_storage_blobs::blob::CopyStatus;
|
||||
use azure_storage_blobs::prelude::ClientBuilder;
|
||||
@@ -76,12 +78,18 @@ impl AzureBlobStorage {
|
||||
let credentials = if let Ok(access_key) = env::var("AZURE_STORAGE_ACCESS_KEY") {
|
||||
StorageCredentials::access_key(account.clone(), access_key)
|
||||
} else {
|
||||
let token_credential = DefaultAzureCredential::default();
|
||||
StorageCredentials::token_credential(Arc::new(token_credential))
|
||||
let token_credential = azure_identity::create_default_credential()
|
||||
.context("trying to obtain Azure default credentials")?;
|
||||
StorageCredentials::token_credential(token_credential)
|
||||
};
|
||||
|
||||
// we have an outer retry
|
||||
let builder = ClientBuilder::new(account, credentials).retry(RetryOptions::none());
|
||||
let builder = ClientBuilder::new(account, credentials)
|
||||
// we have an outer retry
|
||||
.retry(RetryOptions::none())
|
||||
// Customize transport to configure conneciton pooling
|
||||
.transport(TransportOptions::new(Self::reqwest_client(
|
||||
azure_config.conn_pool_size,
|
||||
)));
|
||||
|
||||
let client = builder.container_client(azure_config.container_name.to_owned());
|
||||
|
||||
@@ -106,6 +114,14 @@ impl AzureBlobStorage {
|
||||
})
|
||||
}
|
||||
|
||||
fn reqwest_client(conn_pool_size: usize) -> Arc<dyn HttpClient> {
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.pool_max_idle_per_host(conn_pool_size)
|
||||
.build()
|
||||
.expect("failed to build `reqwest` client");
|
||||
Arc::new(client)
|
||||
}
|
||||
|
||||
pub fn relative_path_to_name(&self, path: &RemotePath) -> String {
|
||||
assert_eq!(std::path::MAIN_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
let path_string = path.get_path().as_str();
|
||||
@@ -544,9 +560,9 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
.await
|
||||
}
|
||||
|
||||
async fn delete_objects<'a>(
|
||||
async fn delete_objects(
|
||||
&self,
|
||||
paths: &'a [RemotePath],
|
||||
paths: &[RemotePath],
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let kind = RequestKind::Delete;
|
||||
@@ -624,6 +640,10 @@ impl RemoteStorage for AzureBlobStorage {
|
||||
res
|
||||
}
|
||||
|
||||
fn max_keys_per_delete(&self) -> usize {
|
||||
super::MAX_KEYS_PER_DELETE_AZURE
|
||||
}
|
||||
|
||||
async fn copy(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
|
||||
@@ -114,6 +114,16 @@ fn default_max_keys_per_list_response() -> Option<i32> {
|
||||
DEFAULT_MAX_KEYS_PER_LIST_RESPONSE
|
||||
}
|
||||
|
||||
fn default_azure_conn_pool_size() -> usize {
|
||||
// Conservative default: no connection pooling. At time of writing this is the Azure
|
||||
// SDK's default as well, due to historic reports of hard-to-reproduce issues
|
||||
// (https://github.com/hyperium/hyper/issues/2312)
|
||||
//
|
||||
// However, using connection pooling is important to avoid exhausting client ports when
|
||||
// doing huge numbers of requests (https://github.com/neondatabase/cloud/issues/20971)
|
||||
0
|
||||
}
|
||||
|
||||
impl Debug for S3Config {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("S3Config")
|
||||
@@ -146,6 +156,8 @@ pub struct AzureConfig {
|
||||
pub concurrency_limit: NonZeroUsize,
|
||||
#[serde(default = "default_max_keys_per_list_response")]
|
||||
pub max_keys_per_list_response: Option<i32>,
|
||||
#[serde(default = "default_azure_conn_pool_size")]
|
||||
pub conn_pool_size: usize,
|
||||
}
|
||||
|
||||
fn default_remote_storage_azure_concurrency_limit() -> NonZeroUsize {
|
||||
@@ -302,6 +314,7 @@ timeout = '5s'";
|
||||
container_region = 'westeurope'
|
||||
upload_storage_class = 'INTELLIGENT_TIERING'
|
||||
timeout = '7s'
|
||||
conn_pool_size = 8
|
||||
";
|
||||
|
||||
let config = parse(toml).unwrap();
|
||||
@@ -316,6 +329,7 @@ timeout = '5s'";
|
||||
prefix_in_container: None,
|
||||
concurrency_limit: default_remote_storage_azure_concurrency_limit(),
|
||||
max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,
|
||||
conn_pool_size: 8,
|
||||
}),
|
||||
timeout: Duration::from_secs(7),
|
||||
small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT
|
||||
|
||||
@@ -70,7 +70,14 @@ pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;
|
||||
pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;
|
||||
|
||||
/// As defined in S3 docs
|
||||
pub const MAX_KEYS_PER_DELETE: usize = 1000;
|
||||
///
|
||||
/// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html>
|
||||
pub const MAX_KEYS_PER_DELETE_S3: usize = 1000;
|
||||
|
||||
/// As defined in Azure docs
|
||||
///
|
||||
/// <https://learn.microsoft.com/en-us/rest/api/storageservices/blob-batch>
|
||||
pub const MAX_KEYS_PER_DELETE_AZURE: usize = 256;
|
||||
|
||||
const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
|
||||
|
||||
@@ -334,12 +341,20 @@ pub trait RemoteStorage: Send + Sync + 'static {
|
||||
/// If the operation fails because of timeout or cancellation, the root cause of the error will be
|
||||
/// set to `TimeoutOrCancel`. In such situation it is unknown which deletions, if any, went
|
||||
/// through.
|
||||
async fn delete_objects<'a>(
|
||||
async fn delete_objects(
|
||||
&self,
|
||||
paths: &'a [RemotePath],
|
||||
paths: &[RemotePath],
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()>;
|
||||
|
||||
/// Returns the maximum number of keys that a call to [`Self::delete_objects`] can delete without chunking
|
||||
///
|
||||
/// The value returned is only an optimization hint, One can pass larger number of objects to
|
||||
/// `delete_objects` as well.
|
||||
///
|
||||
/// The value is guaranteed to be >= 1.
|
||||
fn max_keys_per_delete(&self) -> usize;
|
||||
|
||||
/// Deletes all objects matching the given prefix.
|
||||
///
|
||||
/// NB: this uses NoDelimiter and will match partial prefixes. For example, the prefix /a/b will
|
||||
@@ -533,6 +548,16 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
|
||||
}
|
||||
}
|
||||
|
||||
/// [`RemoteStorage::max_keys_per_delete`]
|
||||
pub fn max_keys_per_delete(&self) -> usize {
|
||||
match self {
|
||||
Self::LocalFs(s) => s.max_keys_per_delete(),
|
||||
Self::AwsS3(s) => s.max_keys_per_delete(),
|
||||
Self::AzureBlob(s) => s.max_keys_per_delete(),
|
||||
Self::Unreliable(s) => s.max_keys_per_delete(),
|
||||
}
|
||||
}
|
||||
|
||||
/// See [`RemoteStorage::delete_prefix`]
|
||||
pub async fn delete_prefix(
|
||||
&self,
|
||||
|
||||
@@ -562,9 +562,9 @@ impl RemoteStorage for LocalFs {
|
||||
}
|
||||
}
|
||||
|
||||
async fn delete_objects<'a>(
|
||||
async fn delete_objects(
|
||||
&self,
|
||||
paths: &'a [RemotePath],
|
||||
paths: &[RemotePath],
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
for path in paths {
|
||||
@@ -573,6 +573,10 @@ impl RemoteStorage for LocalFs {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn max_keys_per_delete(&self) -> usize {
|
||||
super::MAX_KEYS_PER_DELETE_S3
|
||||
}
|
||||
|
||||
async fn copy(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
|
||||
@@ -48,7 +48,7 @@ use crate::{
|
||||
metrics::{start_counting_cancelled_wait, start_measuring_requests},
|
||||
support::PermitCarrying,
|
||||
ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
|
||||
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
|
||||
RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE_S3,
|
||||
REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
};
|
||||
|
||||
@@ -355,7 +355,7 @@ impl S3Bucket {
|
||||
let kind = RequestKind::Delete;
|
||||
let mut cancel = std::pin::pin!(cancel.cancelled());
|
||||
|
||||
for chunk in delete_objects.chunks(MAX_KEYS_PER_DELETE) {
|
||||
for chunk in delete_objects.chunks(MAX_KEYS_PER_DELETE_S3) {
|
||||
let started_at = start_measuring_requests(kind);
|
||||
|
||||
let req = self
|
||||
@@ -813,9 +813,9 @@ impl RemoteStorage for S3Bucket {
|
||||
.await
|
||||
}
|
||||
|
||||
async fn delete_objects<'a>(
|
||||
async fn delete_objects(
|
||||
&self,
|
||||
paths: &'a [RemotePath],
|
||||
paths: &[RemotePath],
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
let kind = RequestKind::Delete;
|
||||
@@ -832,6 +832,10 @@ impl RemoteStorage for S3Bucket {
|
||||
self.delete_oids(&permit, &delete_objects, cancel).await
|
||||
}
|
||||
|
||||
fn max_keys_per_delete(&self) -> usize {
|
||||
MAX_KEYS_PER_DELETE_S3
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {
|
||||
let paths = std::array::from_ref(path);
|
||||
self.delete_objects(paths, cancel).await
|
||||
|
||||
@@ -181,9 +181,9 @@ impl RemoteStorage for UnreliableWrapper {
|
||||
self.delete_inner(path, true, cancel).await
|
||||
}
|
||||
|
||||
async fn delete_objects<'a>(
|
||||
async fn delete_objects(
|
||||
&self,
|
||||
paths: &'a [RemotePath],
|
||||
paths: &[RemotePath],
|
||||
cancel: &CancellationToken,
|
||||
) -> anyhow::Result<()> {
|
||||
self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?;
|
||||
@@ -203,6 +203,10 @@ impl RemoteStorage for UnreliableWrapper {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn max_keys_per_delete(&self) -> usize {
|
||||
self.inner.max_keys_per_delete()
|
||||
}
|
||||
|
||||
async fn copy(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
|
||||
@@ -218,6 +218,7 @@ async fn create_azure_client(
|
||||
prefix_in_container: Some(format!("test_{millis}_{random:08x}/")),
|
||||
concurrency_limit: NonZeroUsize::new(100).unwrap(),
|
||||
max_keys_per_list_response,
|
||||
conn_pool_size: 8,
|
||||
}),
|
||||
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
|
||||
small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,
|
||||
|
||||
@@ -5,6 +5,9 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
serde.workspace = true
|
||||
const_format.workspace = true
|
||||
serde.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
pq_proto.workspace = true
|
||||
tokio.workspace = true
|
||||
utils.workspace = true
|
||||
|
||||
@@ -1,10 +1,27 @@
|
||||
#![deny(unsafe_code)]
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
use const_format::formatcp;
|
||||
use pq_proto::SystemId;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Public API types
|
||||
pub mod models;
|
||||
|
||||
/// Consensus logical timestamp. Note: it is a part of sk control file.
|
||||
pub type Term = u64;
|
||||
pub const INVALID_TERM: Term = 0;
|
||||
|
||||
/// Information about Postgres. Safekeeper gets it once and then verifies all
|
||||
/// further connections from computes match. Note: it is a part of sk control
|
||||
/// file.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ServerInfo {
|
||||
/// Postgres server version
|
||||
pub pg_version: u32,
|
||||
pub system_id: SystemId,
|
||||
pub wal_seg_size: u32,
|
||||
}
|
||||
|
||||
pub const DEFAULT_PG_LISTEN_PORT: u16 = 5454;
|
||||
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
|
||||
|
||||
|
||||
@@ -1,10 +1,23 @@
|
||||
//! Types used in safekeeper http API. Many of them are also reused internally.
|
||||
|
||||
use postgres_ffi::TimestampTz;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::net::SocketAddr;
|
||||
use tokio::time::Instant;
|
||||
|
||||
use utils::{
|
||||
id::{NodeId, TenantId, TimelineId},
|
||||
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
|
||||
lsn::Lsn,
|
||||
pageserver_feedback::PageserverFeedback,
|
||||
};
|
||||
|
||||
use crate::{ServerInfo, Term};
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct SafekeeperStatus {
|
||||
pub id: NodeId,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct TimelineCreateRequest {
|
||||
pub tenant_id: TenantId,
|
||||
@@ -18,6 +31,161 @@ pub struct TimelineCreateRequest {
|
||||
pub local_start_lsn: Option<Lsn>,
|
||||
}
|
||||
|
||||
/// Same as TermLsn, but serializes LSN using display serializer
|
||||
/// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response.
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct TermSwitchApiEntry {
|
||||
pub term: Term,
|
||||
pub lsn: Lsn,
|
||||
}
|
||||
|
||||
/// Augment AcceptorState with last_log_term for convenience
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct AcceptorStateStatus {
|
||||
pub term: Term,
|
||||
pub epoch: Term, // aka last_log_term, old `epoch` name is left for compatibility
|
||||
pub term_history: Vec<TermSwitchApiEntry>,
|
||||
}
|
||||
|
||||
/// Things safekeeper should know about timeline state on peers.
|
||||
/// Used as both model and internally.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PeerInfo {
|
||||
pub sk_id: NodeId,
|
||||
pub term: Term,
|
||||
/// Term of the last entry.
|
||||
pub last_log_term: Term,
|
||||
/// LSN of the last record.
|
||||
pub flush_lsn: Lsn,
|
||||
pub commit_lsn: Lsn,
|
||||
/// Since which LSN safekeeper has WAL.
|
||||
pub local_start_lsn: Lsn,
|
||||
/// When info was received. Serde annotations are not very useful but make
|
||||
/// the code compile -- we don't rely on this field externally.
|
||||
#[serde(skip)]
|
||||
#[serde(default = "Instant::now")]
|
||||
pub ts: Instant,
|
||||
pub pg_connstr: String,
|
||||
pub http_connstr: String,
|
||||
}
|
||||
|
||||
pub type FullTransactionId = u64;
|
||||
|
||||
/// Hot standby feedback received from replica
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct HotStandbyFeedback {
|
||||
pub ts: TimestampTz,
|
||||
pub xmin: FullTransactionId,
|
||||
pub catalog_xmin: FullTransactionId,
|
||||
}
|
||||
|
||||
pub const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0;
|
||||
|
||||
impl HotStandbyFeedback {
|
||||
pub fn empty() -> HotStandbyFeedback {
|
||||
HotStandbyFeedback {
|
||||
ts: 0,
|
||||
xmin: 0,
|
||||
catalog_xmin: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Standby status update
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct StandbyReply {
|
||||
pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the standby.
|
||||
pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby.
|
||||
pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby.
|
||||
pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01.
|
||||
pub reply_requested: bool,
|
||||
}
|
||||
|
||||
impl StandbyReply {
|
||||
pub fn empty() -> Self {
|
||||
StandbyReply {
|
||||
write_lsn: Lsn::INVALID,
|
||||
flush_lsn: Lsn::INVALID,
|
||||
apply_lsn: Lsn::INVALID,
|
||||
reply_ts: 0,
|
||||
reply_requested: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
pub struct StandbyFeedback {
|
||||
pub reply: StandbyReply,
|
||||
pub hs_feedback: HotStandbyFeedback,
|
||||
}
|
||||
|
||||
impl StandbyFeedback {
|
||||
pub fn empty() -> Self {
|
||||
StandbyFeedback {
|
||||
reply: StandbyReply::empty(),
|
||||
hs_feedback: HotStandbyFeedback::empty(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Receiver is either pageserver or regular standby, which have different
|
||||
/// feedbacks.
|
||||
/// Used as both model and internally.
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
|
||||
pub enum ReplicationFeedback {
|
||||
Pageserver(PageserverFeedback),
|
||||
Standby(StandbyFeedback),
|
||||
}
|
||||
|
||||
/// Uniquely identifies a WAL service connection. Logged in spans for
|
||||
/// observability.
|
||||
pub type ConnectionId = u32;
|
||||
|
||||
/// Serialize is used only for json'ing in API response. Also used internally.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WalSenderState {
|
||||
pub ttid: TenantTimelineId,
|
||||
pub addr: SocketAddr,
|
||||
pub conn_id: ConnectionId,
|
||||
// postgres application_name
|
||||
pub appname: Option<String>,
|
||||
pub feedback: ReplicationFeedback,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WalReceiverState {
|
||||
/// None means it is recovery initiated by us (this safekeeper).
|
||||
pub conn_id: Option<ConnectionId>,
|
||||
pub status: WalReceiverStatus,
|
||||
}
|
||||
|
||||
/// Walreceiver status. Currently only whether it passed voting stage and
|
||||
/// started receiving the stream, but it is easy to add more if needed.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum WalReceiverStatus {
|
||||
Voting,
|
||||
Streaming,
|
||||
}
|
||||
|
||||
/// Info about timeline on safekeeper ready for reporting.
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct TimelineStatus {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub acceptor_state: AcceptorStateStatus,
|
||||
pub pg_info: ServerInfo,
|
||||
pub flush_lsn: Lsn,
|
||||
pub timeline_start_lsn: Lsn,
|
||||
pub local_start_lsn: Lsn,
|
||||
pub commit_lsn: Lsn,
|
||||
pub backup_lsn: Lsn,
|
||||
pub peer_horizon_lsn: Lsn,
|
||||
pub remote_consistent_lsn: Lsn,
|
||||
pub peers: Vec<PeerInfo>,
|
||||
pub walsenders: Vec<WalSenderState>,
|
||||
pub walreceivers: Vec<WalReceiverState>,
|
||||
}
|
||||
|
||||
fn lsn_invalid() -> Lsn {
|
||||
Lsn::INVALID
|
||||
}
|
||||
|
||||
@@ -15,17 +15,20 @@ arc-swap.workspace = true
|
||||
sentry.workspace = true
|
||||
async-compression.workspace = true
|
||||
anyhow.workspace = true
|
||||
backtrace.workspace = true
|
||||
bincode.workspace = true
|
||||
bytes.workspace = true
|
||||
camino.workspace = true
|
||||
chrono.workspace = true
|
||||
diatomic-waker.workspace = true
|
||||
flate2.workspace = true
|
||||
git-version.workspace = true
|
||||
hex = { workspace = true, features = ["serde"] }
|
||||
humantime.workspace = true
|
||||
hyper0 = { workspace = true, features = ["full"] }
|
||||
itertools.workspace = true
|
||||
fail.workspace = true
|
||||
futures = { workspace = true}
|
||||
futures = { workspace = true }
|
||||
jemalloc_pprof.workspace = true
|
||||
jsonwebtoken.workspace = true
|
||||
nix.workspace = true
|
||||
|
||||
@@ -1,15 +1,22 @@
|
||||
use crate::auth::{AuthError, Claims, SwappableJwtAuth};
|
||||
use crate::http::error::{api_error_handler, route_error_handler, ApiError};
|
||||
use crate::http::request::{get_query_param, parse_query_param};
|
||||
use crate::pprof;
|
||||
use ::pprof::protos::Message as _;
|
||||
use ::pprof::ProfilerGuardBuilder;
|
||||
use anyhow::{anyhow, Context};
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use hyper::header::{HeaderName, AUTHORIZATION, CONTENT_DISPOSITION};
|
||||
use hyper::http::HeaderValue;
|
||||
use hyper::Method;
|
||||
use hyper::{header::CONTENT_TYPE, Body, Request, Response};
|
||||
use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use routerify::ext::RequestExt;
|
||||
use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_util::io::ReaderStream;
|
||||
use tracing::{debug, info, info_span, warn, Instrument};
|
||||
|
||||
@@ -18,11 +25,6 @@ use std::io::Write as _;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use pprof::protos::Message as _;
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
|
||||
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"libmetrics_metric_handler_requests_total",
|
||||
@@ -365,7 +367,7 @@ pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, A
|
||||
|
||||
// Take the profile.
|
||||
let report = tokio::task::spawn_blocking(move || {
|
||||
let guard = pprof::ProfilerGuardBuilder::default()
|
||||
let guard = ProfilerGuardBuilder::default()
|
||||
.frequency(frequency_hz)
|
||||
.blocklist(&["libc", "libgcc", "pthread", "vdso"])
|
||||
.build()?;
|
||||
@@ -457,10 +459,34 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
|
||||
}
|
||||
|
||||
Format::Pprof => {
|
||||
let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof())
|
||||
.await
|
||||
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let data = tokio::task::spawn_blocking(move || {
|
||||
let bytes = prof_ctl.dump_pprof()?;
|
||||
// Symbolize the profile.
|
||||
// TODO: consider moving this upstream to jemalloc_pprof and avoiding the
|
||||
// serialization roundtrip.
|
||||
static STRIP_FUNCTIONS: Lazy<Vec<(Regex, bool)>> = Lazy::new(|| {
|
||||
// Functions to strip from profiles. If true, also remove child frames.
|
||||
vec![
|
||||
(Regex::new("^__rust").unwrap(), false),
|
||||
(Regex::new("^_start$").unwrap(), false),
|
||||
(Regex::new("^irallocx_prof").unwrap(), true),
|
||||
(Regex::new("^prof_alloc_prep").unwrap(), true),
|
||||
(Regex::new("^std::rt::lang_start").unwrap(), false),
|
||||
(Regex::new("^std::sys::backtrace::__rust").unwrap(), false),
|
||||
]
|
||||
});
|
||||
let profile = pprof::decode(&bytes)?;
|
||||
let profile = pprof::symbolize(profile)?;
|
||||
let profile = pprof::strip_locations(
|
||||
profile,
|
||||
&["libc", "libgcc", "pthread", "vdso"],
|
||||
&STRIP_FUNCTIONS,
|
||||
);
|
||||
pprof::encode(&profile)
|
||||
})
|
||||
.await
|
||||
.map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, "application/octet-stream")
|
||||
|
||||
@@ -94,6 +94,10 @@ pub mod toml_edit_ext;
|
||||
|
||||
pub mod circuit_breaker;
|
||||
|
||||
pub mod try_rcu;
|
||||
|
||||
pub mod pprof;
|
||||
|
||||
// Re-export used in macro. Avoids adding git-version as dep in target crates.
|
||||
#[doc(hidden)]
|
||||
pub use git_version;
|
||||
|
||||
190
libs/utils/src/pprof.rs
Normal file
190
libs/utils/src/pprof.rs
Normal file
@@ -0,0 +1,190 @@
|
||||
use flate2::write::{GzDecoder, GzEncoder};
|
||||
use flate2::Compression;
|
||||
use itertools::Itertools as _;
|
||||
use once_cell::sync::Lazy;
|
||||
use pprof::protos::{Function, Line, Message as _, Profile};
|
||||
use regex::Regex;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::ffi::c_void;
|
||||
use std::io::Write as _;
|
||||
|
||||
/// Decodes a gzip-compressed Protobuf-encoded pprof profile.
|
||||
pub fn decode(bytes: &[u8]) -> anyhow::Result<Profile> {
|
||||
let mut gz = GzDecoder::new(Vec::new());
|
||||
gz.write_all(bytes)?;
|
||||
Ok(Profile::parse_from_bytes(&gz.finish()?)?)
|
||||
}
|
||||
|
||||
/// Encodes a pprof profile as gzip-compressed Protobuf.
|
||||
pub fn encode(profile: &Profile) -> anyhow::Result<Vec<u8>> {
|
||||
let mut gz = GzEncoder::new(Vec::new(), Compression::default());
|
||||
profile.write_to_writer(&mut gz)?;
|
||||
Ok(gz.finish()?)
|
||||
}
|
||||
|
||||
/// Symbolizes a pprof profile using the current binary.
|
||||
pub fn symbolize(mut profile: Profile) -> anyhow::Result<Profile> {
|
||||
if !profile.function.is_empty() {
|
||||
return Ok(profile); // already symbolized
|
||||
}
|
||||
|
||||
// Collect function names.
|
||||
let mut functions: HashMap<String, Function> = HashMap::new();
|
||||
let mut strings: HashMap<String, i64> = profile
|
||||
.string_table
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(i, s)| (s, i as i64))
|
||||
.collect();
|
||||
|
||||
// Helper to look up or register a string.
|
||||
let mut string_id = |s: &str| -> i64 {
|
||||
// Don't use .entry() to avoid unnecessary allocations.
|
||||
if let Some(id) = strings.get(s) {
|
||||
return *id;
|
||||
}
|
||||
let id = strings.len() as i64;
|
||||
strings.insert(s.to_string(), id);
|
||||
id
|
||||
};
|
||||
|
||||
for loc in &mut profile.location {
|
||||
if !loc.line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Resolve the line and function for each location.
|
||||
backtrace::resolve(loc.address as *mut c_void, |symbol| {
|
||||
let Some(symname) = symbol.name() else {
|
||||
return;
|
||||
};
|
||||
let mut name = symname.to_string();
|
||||
|
||||
// Strip the Rust monomorphization suffix from the symbol name.
|
||||
static SUFFIX_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new("::h[0-9a-f]{16}$").expect("invalid regex"));
|
||||
if let Some(m) = SUFFIX_REGEX.find(&name) {
|
||||
name.truncate(m.start());
|
||||
}
|
||||
|
||||
let function_id = match functions.get(&name) {
|
||||
Some(function) => function.id,
|
||||
None => {
|
||||
let id = functions.len() as u64 + 1;
|
||||
let system_name = String::from_utf8_lossy(symname.as_bytes());
|
||||
let filename = symbol
|
||||
.filename()
|
||||
.map(|path| path.to_string_lossy())
|
||||
.unwrap_or(Cow::Borrowed(""));
|
||||
let function = Function {
|
||||
id,
|
||||
name: string_id(&name),
|
||||
system_name: string_id(&system_name),
|
||||
filename: string_id(&filename),
|
||||
..Default::default()
|
||||
};
|
||||
functions.insert(name, function);
|
||||
id
|
||||
}
|
||||
};
|
||||
loc.line.push(Line {
|
||||
function_id,
|
||||
line: symbol.lineno().unwrap_or(0) as i64,
|
||||
..Default::default()
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Store the resolved functions, and mark the mapping as resolved.
|
||||
profile.function = functions.into_values().sorted_by_key(|f| f.id).collect();
|
||||
profile.string_table = strings
|
||||
.into_iter()
|
||||
.sorted_by_key(|(_, i)| *i)
|
||||
.map(|(s, _)| s)
|
||||
.collect();
|
||||
|
||||
for mapping in &mut profile.mapping {
|
||||
mapping.has_functions = true;
|
||||
mapping.has_filenames = true;
|
||||
}
|
||||
|
||||
Ok(profile)
|
||||
}
|
||||
|
||||
/// Strips locations (stack frames) matching the given mappings (substring) or function names
|
||||
/// (regex). The function bool specifies whether child frames should be stripped as well.
|
||||
///
|
||||
/// The string definitions are left behind in the profile for simplicity, to avoid rewriting all
|
||||
/// string references.
|
||||
pub fn strip_locations(
|
||||
mut profile: Profile,
|
||||
mappings: &[&str],
|
||||
functions: &[(Regex, bool)],
|
||||
) -> Profile {
|
||||
// Strip mappings.
|
||||
let mut strip_mappings: HashSet<u64> = HashSet::new();
|
||||
|
||||
profile.mapping.retain(|mapping| {
|
||||
let Some(name) = profile.string_table.get(mapping.filename as usize) else {
|
||||
return true;
|
||||
};
|
||||
if mappings.iter().any(|substr| name.contains(substr)) {
|
||||
strip_mappings.insert(mapping.id);
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
// Strip functions.
|
||||
let mut strip_functions: HashMap<u64, bool> = HashMap::new();
|
||||
|
||||
profile.function.retain(|function| {
|
||||
let Some(name) = profile.string_table.get(function.name as usize) else {
|
||||
return true;
|
||||
};
|
||||
for (regex, strip_children) in functions {
|
||||
if regex.is_match(name) {
|
||||
strip_functions.insert(function.id, *strip_children);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
// Strip locations. The bool specifies whether child frames should be stripped too.
|
||||
let mut strip_locations: HashMap<u64, bool> = HashMap::new();
|
||||
|
||||
profile.location.retain(|location| {
|
||||
for line in &location.line {
|
||||
if let Some(strip_children) = strip_functions.get(&line.function_id) {
|
||||
strip_locations.insert(location.id, *strip_children);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if strip_mappings.contains(&location.mapping_id) {
|
||||
strip_locations.insert(location.id, false);
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
// Strip sample locations.
|
||||
for sample in &mut profile.sample {
|
||||
// First, find the uppermost function with child removal and truncate the stack.
|
||||
if let Some(truncate) = sample
|
||||
.location_id
|
||||
.iter()
|
||||
.rposition(|id| strip_locations.get(id) == Some(&true))
|
||||
{
|
||||
sample.location_id.drain(..=truncate);
|
||||
}
|
||||
// Next, strip any individual frames without child removal.
|
||||
sample
|
||||
.location_id
|
||||
.retain(|id| !strip_locations.contains_key(id));
|
||||
}
|
||||
|
||||
profile
|
||||
}
|
||||
77
libs/utils/src/try_rcu.rs
Normal file
77
libs/utils/src/try_rcu.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
//! Try RCU extension lifted from <https://github.com/vorner/arc-swap/issues/94#issuecomment-1987154023>
|
||||
|
||||
pub trait ArcSwapExt<T> {
|
||||
/// [`ArcSwap::rcu`](arc_swap::ArcSwap::rcu), but with Result that short-circuits on error.
|
||||
fn try_rcu<R, F, E>(&self, f: F) -> Result<T, E>
|
||||
where
|
||||
F: FnMut(&T) -> Result<R, E>,
|
||||
R: Into<T>;
|
||||
}
|
||||
|
||||
impl<T, S> ArcSwapExt<T> for arc_swap::ArcSwapAny<T, S>
|
||||
where
|
||||
T: arc_swap::RefCnt,
|
||||
S: arc_swap::strategy::CaS<T>,
|
||||
{
|
||||
fn try_rcu<R, F, E>(&self, mut f: F) -> Result<T, E>
|
||||
where
|
||||
F: FnMut(&T) -> Result<R, E>,
|
||||
R: Into<T>,
|
||||
{
|
||||
fn ptr_eq<Base, A, B>(a: A, b: B) -> bool
|
||||
where
|
||||
A: arc_swap::AsRaw<Base>,
|
||||
B: arc_swap::AsRaw<Base>,
|
||||
{
|
||||
let a = a.as_raw();
|
||||
let b = b.as_raw();
|
||||
std::ptr::eq(a, b)
|
||||
}
|
||||
|
||||
let mut cur = self.load();
|
||||
loop {
|
||||
let new = f(&cur)?.into();
|
||||
let prev = self.compare_and_swap(&*cur, new);
|
||||
let swapped = ptr_eq(&*cur, &*prev);
|
||||
if swapped {
|
||||
return Ok(arc_swap::Guard::into_inner(prev));
|
||||
} else {
|
||||
cur = prev;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use arc_swap::ArcSwap;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[test]
|
||||
fn test_try_rcu_success() {
|
||||
let swap = ArcSwap::from(Arc::new(42));
|
||||
|
||||
let result = swap.try_rcu(|value| -> Result<_, String> { Ok(**value + 1) });
|
||||
|
||||
assert!(result.is_ok());
|
||||
assert_eq!(**swap.load(), 43);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_try_rcu_error() {
|
||||
let swap = ArcSwap::from(Arc::new(42));
|
||||
|
||||
let result = swap.try_rcu(|value| -> Result<i32, _> {
|
||||
if **value == 42 {
|
||||
Err("err")
|
||||
} else {
|
||||
Ok(**value + 1)
|
||||
}
|
||||
});
|
||||
|
||||
assert!(result.is_err());
|
||||
assert_eq!(result.unwrap_err(), "err");
|
||||
assert_eq!(**swap.load(), 42);
|
||||
}
|
||||
}
|
||||
@@ -37,7 +37,7 @@ message ValueMeta {
|
||||
}
|
||||
|
||||
message CompactKey {
|
||||
int64 high = 1;
|
||||
int64 low = 2;
|
||||
uint64 high = 1;
|
||||
uint64 low = 2;
|
||||
}
|
||||
|
||||
|
||||
@@ -236,8 +236,8 @@ impl From<ValueMeta> for proto::ValueMeta {
|
||||
impl From<CompactKey> for proto::CompactKey {
|
||||
fn from(value: CompactKey) -> Self {
|
||||
proto::CompactKey {
|
||||
high: (value.raw() >> 64) as i64,
|
||||
low: value.raw() as i64,
|
||||
high: (value.raw() >> 64) as u64,
|
||||
low: value.raw() as u64,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -354,3 +354,64 @@ impl From<proto::CompactKey> for CompactKey {
|
||||
(((value.high as i128) << 64) | (value.low as i128)).into()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compact_key_with_large_relnode() {
|
||||
use pageserver_api::key::Key;
|
||||
|
||||
let inputs = vec![
|
||||
Key {
|
||||
field1: 0,
|
||||
field2: 0x100,
|
||||
field3: 0x200,
|
||||
field4: 0,
|
||||
field5: 0x10,
|
||||
field6: 0x5,
|
||||
},
|
||||
Key {
|
||||
field1: 0,
|
||||
field2: 0x100,
|
||||
field3: 0x200,
|
||||
field4: 0x007FFFFF,
|
||||
field5: 0x10,
|
||||
field6: 0x5,
|
||||
},
|
||||
Key {
|
||||
field1: 0,
|
||||
field2: 0x100,
|
||||
field3: 0x200,
|
||||
field4: 0x00800000,
|
||||
field5: 0x10,
|
||||
field6: 0x5,
|
||||
},
|
||||
Key {
|
||||
field1: 0,
|
||||
field2: 0x100,
|
||||
field3: 0x200,
|
||||
field4: 0x00800001,
|
||||
field5: 0x10,
|
||||
field6: 0x5,
|
||||
},
|
||||
Key {
|
||||
field1: 0,
|
||||
field2: 0xFFFFFFFF,
|
||||
field3: 0xFFFFFFFF,
|
||||
field4: 0xFFFFFFFF,
|
||||
field5: 0x0,
|
||||
field6: 0x0,
|
||||
},
|
||||
];
|
||||
|
||||
for input in inputs {
|
||||
assert!(input.is_valid_key_on_write_path());
|
||||
let compact = input.to_compact();
|
||||
let proto: proto::CompactKey = compact.into();
|
||||
let from_proto: CompactKey = proto.into();
|
||||
|
||||
assert_eq!(
|
||||
compact, from_proto,
|
||||
"Round trip failed for key with relnode={:#x}",
|
||||
input.field4
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,9 +30,9 @@ fn main() -> anyhow::Result<()> {
|
||||
let pgxn_neon = std::fs::canonicalize(pgxn_neon)?;
|
||||
let pgxn_neon = pgxn_neon.to_str().ok_or(anyhow!("Bad non-UTF path"))?;
|
||||
|
||||
println!("cargo:rustc-link-lib=static=walproposer");
|
||||
println!("cargo:rustc-link-lib=static=pgport");
|
||||
println!("cargo:rustc-link-lib=static=pgcommon");
|
||||
println!("cargo:rustc-link-lib=static=walproposer");
|
||||
println!("cargo:rustc-link-search={walproposer_lib_search_str}");
|
||||
|
||||
// Rebuild crate when libwalproposer.a changes
|
||||
|
||||
@@ -270,12 +270,18 @@ impl Client {
|
||||
Ok(body)
|
||||
}
|
||||
|
||||
pub async fn tenant_config(&self, req: &TenantConfigRequest) -> Result<()> {
|
||||
pub async fn set_tenant_config(&self, req: &TenantConfigRequest) -> Result<()> {
|
||||
let uri = format!("{}/v1/tenant/config", self.mgmt_api_endpoint);
|
||||
self.request(Method::PUT, &uri, req).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn patch_tenant_config(&self, req: &TenantConfigPatchRequest) -> Result<()> {
|
||||
let uri = format!("{}/v1/tenant/config", self.mgmt_api_endpoint);
|
||||
self.request(Method::PATCH, &uri, req).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn tenant_secondary_download(
|
||||
&self,
|
||||
tenant_id: TenantShardId,
|
||||
|
||||
@@ -272,7 +272,7 @@ struct CompactionJob<E: CompactionJobExecutor> {
|
||||
completed: bool,
|
||||
}
|
||||
|
||||
impl<'a, E> LevelCompactionState<'a, E>
|
||||
impl<E> LevelCompactionState<'_, E>
|
||||
where
|
||||
E: CompactionJobExecutor,
|
||||
{
|
||||
|
||||
@@ -224,9 +224,8 @@ impl<L> Level<L> {
|
||||
}
|
||||
|
||||
// recalculate depth if this was the last event at this point
|
||||
let more_events_at_this_key = events_iter
|
||||
.peek()
|
||||
.map_or(false, |next_e| next_e.key == e.key);
|
||||
let more_events_at_this_key =
|
||||
events_iter.peek().is_some_and(|next_e| next_e.key == e.key);
|
||||
if !more_events_at_this_key {
|
||||
let mut active_depth = 0;
|
||||
for (_end_lsn, is_image, _idx) in active_set.iter().rev() {
|
||||
|
||||
@@ -148,7 +148,7 @@ pub trait CompactionDeltaLayer<E: CompactionJobExecutor + ?Sized>: CompactionLay
|
||||
Self: 'a;
|
||||
|
||||
/// Return all keys in this delta layer.
|
||||
fn load_keys<'a>(
|
||||
fn load_keys(
|
||||
&self,
|
||||
ctx: &E::RequestContext,
|
||||
) -> impl Future<Output = anyhow::Result<Vec<Self::DeltaEntry<'_>>>> + Send;
|
||||
|
||||
@@ -143,7 +143,7 @@ impl interface::CompactionLayer<Key> for Arc<MockDeltaLayer> {
|
||||
impl interface::CompactionDeltaLayer<MockTimeline> for Arc<MockDeltaLayer> {
|
||||
type DeltaEntry<'a> = MockRecord;
|
||||
|
||||
async fn load_keys<'a>(&self, _ctx: &MockRequestContext) -> anyhow::Result<Vec<MockRecord>> {
|
||||
async fn load_keys(&self, _ctx: &MockRequestContext) -> anyhow::Result<Vec<MockRecord>> {
|
||||
Ok(self.records.clone())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ async fn main_impl(args: Args) -> anyhow::Result<()> {
|
||||
println!("operating on timeline {}", timeline);
|
||||
|
||||
mgmt_api_client
|
||||
.tenant_config(&TenantConfigRequest {
|
||||
.set_tenant_config(&TenantConfigRequest {
|
||||
tenant_id: timeline.tenant_id,
|
||||
config: TenantConfig::default(),
|
||||
})
|
||||
|
||||
@@ -248,7 +248,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, W> Basebackup<'a, W>
|
||||
impl<W> Basebackup<'_, W>
|
||||
where
|
||||
W: AsyncWrite + Send + Sync + Unpin,
|
||||
{
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use remote_storage::RemotePath;
|
||||
use remote_storage::TimeoutOrCancel;
|
||||
use remote_storage::MAX_KEYS_PER_DELETE;
|
||||
use std::time::Duration;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::info;
|
||||
@@ -131,7 +130,8 @@ impl Deleter {
|
||||
}
|
||||
|
||||
pub(super) async fn background(&mut self) -> Result<(), DeletionQueueError> {
|
||||
self.accumulator.reserve(MAX_KEYS_PER_DELETE);
|
||||
let max_keys_per_delete = self.remote_storage.max_keys_per_delete();
|
||||
self.accumulator.reserve(max_keys_per_delete);
|
||||
|
||||
loop {
|
||||
if self.cancel.is_cancelled() {
|
||||
@@ -156,14 +156,14 @@ impl Deleter {
|
||||
|
||||
match msg {
|
||||
DeleterMessage::Delete(mut list) => {
|
||||
while !list.is_empty() || self.accumulator.len() == MAX_KEYS_PER_DELETE {
|
||||
if self.accumulator.len() == MAX_KEYS_PER_DELETE {
|
||||
while !list.is_empty() || self.accumulator.len() == max_keys_per_delete {
|
||||
if self.accumulator.len() == max_keys_per_delete {
|
||||
self.flush().await?;
|
||||
// If we have received this number of keys, proceed with attempting to execute
|
||||
assert_eq!(self.accumulator.len(), 0);
|
||||
}
|
||||
|
||||
let available_slots = MAX_KEYS_PER_DELETE - self.accumulator.len();
|
||||
let available_slots = max_keys_per_delete - self.accumulator.len();
|
||||
let take_count = std::cmp::min(available_slots, list.len());
|
||||
for path in list.drain(list.len() - take_count..) {
|
||||
self.accumulator.push(path);
|
||||
|
||||
@@ -767,7 +767,27 @@ paths:
|
||||
/v1/tenant/config:
|
||||
put:
|
||||
description: |
|
||||
Update tenant's config.
|
||||
Update tenant's config by setting it to the provided value
|
||||
|
||||
Invalid fields in the tenant config will cause the request to be rejected with status 400.
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TenantConfigRequest"
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/TenantInfo"
|
||||
patch:
|
||||
description: |
|
||||
Update tenant's config additively by patching the updated fields provided.
|
||||
Null values unset the field and non-null values upsert it.
|
||||
|
||||
Invalid fields in the tenant config will cause the request to be rejected with status 400.
|
||||
requestBody:
|
||||
|
||||
@@ -28,6 +28,7 @@ use pageserver_api::models::LsnLease;
|
||||
use pageserver_api::models::LsnLeaseRequest;
|
||||
use pageserver_api::models::OffloadedTimelineInfo;
|
||||
use pageserver_api::models::ShardParameters;
|
||||
use pageserver_api::models::TenantConfigPatchRequest;
|
||||
use pageserver_api::models::TenantDetails;
|
||||
use pageserver_api::models::TenantLocationConfigRequest;
|
||||
use pageserver_api::models::TenantLocationConfigResponse;
|
||||
@@ -96,8 +97,8 @@ use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError};
|
||||
use crate::DEFAULT_PG_VERSION;
|
||||
use crate::{disk_usage_eviction_task, tenant};
|
||||
use pageserver_api::models::{
|
||||
StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest,
|
||||
TimelineInfo,
|
||||
CompactInfoResponse, StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest,
|
||||
TimelineGcRequest, TimelineInfo,
|
||||
};
|
||||
use utils::{
|
||||
auth::SwappableJwtAuth,
|
||||
@@ -1695,7 +1696,47 @@ async fn update_tenant_config_handler(
|
||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
tenant.set_new_tenant_config(new_tenant_conf);
|
||||
|
||||
let _ = tenant
|
||||
.update_tenant_config(|_crnt| Ok(new_tenant_conf.clone()))
|
||||
.expect("Closure returns Ok()");
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn patch_tenant_config_handler(
|
||||
mut request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let request_data: TenantConfigPatchRequest = json_request(&mut request).await?;
|
||||
let tenant_id = request_data.tenant_id;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
|
||||
let tenant_shard_id = TenantShardId::unsharded(tenant_id);
|
||||
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
|
||||
|
||||
let updated = tenant
|
||||
.update_tenant_config(|crnt| crnt.apply_patch(request_data.config.clone()))
|
||||
.map_err(ApiError::BadRequest)?;
|
||||
|
||||
// This is a legacy API that only operates on attached tenants: the preferred
|
||||
// API to use is the location_config/ endpoint, which lets the caller provide
|
||||
// the full LocationConf.
|
||||
let location_conf = LocationConf::attached_single(
|
||||
updated,
|
||||
tenant.get_generation(),
|
||||
&ShardParameters::default(),
|
||||
);
|
||||
|
||||
crate::tenant::Tenant::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
|
||||
.await
|
||||
.map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
@@ -1998,6 +2039,34 @@ async fn timeline_cancel_compact_handler(
|
||||
.await
|
||||
}
|
||||
|
||||
// Get compact info of a timeline
|
||||
async fn timeline_compact_info_handler(
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
let state = get_state(&request);
|
||||
async {
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
let res = tenant.get_scheduled_compaction_tasks(timeline_id);
|
||||
let mut resp = Vec::new();
|
||||
for item in res {
|
||||
resp.push(CompactInfoResponse {
|
||||
compact_key_range: item.compact_key_range,
|
||||
compact_lsn_range: item.compact_lsn_range,
|
||||
sub_compaction: item.sub_compaction,
|
||||
});
|
||||
}
|
||||
json_response(StatusCode::OK, resp)
|
||||
}
|
||||
.instrument(info_span!("timeline_compact_info", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
}
|
||||
|
||||
// Run compaction immediately on given timeline.
|
||||
async fn timeline_compact_handler(
|
||||
mut request: Request<Body>,
|
||||
@@ -2036,15 +2105,30 @@ async fn timeline_compact_handler(
|
||||
parse_query_param::<_, bool>(&request, "wait_until_scheduled_compaction_done")?
|
||||
.unwrap_or(false);
|
||||
|
||||
let sub_compaction = compact_request
|
||||
.as_ref()
|
||||
.map(|r| r.sub_compaction)
|
||||
.unwrap_or(false);
|
||||
let sub_compaction_max_job_size_mb = compact_request
|
||||
.as_ref()
|
||||
.and_then(|r| r.sub_compaction_max_job_size_mb);
|
||||
|
||||
let options = CompactOptions {
|
||||
compact_range: compact_request
|
||||
compact_key_range: compact_request
|
||||
.as_ref()
|
||||
.and_then(|r| r.compact_range.clone()),
|
||||
compact_below_lsn: compact_request.as_ref().and_then(|r| r.compact_below_lsn),
|
||||
.and_then(|r| r.compact_key_range.clone()),
|
||||
compact_lsn_range: compact_request
|
||||
.as_ref()
|
||||
.and_then(|r| r.compact_lsn_range.clone()),
|
||||
flags,
|
||||
sub_compaction,
|
||||
sub_compaction_max_job_size_mb,
|
||||
};
|
||||
|
||||
let scheduled = compact_request.map(|r| r.scheduled).unwrap_or(false);
|
||||
let scheduled = compact_request
|
||||
.as_ref()
|
||||
.map(|r| r.scheduled)
|
||||
.unwrap_or(false);
|
||||
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
@@ -2053,7 +2137,7 @@ async fn timeline_compact_handler(
|
||||
let tenant = state
|
||||
.tenant_manager
|
||||
.get_attached_tenant_shard(tenant_shard_id)?;
|
||||
let rx = tenant.schedule_compaction(timeline_id, options).await;
|
||||
let rx = tenant.schedule_compaction(timeline_id, options).await.map_err(ApiError::InternalServerError)?;
|
||||
if wait_until_scheduled_compaction_done {
|
||||
// It is possible that this will take a long time, dropping the HTTP request will not cancel the compaction.
|
||||
rx.await.ok();
|
||||
@@ -3280,6 +3364,9 @@ pub fn make_router(
|
||||
.get("/v1/tenant/:tenant_shard_id/synthetic_size", |r| {
|
||||
api_handler(r, tenant_size_handler)
|
||||
})
|
||||
.patch("/v1/tenant/config", |r| {
|
||||
api_handler(r, patch_tenant_config_handler)
|
||||
})
|
||||
.put("/v1/tenant/config", |r| {
|
||||
api_handler(r, update_tenant_config_handler)
|
||||
})
|
||||
@@ -3341,6 +3428,10 @@ pub fn make_router(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc",
|
||||
|r| api_handler(r, timeline_gc_handler),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
|
||||
|r| api_handler(r, timeline_compact_info_handler),
|
||||
)
|
||||
.put(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
|
||||
|r| api_handler(r, timeline_compact_handler),
|
||||
|
||||
@@ -3,7 +3,7 @@ use metrics::{
|
||||
register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
|
||||
register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
|
||||
register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
|
||||
Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
|
||||
Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
|
||||
IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
@@ -16,7 +16,6 @@ use postgres_backend::{is_expected_io_error, QueryError};
|
||||
use pq_proto::framed::ConnectionError;
|
||||
use strum::{EnumCount, VariantNames};
|
||||
use strum_macros::{IntoStaticStr, VariantNames};
|
||||
use tracing::warn;
|
||||
use utils::id::TimelineId;
|
||||
|
||||
/// Prometheus histogram buckets (in seconds) for operations in the critical
|
||||
@@ -446,15 +445,6 @@ pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
|
||||
register_gauge_vec!(
|
||||
"pageserver_flush_wait_upload_seconds",
|
||||
"Time spent waiting for preceding uploads during layer flush",
|
||||
&["tenant_id", "shard_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
|
||||
register_int_gauge_vec!(
|
||||
"pageserver_last_record_lsn",
|
||||
@@ -1223,54 +1213,163 @@ pub(crate) mod virtual_file_io_engine {
|
||||
});
|
||||
}
|
||||
|
||||
pub(crate) struct SmgrOpTimer {
|
||||
global_latency_histo: Histogram,
|
||||
pub(crate) struct SmgrOpTimer(Option<SmgrOpTimerInner>);
|
||||
pub(crate) struct SmgrOpTimerInner {
|
||||
global_execution_latency_histo: Histogram,
|
||||
per_timeline_execution_latency_histo: Option<Histogram>,
|
||||
|
||||
// Optional because not all op types are tracked per-timeline
|
||||
per_timeline_latency_histo: Option<Histogram>,
|
||||
global_batch_wait_time: Histogram,
|
||||
per_timeline_batch_wait_time: Histogram,
|
||||
|
||||
start: Instant,
|
||||
throttled: Duration,
|
||||
op: SmgrQueryType,
|
||||
global_flush_in_progress_micros: IntCounter,
|
||||
per_timeline_flush_in_progress_micros: IntCounter,
|
||||
|
||||
timings: SmgrOpTimerState,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum SmgrOpTimerState {
|
||||
Received {
|
||||
received_at: Instant,
|
||||
},
|
||||
ThrottleDoneExecutionStarting {
|
||||
received_at: Instant,
|
||||
throttle_started_at: Instant,
|
||||
started_execution_at: Instant,
|
||||
},
|
||||
}
|
||||
|
||||
pub(crate) struct SmgrOpFlushInProgress {
|
||||
flush_started_at: Instant,
|
||||
global_micros: IntCounter,
|
||||
per_timeline_micros: IntCounter,
|
||||
}
|
||||
|
||||
impl SmgrOpTimer {
|
||||
pub(crate) fn deduct_throttle(&mut self, throttle: &Option<Duration>) {
|
||||
let Some(throttle) = throttle else {
|
||||
return;
|
||||
};
|
||||
self.throttled += *throttle;
|
||||
pub(crate) fn observe_throttle_done_execution_starting(&mut self, throttle: &ThrottleResult) {
|
||||
let inner = self.0.as_mut().expect("other public methods consume self");
|
||||
match (&mut inner.timings, throttle) {
|
||||
(SmgrOpTimerState::Received { received_at }, throttle) => match throttle {
|
||||
ThrottleResult::NotThrottled { start } => {
|
||||
inner.timings = SmgrOpTimerState::ThrottleDoneExecutionStarting {
|
||||
received_at: *received_at,
|
||||
throttle_started_at: *start,
|
||||
started_execution_at: *start,
|
||||
};
|
||||
}
|
||||
ThrottleResult::Throttled { start, end } => {
|
||||
inner.timings = SmgrOpTimerState::ThrottleDoneExecutionStarting {
|
||||
received_at: *start,
|
||||
throttle_started_at: *start,
|
||||
started_execution_at: *end,
|
||||
};
|
||||
}
|
||||
},
|
||||
(x, _) => panic!("called in unexpected state: {x:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn observe_smgr_op_completion_and_start_flushing(mut self) -> SmgrOpFlushInProgress {
|
||||
let (flush_start, inner) = self
|
||||
.smgr_op_end()
|
||||
.expect("this method consume self, and the only other caller is drop handler");
|
||||
let SmgrOpTimerInner {
|
||||
global_flush_in_progress_micros,
|
||||
per_timeline_flush_in_progress_micros,
|
||||
..
|
||||
} = inner;
|
||||
SmgrOpFlushInProgress {
|
||||
flush_started_at: flush_start,
|
||||
global_micros: global_flush_in_progress_micros,
|
||||
per_timeline_micros: per_timeline_flush_in_progress_micros,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `None`` if this method has already been called, `Some` otherwise.
|
||||
fn smgr_op_end(&mut self) -> Option<(Instant, SmgrOpTimerInner)> {
|
||||
let inner = self.0.take()?;
|
||||
|
||||
let now = Instant::now();
|
||||
|
||||
let batch;
|
||||
let execution;
|
||||
let throttle;
|
||||
match inner.timings {
|
||||
SmgrOpTimerState::Received { received_at } => {
|
||||
batch = (now - received_at).as_secs_f64();
|
||||
// TODO: use label for dropped requests.
|
||||
// This is quite rare in practice, only during tenant/pageservers shutdown.
|
||||
throttle = Duration::ZERO;
|
||||
execution = Duration::ZERO.as_secs_f64();
|
||||
}
|
||||
SmgrOpTimerState::ThrottleDoneExecutionStarting {
|
||||
received_at,
|
||||
throttle_started_at,
|
||||
started_execution_at,
|
||||
} => {
|
||||
batch = (throttle_started_at - received_at).as_secs_f64();
|
||||
throttle = started_execution_at - throttle_started_at;
|
||||
execution = (now - started_execution_at).as_secs_f64();
|
||||
}
|
||||
}
|
||||
|
||||
// update time spent in batching
|
||||
inner.global_batch_wait_time.observe(batch);
|
||||
inner.per_timeline_batch_wait_time.observe(batch);
|
||||
|
||||
// time spent in throttle metric is updated by throttle impl
|
||||
let _ = throttle;
|
||||
|
||||
// update metrics for execution latency
|
||||
inner.global_execution_latency_histo.observe(execution);
|
||||
if let Some(per_timeline_execution_latency_histo) =
|
||||
&inner.per_timeline_execution_latency_histo
|
||||
{
|
||||
per_timeline_execution_latency_histo.observe(execution);
|
||||
}
|
||||
|
||||
Some((now, inner))
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for SmgrOpTimer {
|
||||
fn drop(&mut self) {
|
||||
let elapsed = self.start.elapsed();
|
||||
self.smgr_op_end();
|
||||
}
|
||||
}
|
||||
|
||||
let elapsed = match elapsed.checked_sub(self.throttled) {
|
||||
Some(elapsed) => elapsed,
|
||||
None => {
|
||||
use utils::rate_limit::RateLimit;
|
||||
static LOGGED: Lazy<Mutex<enum_map::EnumMap<SmgrQueryType, RateLimit>>> =
|
||||
Lazy::new(|| {
|
||||
Mutex::new(enum_map::EnumMap::from_array(std::array::from_fn(|_| {
|
||||
RateLimit::new(Duration::from_secs(10))
|
||||
})))
|
||||
});
|
||||
let mut guard = LOGGED.lock().unwrap();
|
||||
let rate_limit = &mut guard[self.op];
|
||||
rate_limit.call(|| {
|
||||
warn!(op=?self.op, ?elapsed, ?self.throttled, "implementation error: time spent throttled exceeds total request wall clock time");
|
||||
});
|
||||
elapsed // un-throttled time, more info than just saturating to 0
|
||||
impl SmgrOpFlushInProgress {
|
||||
pub(crate) async fn measure<Fut, O>(mut self, mut fut: Fut) -> O
|
||||
where
|
||||
Fut: std::future::Future<Output = O>,
|
||||
{
|
||||
let mut fut = std::pin::pin!(fut);
|
||||
|
||||
let now = Instant::now();
|
||||
// Whenever observe_guard gets called, or dropped,
|
||||
// it adds the time elapsed since its last call to metrics.
|
||||
// Last call is tracked in `now`.
|
||||
let mut observe_guard = scopeguard::guard(
|
||||
|| {
|
||||
let elapsed = now - self.flush_started_at;
|
||||
self.global_micros
|
||||
.inc_by(u64::try_from(elapsed.as_micros()).unwrap());
|
||||
self.per_timeline_micros
|
||||
.inc_by(u64::try_from(elapsed.as_micros()).unwrap());
|
||||
self.flush_started_at = now;
|
||||
},
|
||||
|mut observe| {
|
||||
observe();
|
||||
},
|
||||
);
|
||||
|
||||
loop {
|
||||
match tokio::time::timeout(Duration::from_secs(10), &mut fut).await {
|
||||
Ok(v) => return v,
|
||||
Err(_timeout) => {
|
||||
(*observe_guard)();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let elapsed = elapsed.as_secs_f64();
|
||||
|
||||
self.global_latency_histo.observe(elapsed);
|
||||
if let Some(per_timeline_getpage_histo) = &self.per_timeline_latency_histo {
|
||||
per_timeline_getpage_histo.observe(elapsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1302,6 +1401,10 @@ pub(crate) struct SmgrQueryTimePerTimeline {
|
||||
per_timeline_getpage_latency: Histogram,
|
||||
global_batch_size: Histogram,
|
||||
per_timeline_batch_size: Histogram,
|
||||
global_flush_in_progress_micros: IntCounter,
|
||||
per_timeline_flush_in_progress_micros: IntCounter,
|
||||
global_batch_wait_time: Histogram,
|
||||
per_timeline_batch_wait_time: Histogram,
|
||||
}
|
||||
|
||||
static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
@@ -1324,12 +1427,15 @@ static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// Alias so all histograms recording per-timeline smgr timings use the same buckets.
|
||||
static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] = CRITICAL_OP_BUCKETS;
|
||||
|
||||
static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_smgr_query_seconds",
|
||||
"Time spent on smgr query handling, aggegated by query type and tenant/timeline.",
|
||||
"Time spent _executing_ smgr query handling, excluding batch and throttle delays.",
|
||||
&["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
|
||||
CRITICAL_OP_BUCKETS.into(),
|
||||
SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
@@ -1387,7 +1493,7 @@ static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
|
||||
static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_smgr_query_seconds_global",
|
||||
"Time spent on smgr query handling, aggregated by query type.",
|
||||
"Like pageserver_smgr_query_seconds, but aggregated to instance level.",
|
||||
&["smgr_query_type"],
|
||||
SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
|
||||
)
|
||||
@@ -1464,6 +1570,45 @@ fn set_page_service_config_max_batch_size(conf: &PageServicePipeliningConfig) {
|
||||
.set(value.try_into().unwrap());
|
||||
}
|
||||
|
||||
static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_page_service_pagestream_flush_in_progress_micros",
|
||||
"Counter that sums up the microseconds that a pagestream response was being flushed into the TCP connection. \
|
||||
If the flush is particularly slow, this counter will be updated periodically to make slow flushes \
|
||||
easily discoverable in monitoring. \
|
||||
Hence, this is NOT a completion latency historgram.",
|
||||
&["tenant_id", "shard_id", "timeline_id"],
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL: Lazy<IntCounter> = Lazy::new(|| {
|
||||
register_int_counter!(
|
||||
"pageserver_page_service_pagestream_flush_in_progress_micros_global",
|
||||
"Like pageserver_page_service_pagestream_flush_in_progress_seconds, but instance-wide.",
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
"pageserver_page_service_pagestream_batch_wait_time_seconds",
|
||||
"Time a request spent waiting in its batch until the batch moved to throttle&execution.",
|
||||
&["tenant_id", "shard_id", "timeline_id"],
|
||||
SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS.into(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
static PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_page_service_pagestream_batch_wait_time_seconds_global",
|
||||
"Like pageserver_page_service_pagestream_batch_wait_time_seconds, but aggregated to instance level.",
|
||||
SMGR_QUERY_TIME_GLOBAL_BUCKETS.to_vec(),
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
impl SmgrQueryTimePerTimeline {
|
||||
pub(crate) fn new(tenant_shard_id: &TenantShardId, timeline_id: &TimelineId) -> Self {
|
||||
let tenant_id = tenant_shard_id.tenant_id.to_string();
|
||||
@@ -1504,6 +1649,17 @@ impl SmgrQueryTimePerTimeline {
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
let global_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL.clone();
|
||||
let per_timeline_batch_wait_time = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
let global_flush_in_progress_micros =
|
||||
PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL.clone();
|
||||
let per_timeline_flush_in_progress_micros = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_slug, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
Self {
|
||||
global_started,
|
||||
global_latency,
|
||||
@@ -1511,9 +1667,13 @@ impl SmgrQueryTimePerTimeline {
|
||||
per_timeline_getpage_started,
|
||||
global_batch_size,
|
||||
per_timeline_batch_size,
|
||||
global_flush_in_progress_micros,
|
||||
per_timeline_flush_in_progress_micros,
|
||||
global_batch_wait_time,
|
||||
per_timeline_batch_wait_time,
|
||||
}
|
||||
}
|
||||
pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, started_at: Instant) -> SmgrOpTimer {
|
||||
pub(crate) fn start_smgr_op(&self, op: SmgrQueryType, received_at: Instant) -> SmgrOpTimer {
|
||||
self.global_started[op as usize].inc();
|
||||
|
||||
let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) {
|
||||
@@ -1523,13 +1683,17 @@ impl SmgrQueryTimePerTimeline {
|
||||
None
|
||||
};
|
||||
|
||||
SmgrOpTimer {
|
||||
global_latency_histo: self.global_latency[op as usize].clone(),
|
||||
per_timeline_latency_histo,
|
||||
start: started_at,
|
||||
op,
|
||||
throttled: Duration::ZERO,
|
||||
}
|
||||
SmgrOpTimer(Some(SmgrOpTimerInner {
|
||||
global_execution_latency_histo: self.global_latency[op as usize].clone(),
|
||||
per_timeline_execution_latency_histo: per_timeline_latency_histo,
|
||||
timings: SmgrOpTimerState::Received { received_at },
|
||||
global_flush_in_progress_micros: self.global_flush_in_progress_micros.clone(),
|
||||
per_timeline_flush_in_progress_micros: self
|
||||
.per_timeline_flush_in_progress_micros
|
||||
.clone(),
|
||||
global_batch_wait_time: self.global_batch_wait_time.clone(),
|
||||
per_timeline_batch_wait_time: self.per_timeline_batch_wait_time.clone(),
|
||||
}))
|
||||
}
|
||||
|
||||
pub(crate) fn observe_getpage_batch_start(&self, batch_size: usize) {
|
||||
@@ -2204,6 +2368,15 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet
|
||||
.expect("failed to define a metric"),
|
||||
});
|
||||
|
||||
pub(crate) static PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_timeline_wal_records_received",
|
||||
"Number of WAL records received per shard",
|
||||
&["tenant_id", "shard_id", "timeline_id"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
|
||||
register_histogram!(
|
||||
"pageserver_wal_redo_seconds",
|
||||
@@ -2404,7 +2577,6 @@ pub(crate) struct TimelineMetrics {
|
||||
shard_id: String,
|
||||
timeline_id: String,
|
||||
pub flush_time_histo: StorageTimeMetrics,
|
||||
pub flush_wait_upload_time_gauge: Gauge,
|
||||
pub compact_time_histo: StorageTimeMetrics,
|
||||
pub create_images_time_histo: StorageTimeMetrics,
|
||||
pub logical_size_histo: StorageTimeMetrics,
|
||||
@@ -2431,6 +2603,7 @@ pub(crate) struct TimelineMetrics {
|
||||
pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
|
||||
/// Number of valid LSN leases.
|
||||
pub valid_lsn_lease_count_gauge: UIntGauge,
|
||||
pub wal_records_received: IntCounter,
|
||||
shutdown: std::sync::atomic::AtomicBool,
|
||||
}
|
||||
|
||||
@@ -2449,9 +2622,6 @@ impl TimelineMetrics {
|
||||
&shard_id,
|
||||
&timeline_id,
|
||||
);
|
||||
let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
let compact_time_histo = StorageTimeMetrics::new(
|
||||
StorageTimeOperation::Compact,
|
||||
&tenant_id,
|
||||
@@ -2588,12 +2758,15 @@ impl TimelineMetrics {
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
let wal_records_received = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED
|
||||
.get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
|
||||
.unwrap();
|
||||
|
||||
TimelineMetrics {
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
flush_time_histo,
|
||||
flush_wait_upload_time_gauge,
|
||||
compact_time_histo,
|
||||
create_images_time_histo,
|
||||
logical_size_histo,
|
||||
@@ -2620,6 +2793,7 @@ impl TimelineMetrics {
|
||||
evictions_with_low_residence_duration,
|
||||
),
|
||||
valid_lsn_lease_count_gauge,
|
||||
wal_records_received,
|
||||
shutdown: std::sync::atomic::AtomicBool::default(),
|
||||
}
|
||||
}
|
||||
@@ -2642,14 +2816,6 @@ impl TimelineMetrics {
|
||||
self.resident_physical_size_gauge.get()
|
||||
}
|
||||
|
||||
pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
|
||||
self.flush_wait_upload_time_gauge.add(duration);
|
||||
crate::metrics::FLUSH_WAIT_UPLOAD_TIME
|
||||
.get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
|
||||
.unwrap()
|
||||
.add(duration);
|
||||
}
|
||||
|
||||
pub(crate) fn shutdown(&self) {
|
||||
let was_shutdown = self
|
||||
.shutdown
|
||||
@@ -2667,7 +2833,6 @@ impl TimelineMetrics {
|
||||
let shard_id = &self.shard_id;
|
||||
let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = DISK_CONSISTENT_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
|
||||
{
|
||||
RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
|
||||
@@ -2757,6 +2922,21 @@ impl TimelineMetrics {
|
||||
shard_id,
|
||||
timeline_id,
|
||||
]);
|
||||
let _ = PAGESERVER_TIMELINE_WAL_RECORDS_RECEIVED.remove_label_values(&[
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
]);
|
||||
let _ = PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS.remove_label_values(&[
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
]);
|
||||
let _ = PAGE_SERVICE_SMGR_BATCH_WAIT_TIME.remove_label_values(&[
|
||||
tenant_id,
|
||||
shard_id,
|
||||
timeline_id,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2787,6 +2967,7 @@ use crate::context::{PageContentKind, RequestContext};
|
||||
use crate::task_mgr::TaskKind;
|
||||
use crate::tenant::mgr::TenantSlot;
|
||||
use crate::tenant::tasks::BackgroundLoopKind;
|
||||
use crate::tenant::throttle::ThrottleResult;
|
||||
use crate::tenant::Timeline;
|
||||
|
||||
/// Maintain a per timeline gauge in addition to the global gauge.
|
||||
@@ -3641,6 +3822,7 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
|
||||
&REMOTE_ONDEMAND_DOWNLOADED_BYTES,
|
||||
&CIRCUIT_BREAKERS_BROKEN,
|
||||
&CIRCUIT_BREAKERS_UNBROKEN,
|
||||
&PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL,
|
||||
]
|
||||
.into_iter()
|
||||
.for_each(|c| {
|
||||
@@ -3688,6 +3870,7 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) {
|
||||
&WAL_REDO_BYTES_HISTOGRAM,
|
||||
&WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM,
|
||||
&PAGE_SERVICE_BATCH_SIZE_GLOBAL,
|
||||
&PAGE_SERVICE_SMGR_BATCH_WAIT_TIME_GLOBAL,
|
||||
]
|
||||
.into_iter()
|
||||
.for_each(|h| {
|
||||
|
||||
@@ -575,7 +575,10 @@ enum BatchedFeMessage {
|
||||
}
|
||||
|
||||
impl BatchedFeMessage {
|
||||
async fn throttle(&mut self, cancel: &CancellationToken) -> Result<(), QueryError> {
|
||||
async fn throttle_and_record_start_processing(
|
||||
&mut self,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(), QueryError> {
|
||||
let (shard, tokens, timers) = match self {
|
||||
BatchedFeMessage::Exists { shard, timer, .. }
|
||||
| BatchedFeMessage::Nblocks { shard, timer, .. }
|
||||
@@ -603,7 +606,7 @@ impl BatchedFeMessage {
|
||||
}
|
||||
};
|
||||
for timer in timers {
|
||||
timer.deduct_throttle(&throttled);
|
||||
timer.observe_throttle_done_execution_starting(&throttled);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -1017,10 +1020,8 @@ impl PageServerHandler {
|
||||
// Map handler result to protocol behavior.
|
||||
// Some handler errors cause exit from pagestream protocol.
|
||||
// Other handler errors are sent back as an error message and we stay in pagestream protocol.
|
||||
let mut timers: smallvec::SmallVec<[_; 1]> =
|
||||
smallvec::SmallVec::with_capacity(handler_results.len());
|
||||
for handler_result in handler_results {
|
||||
let response_msg = match handler_result {
|
||||
let (response_msg, timer) = match handler_result {
|
||||
Err(e) => match &e {
|
||||
PageStreamError::Shutdown => {
|
||||
// If we fail to fulfil a request during shutdown, which may be _because_ of
|
||||
@@ -1044,34 +1045,66 @@ impl PageServerHandler {
|
||||
span.in_scope(|| {
|
||||
error!("error reading relation or page version: {full:#}")
|
||||
});
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
})
|
||||
(
|
||||
PagestreamBeMessage::Error(PagestreamErrorResponse {
|
||||
message: e.to_string(),
|
||||
}),
|
||||
None, // TODO: measure errors
|
||||
)
|
||||
}
|
||||
},
|
||||
Ok((response_msg, timer)) => {
|
||||
// Extending the lifetime of the timers so observations on drop
|
||||
// include the flush time.
|
||||
timers.push(timer);
|
||||
response_msg
|
||||
}
|
||||
Ok((response_msg, timer)) => (response_msg, Some(timer)),
|
||||
};
|
||||
|
||||
//
|
||||
// marshal & transmit response message
|
||||
//
|
||||
|
||||
pgb_writer.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?;
|
||||
}
|
||||
tokio::select! {
|
||||
biased;
|
||||
_ = cancel.cancelled() => {
|
||||
// We were requested to shut down.
|
||||
info!("shutdown request received in page handler");
|
||||
return Err(QueryError::Shutdown)
|
||||
}
|
||||
res = pgb_writer.flush() => {
|
||||
res?;
|
||||
|
||||
// We purposefully don't count flush time into the timer.
|
||||
//
|
||||
// The reason is that current compute client will not perform protocol processing
|
||||
// if the postgres backend process is doing things other than `->smgr_read()`.
|
||||
// This is especially the case for prefetch.
|
||||
//
|
||||
// If the compute doesn't read from the connection, eventually TCP will backpressure
|
||||
// all the way into our flush call below.
|
||||
//
|
||||
// The timer's underlying metric is used for a storage-internal latency SLO and
|
||||
// we don't want to include latency in it that we can't control.
|
||||
// And as pointed out above, in this case, we don't control the time that flush will take.
|
||||
let flushing_timer =
|
||||
timer.map(|timer| timer.observe_smgr_op_completion_and_start_flushing());
|
||||
|
||||
// what we want to do
|
||||
let flush_fut = pgb_writer.flush();
|
||||
// metric for how long flushing takes
|
||||
let flush_fut = match flushing_timer {
|
||||
Some(flushing_timer) => {
|
||||
futures::future::Either::Left(flushing_timer.measure(flush_fut))
|
||||
}
|
||||
None => futures::future::Either::Right(flush_fut),
|
||||
};
|
||||
// do it while respecting cancellation
|
||||
let _: () = async move {
|
||||
tokio::select! {
|
||||
biased;
|
||||
_ = cancel.cancelled() => {
|
||||
// We were requested to shut down.
|
||||
info!("shutdown request received in page handler");
|
||||
return Err(QueryError::Shutdown)
|
||||
}
|
||||
res = flush_fut => {
|
||||
res?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
// and log the info! line inside the request span
|
||||
.instrument(span.clone())
|
||||
.await?;
|
||||
}
|
||||
drop(timers);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1200,7 +1233,7 @@ impl PageServerHandler {
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(cancelled) = msg.throttle(&self.cancel).await {
|
||||
if let Err(cancelled) = msg.throttle_and_record_start_processing(&self.cancel).await {
|
||||
break cancelled;
|
||||
}
|
||||
|
||||
@@ -1367,7 +1400,9 @@ impl PageServerHandler {
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
batch.throttle(&self.cancel).await?;
|
||||
batch
|
||||
.throttle_and_record_start_processing(&self.cancel)
|
||||
.await?;
|
||||
self.pagesteam_handle_batched_message(pgb_writer, batch, &cancel, &ctx)
|
||||
.await?;
|
||||
}
|
||||
|
||||
@@ -1242,7 +1242,7 @@ pub struct DatadirModification<'a> {
|
||||
pending_metadata_bytes: usize,
|
||||
}
|
||||
|
||||
impl<'a> DatadirModification<'a> {
|
||||
impl DatadirModification<'_> {
|
||||
// When a DatadirModification is committed, we do a monolithic serialization of all its contents. WAL records can
|
||||
// contain multiple pages, so the pageserver's record-based batch size isn't sufficient to bound this allocation: we
|
||||
// additionally specify a limit on how much payload a DatadirModification may contain before it should be committed.
|
||||
@@ -1263,7 +1263,7 @@ impl<'a> DatadirModification<'a> {
|
||||
pub(crate) fn has_dirty_data(&self) -> bool {
|
||||
self.pending_data_batch
|
||||
.as_ref()
|
||||
.map_or(false, |b| b.has_data())
|
||||
.is_some_and(|b| b.has_data())
|
||||
}
|
||||
|
||||
/// Set the current lsn
|
||||
@@ -1319,18 +1319,23 @@ impl<'a> DatadirModification<'a> {
|
||||
|
||||
let buf: Bytes = SlruSegmentDirectory::ser(&SlruSegmentDirectory::default())?.into();
|
||||
let empty_dir = Value::Image(buf);
|
||||
self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
|
||||
self.put(
|
||||
slru_dir_to_key(SlruKind::MultiXactMembers),
|
||||
empty_dir.clone(),
|
||||
);
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
|
||||
self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0));
|
||||
|
||||
// Initialize SLRUs on shard 0 only: creating these on other shards would be
|
||||
// harmless but they'd just be dropped on later compaction.
|
||||
if self.tline.tenant_shard_id.is_shard_zero() {
|
||||
self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
|
||||
self.put(
|
||||
slru_dir_to_key(SlruKind::MultiXactMembers),
|
||||
empty_dir.clone(),
|
||||
);
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
|
||||
self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);
|
||||
self.pending_directory_entries
|
||||
.push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -2225,7 +2230,7 @@ impl<'a> DatadirModification<'a> {
|
||||
assert!(!self
|
||||
.pending_data_batch
|
||||
.as_ref()
|
||||
.map_or(false, |b| b.updates_key(&key)));
|
||||
.is_some_and(|b| b.updates_key(&key)));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2294,7 +2299,7 @@ pub enum Version<'a> {
|
||||
Modified(&'a DatadirModification<'a>),
|
||||
}
|
||||
|
||||
impl<'a> Version<'a> {
|
||||
impl Version<'_> {
|
||||
async fn get(
|
||||
&self,
|
||||
timeline: &Timeline,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user