mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-19 22:20:37 +00:00
Compare commits
92 Commits
skyzh/page
...
amasterov/
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bd8428cb28 | ||
|
|
86826669f9 | ||
|
|
3392122824 | ||
|
|
a594cfa1e2 | ||
|
|
35d58a92ab | ||
|
|
eb30bd4799 | ||
|
|
d809743687 | ||
|
|
f7c69b7b84 | ||
|
|
08d7d8dfc3 | ||
|
|
91139a28e5 | ||
|
|
a82dcbbcc8 | ||
|
|
24ccfd89ab | ||
|
|
ab5c7d4365 | ||
|
|
968c87ccac | ||
|
|
756f833269 | ||
|
|
0eee26a083 | ||
|
|
c8d8dfc765 | ||
|
|
2727b79a8c | ||
|
|
b6afe92bd5 | ||
|
|
127247a63c | ||
|
|
bcfce1af3e | ||
|
|
ce2f606995 | ||
|
|
57637f0ed8 | ||
|
|
90b9e90fa5 | ||
|
|
9151a29c2a | ||
|
|
c6120a44f8 | ||
|
|
849c8a1356 | ||
|
|
b6741457ea | ||
|
|
f1b98f83a9 | ||
|
|
0d956bea71 | ||
|
|
e7bba6457d | ||
|
|
996202d4f9 | ||
|
|
4ff7787496 | ||
|
|
f611af797e | ||
|
|
d1c461f529 | ||
|
|
ea9d987cad | ||
|
|
1ca11382e1 | ||
|
|
3bee41c80b | ||
|
|
bb1e7d79c2 | ||
|
|
29565a7ca2 | ||
|
|
b0b7ccb1ba | ||
|
|
be51f997b7 | ||
|
|
0a3fc85f2c | ||
|
|
13d080d6d2 | ||
|
|
67b9e0f34d | ||
|
|
f39f45164c | ||
|
|
f81baf42f1 | ||
|
|
1838bd5603 | ||
|
|
ca21d0bdcc | ||
|
|
3d73e5c642 | ||
|
|
ffb6cb3456 | ||
|
|
f78c92ca52 | ||
|
|
6a5fc86743 | ||
|
|
8db3bf992f | ||
|
|
fef9bd9073 | ||
|
|
7dceb61d3d | ||
|
|
fa94e9f625 | ||
|
|
d1b96d5c0d | ||
|
|
627c0bed85 | ||
|
|
d1286ab935 | ||
|
|
f2c74a64b6 | ||
|
|
ae326a6df3 | ||
|
|
02896e3d09 | ||
|
|
cae480d62f | ||
|
|
2ec7630252 | ||
|
|
d904e56e80 | ||
|
|
d0d49d2985 | ||
|
|
a0fa73d508 | ||
|
|
9a89a8e9ee | ||
|
|
6ebc6e3994 | ||
|
|
307899408f | ||
|
|
ecb059d5f7 | ||
|
|
d912f05e36 | ||
|
|
8378b1c603 | ||
|
|
154d215c78 | ||
|
|
0b10a5336c | ||
|
|
5c36f3786d | ||
|
|
f3e928d781 | ||
|
|
2eaaa0495a | ||
|
|
d6ed3eb557 | ||
|
|
4b4ce9e60a | ||
|
|
3f775d2df1 | ||
|
|
d6de25f85f | ||
|
|
e43968bc95 | ||
|
|
5c8b9457c1 | ||
|
|
27d6ebabb3 | ||
|
|
fa6b4ed659 | ||
|
|
3c7a526971 | ||
|
|
da01a57759 | ||
|
|
9c2e296af7 | ||
|
|
c92dc09104 | ||
|
|
b41bca9f58 |
5
.github/actionlint.yml
vendored
5
.github/actionlint.yml
vendored
@@ -33,14 +33,9 @@ config-variables:
|
||||
- REMOTE_STORAGE_AZURE_CONTAINER
|
||||
- REMOTE_STORAGE_AZURE_REGION
|
||||
- SLACK_CICD_CHANNEL_ID
|
||||
- SLACK_COMPUTE_CHANNEL_ID
|
||||
- SLACK_ON_CALL_DEVPROD_STREAM
|
||||
- SLACK_ON_CALL_QA_STAGING_STREAM
|
||||
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
|
||||
- SLACK_ONCALL_COMPUTE_GROUP
|
||||
- SLACK_ONCALL_PROXY_GROUP
|
||||
- SLACK_ONCALL_STORAGE_GROUP
|
||||
- SLACK_PROXY_CHANNEL_ID
|
||||
- SLACK_RUST_CHANNEL_ID
|
||||
- SLACK_STORAGE_CHANNEL_ID
|
||||
- SLACK_UPCOMING_RELEASE_CHANNEL_ID
|
||||
|
||||
@@ -7,7 +7,7 @@ inputs:
|
||||
type: boolean
|
||||
required: false
|
||||
default: false
|
||||
aws-oidc-role-arn:
|
||||
aws-oicd-role-arn:
|
||||
description: 'OIDC role arn to interract with S3'
|
||||
required: true
|
||||
|
||||
@@ -88,7 +88,7 @@ runs:
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
|
||||
|
||||
# Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
|
||||
|
||||
@@ -8,7 +8,7 @@ inputs:
|
||||
unique-key:
|
||||
description: 'string to distinguish different results in the same run'
|
||||
required: true
|
||||
aws-oidc-role-arn:
|
||||
aws-oicd-role-arn:
|
||||
description: 'OIDC role arn to interract with S3'
|
||||
required: true
|
||||
|
||||
@@ -39,7 +39,7 @@ runs:
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
|
||||
|
||||
- name: Upload test results
|
||||
|
||||
4
.github/actions/download/action.yml
vendored
4
.github/actions/download/action.yml
vendored
@@ -15,7 +15,7 @@ inputs:
|
||||
prefix:
|
||||
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
|
||||
required: false
|
||||
aws-oidc-role-arn:
|
||||
aws-oicd-role-arn:
|
||||
description: 'OIDC role arn to interract with S3'
|
||||
required: true
|
||||
|
||||
@@ -25,7 +25,7 @@ runs:
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Download artifact
|
||||
|
||||
14
.github/actions/run-python-test-set/action.yml
vendored
14
.github/actions/run-python-test-set/action.yml
vendored
@@ -53,7 +53,7 @@ inputs:
|
||||
description: 'benchmark durations JSON'
|
||||
required: false
|
||||
default: '{}'
|
||||
aws-oidc-role-arn:
|
||||
aws-oicd-role-arn:
|
||||
description: 'OIDC role arn to interract with S3'
|
||||
required: true
|
||||
|
||||
@@ -66,7 +66,7 @@ runs:
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
|
||||
path: /tmp/neon
|
||||
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: Download Neon binaries for the previous release
|
||||
if: inputs.build_type != 'remote'
|
||||
@@ -75,7 +75,7 @@ runs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
|
||||
path: /tmp/neon-previous
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: Download compatibility snapshot
|
||||
if: inputs.build_type != 'remote'
|
||||
@@ -87,7 +87,7 @@ runs:
|
||||
# The lack of compatibility snapshot (for example, for the new Postgres version)
|
||||
# shouldn't fail the whole job. Only relevant test should fail.
|
||||
skip-if-does-not-exist: true
|
||||
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: Checkout
|
||||
if: inputs.needs_postgres_source == 'true'
|
||||
@@ -228,13 +228,13 @@ runs:
|
||||
# The lack of compatibility snapshot shouldn't fail the job
|
||||
# (for example if we didn't run the test for non build-and-test workflow)
|
||||
skip-if-does-not-exist: true
|
||||
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
|
||||
|
||||
- name: Upload test results
|
||||
@@ -243,4 +243,4 @@ runs:
|
||||
with:
|
||||
report-dir: /tmp/test_output/allure/results
|
||||
unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}-${{ runner.arch }}
|
||||
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
@@ -14,11 +14,11 @@ runs:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage
|
||||
skip-if-does-not-exist: true # skip if there's no previous coverage to download
|
||||
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
- name: Upload coverage data
|
||||
uses: ./.github/actions/upload
|
||||
with:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage
|
||||
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
|
||||
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
|
||||
|
||||
4
.github/actions/upload/action.yml
vendored
4
.github/actions/upload/action.yml
vendored
@@ -14,7 +14,7 @@ inputs:
|
||||
prefix:
|
||||
description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
|
||||
required: false
|
||||
aws-oidc-role-arn:
|
||||
aws-oicd-role-arn:
|
||||
description: "the OIDC role arn for aws auth"
|
||||
required: false
|
||||
default: ""
|
||||
@@ -61,7 +61,7 @@ runs:
|
||||
uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
|
||||
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
|
||||
role-duration-seconds: 3600
|
||||
|
||||
- name: Upload artifact
|
||||
|
||||
2
.github/scripts/lint-release-pr.sh
vendored
2
.github/scripts/lint-release-pr.sh
vendored
@@ -41,7 +41,7 @@ echo "Merge base of ${MAIN_BRANCH} and ${RELEASE_BRANCH}: ${MERGE_BASE}"
|
||||
LAST_COMMIT=$(git rev-parse HEAD)
|
||||
|
||||
MERGE_COMMIT_MESSAGE=$(git log -1 --format=%s "${LAST_COMMIT}")
|
||||
EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2} UTC$"
|
||||
EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2}$"
|
||||
|
||||
if ! [[ "${MERGE_COMMIT_MESSAGE}" =~ ${EXPECTED_MESSAGE_REGEX} ]]; then
|
||||
report_error "Merge commit message does not match expected pattern: '<component> release YYYY-MM-DD'
|
||||
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
# we create a table that has one row for each database that we want to restore with the status whether the restore is done
|
||||
- name: Create benchmark_restore_status table if it does not exist
|
||||
|
||||
@@ -323,7 +323,7 @@ jobs:
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
|
||||
path: /tmp/neon
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Check diesel schema
|
||||
if: inputs.build-type == 'release' && inputs.arch == 'x64'
|
||||
@@ -394,7 +394,7 @@ jobs:
|
||||
rerun_failed: ${{ inputs.test-run-count == 1 }}
|
||||
pg_version: ${{ matrix.pg_version }}
|
||||
sanitizers: ${{ inputs.sanitizers }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
# `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
|
||||
# Attempt to stop tests gracefully to generate test reports
|
||||
# until they are forcibly stopped by the stricter `timeout-minutes` limit.
|
||||
|
||||
103
.github/workflows/_create-release-pr.yml
vendored
Normal file
103
.github/workflows/_create-release-pr.yml
vendored
Normal file
@@ -0,0 +1,103 @@
|
||||
name: Create Release PR
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
component-name:
|
||||
description: 'Component name'
|
||||
required: true
|
||||
type: string
|
||||
source-branch:
|
||||
description: 'Source branch'
|
||||
required: true
|
||||
type: string
|
||||
secrets:
|
||||
ci-access-token:
|
||||
description: 'CI access token'
|
||||
required: true
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euo pipefail {0}
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
create-release-branch:
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
permissions:
|
||||
contents: write # for `git push`
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
ref: ${{ inputs.source-branch }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set variables
|
||||
id: vars
|
||||
env:
|
||||
COMPONENT_NAME: ${{ inputs.component-name }}
|
||||
RELEASE_BRANCH: >-
|
||||
${{
|
||||
false
|
||||
|| inputs.component-name == 'Storage' && 'release'
|
||||
|| inputs.component-name == 'Proxy' && 'release-proxy'
|
||||
|| inputs.component-name == 'Compute' && 'release-compute'
|
||||
}}
|
||||
run: |
|
||||
now_date=$(date -u +'%Y-%m-%d')
|
||||
now_time=$(date -u +'%H-%M-%Z')
|
||||
{
|
||||
echo "title=${COMPONENT_NAME} release ${now_date}"
|
||||
echo "rc-branch=rc/${RELEASE_BRANCH}/${now_date}_${now_time}"
|
||||
echo "release-branch=${RELEASE_BRANCH}"
|
||||
} | tee -a ${GITHUB_OUTPUT}
|
||||
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
- name: Create RC branch
|
||||
env:
|
||||
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
|
||||
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
|
||||
TITLE: ${{ steps.vars.outputs.title }}
|
||||
run: |
|
||||
git switch -c "${RC_BRANCH}"
|
||||
|
||||
# Manually create a merge commit on the current branch, keeping the
|
||||
# tree and setting the parents to the current HEAD and the HEAD of the
|
||||
# release branch. This commit is what we'll fast-forward the release
|
||||
# branch to when merging the release branch.
|
||||
# For details on why, look at
|
||||
# https://docs.neon.build/overview/repositories/neon.html#background-on-commit-history-of-release-prs
|
||||
current_tree=$(git rev-parse 'HEAD^{tree}')
|
||||
release_head=$(git rev-parse "origin/${RELEASE_BRANCH}")
|
||||
current_head=$(git rev-parse HEAD)
|
||||
merge_commit=$(git commit-tree -p "${current_head}" -p "${release_head}" -m "${TITLE}" "${current_tree}")
|
||||
|
||||
# Fast-forward the current branch to the newly created merge_commit
|
||||
git merge --ff-only ${merge_commit}
|
||||
|
||||
git push origin "${RC_BRANCH}"
|
||||
|
||||
- name: Create a PR into ${{ steps.vars.outputs.release-branch }}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ci-access-token }}
|
||||
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
|
||||
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
|
||||
TITLE: ${{ steps.vars.outputs.title }}
|
||||
run: |
|
||||
gh pr create --title "${TITLE}" \
|
||||
--body "" \
|
||||
--head "${RC_BRANCH}" \
|
||||
--base "${RELEASE_BRANCH}"
|
||||
58
.github/workflows/benchmarking.yml
vendored
58
.github/workflows/benchmarking.yml
vendored
@@ -114,7 +114,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
@@ -132,7 +132,7 @@ jobs:
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
# Set --sparse-ordering option of pytest-order plugin
|
||||
# to ensure tests are running in order of appears in the file.
|
||||
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
|
||||
@@ -165,7 +165,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -222,8 +222,8 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Verify that cumulative statistics are preserved
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
@@ -233,7 +233,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 3600
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -282,7 +282,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Run Logical Replication benchmarks
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
@@ -293,7 +293,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 5400
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -310,7 +310,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 5400
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -322,7 +322,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
@@ -505,7 +505,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
if: contains(fromJSON('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
|
||||
@@ -557,7 +557,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -573,7 +573,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -588,7 +588,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -603,7 +603,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -621,7 +621,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -694,7 +694,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
@@ -726,7 +726,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -741,7 +741,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -752,7 +752,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -828,7 +828,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
@@ -871,7 +871,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -885,7 +885,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -954,7 +954,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Get Connstring Secret Name
|
||||
run: |
|
||||
@@ -1003,7 +1003,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -1015,7 +1015,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
@@ -1078,7 +1078,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
@@ -1121,7 +1121,7 @@ jobs:
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -1132,7 +1132,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
|
||||
121
.github/workflows/build-macos.yml
vendored
121
.github/workflows/build-macos.yml
vendored
@@ -34,10 +34,11 @@ permissions:
|
||||
jobs:
|
||||
build-pgxn:
|
||||
if: |
|
||||
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
strategy:
|
||||
@@ -99,21 +100,13 @@ jobs:
|
||||
run: |
|
||||
make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: pg_install--${{ matrix.postgres-version }}
|
||||
path: pg_install/${{ matrix.postgres-version }}
|
||||
# The artifact is supposed to be used by the next job in the same workflow,
|
||||
# so there’s no need to store it for too long.
|
||||
retention-days: 1
|
||||
|
||||
build-walproposer-lib:
|
||||
if: |
|
||||
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
needs: [build-pgxn]
|
||||
@@ -134,11 +127,12 @@ jobs:
|
||||
id: pg_rev
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Download "pg_install/v17" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
name: pg_install--v17
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
@@ -169,21 +163,13 @@ jobs:
|
||||
run:
|
||||
make walproposer-lib -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Upload "pg_install/build/walproposer-lib" artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: pg_install--build--walproposer-lib
|
||||
path: pg_install/build/walproposer-lib
|
||||
# The artifact is supposed to be used by the next job in the same workflow,
|
||||
# so there’s no need to store it for too long.
|
||||
retention-days: 1
|
||||
|
||||
cargo-build:
|
||||
if: |
|
||||
inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
(inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && (
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
)
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-15
|
||||
needs: [build-pgxn, build-walproposer-lib]
|
||||
@@ -202,43 +188,45 @@ jobs:
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Download "pg_install/v14" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
- name: Set pg v14 for caching
|
||||
id: pg_rev_v14
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v15 for caching
|
||||
id: pg_rev_v15
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v16 for caching
|
||||
id: pg_rev_v16
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}"
|
||||
- name: Set pg v17 for caching
|
||||
id: pg_rev_v17
|
||||
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Cache postgres v14 build
|
||||
id: cache_pg
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
name: pg_install--v14
|
||||
path: pg_install/v14
|
||||
|
||||
- name: Download "pg_install/v15" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v15 build
|
||||
id: cache_pg_v15
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
name: pg_install--v15
|
||||
path: pg_install/v15
|
||||
|
||||
- name: Download "pg_install/v16" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v16 build
|
||||
id: cache_pg_v16
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
name: pg_install--v16
|
||||
path: pg_install/v16
|
||||
|
||||
- name: Download "pg_install/v17" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
- name: Cache postgres v17 build
|
||||
id: cache_pg_v17
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
name: pg_install--v17
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Download "pg_install/build/walproposer-lib" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: pg_install--build--walproposer-lib
|
||||
path: pg_install/build/walproposer-lib
|
||||
|
||||
# `actions/download-artifact` doesn't preserve permissions:
|
||||
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
|
||||
- name: Make pg_install/v*/bin/* executable
|
||||
run: |
|
||||
chmod +x pg_install/v*/bin/*
|
||||
|
||||
- name: Cache cargo deps
|
||||
- name: Cache cargo deps (only for v17)
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: |
|
||||
@@ -248,6 +236,13 @@ jobs:
|
||||
target
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
|
||||
|
||||
- name: Cache walproposer-lib
|
||||
id: cache_walproposer_lib
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: pg_install/build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
brew install flex bison openssl protobuf icu4c
|
||||
@@ -257,8 +252,8 @@ jobs:
|
||||
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
|
||||
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
|
||||
|
||||
- name: Run cargo build
|
||||
- name: Run cargo build (only for v17)
|
||||
run: cargo build --all --release -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Check that no warnings are produced
|
||||
- name: Check that no warnings are produced (only for v17)
|
||||
run: ./run_clippy.sh
|
||||
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_DEV }}
|
||||
|
||||
|
||||
51
.github/workflows/build_and_test.yml
vendored
51
.github/workflows/build_and_test.yml
vendored
@@ -69,7 +69,7 @@ jobs:
|
||||
submodules: true
|
||||
|
||||
- name: Check for file changes
|
||||
uses: step-security/paths-filter@v3
|
||||
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
|
||||
id: files-changed
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
@@ -317,7 +317,7 @@ jobs:
|
||||
extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
|
||||
benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
|
||||
pg_version: v16
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
@@ -384,7 +384,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
@@ -451,14 +451,14 @@ jobs:
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact
|
||||
path: /tmp/neon
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Get coverage artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: coverage-data-artifact
|
||||
path: /tmp/coverage
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Merge coverage data
|
||||
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
|
||||
@@ -824,7 +824,7 @@ jobs:
|
||||
- pg: v17
|
||||
debian: bookworm
|
||||
env:
|
||||
VM_BUILDER_VERSION: v0.46.0
|
||||
VM_BUILDER_VERSION: v0.42.2
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
@@ -965,7 +965,7 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Verify docker-compose example and test extensions
|
||||
timeout-minutes: 20
|
||||
timeout-minutes: 40
|
||||
env:
|
||||
TAG: >-
|
||||
${{
|
||||
@@ -1434,10 +1434,10 @@ jobs:
|
||||
;;
|
||||
esac
|
||||
|
||||
notify-release-deploy-failure:
|
||||
needs: [ meta, deploy ]
|
||||
notify-storage-release-deploy-failure:
|
||||
needs: [ deploy ]
|
||||
# We want this to run even if (transitive) dependencies are skipped, because deploy should really be successful on release branch workflow runs.
|
||||
if: contains(fromJSON('["storage-release", "compute-release", "proxy-release"]'), needs.meta.outputs.run-kind) && needs.deploy.result != 'success' && always()
|
||||
if: github.ref_name == 'release' && needs.deploy.result != 'success' && always()
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
@@ -1445,40 +1445,15 @@ jobs:
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- name: Post release-deploy failure to team slack channel
|
||||
- name: Post release-deploy failure to team-storage slack channel
|
||||
uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0
|
||||
env:
|
||||
TEAM_ONCALL: >-
|
||||
${{
|
||||
fromJSON(format('{
|
||||
"storage-release": "<!subteam^{0}|@oncall-storage>",
|
||||
"compute-release": "<!subteam^{1}|@oncall-compute>",
|
||||
"proxy-release": "<!subteam^{2}|@oncall-proxy>"
|
||||
}',
|
||||
vars.SLACK_ONCALL_STORAGE_GROUP,
|
||||
vars.SLACK_ONCALL_COMPUTE_GROUP,
|
||||
vars.SLACK_ONCALL_PROXY_GROUP
|
||||
))[needs.meta.outputs.run-kind]
|
||||
}}
|
||||
CHANNEL: >-
|
||||
${{
|
||||
fromJSON(format('{
|
||||
"storage-release": "{0}",
|
||||
"compute-release": "{1}",
|
||||
"proxy-release": "{2}"
|
||||
}',
|
||||
vars.SLACK_STORAGE_CHANNEL_ID,
|
||||
vars.SLACK_COMPUTE_CHANNEL_ID,
|
||||
vars.SLACK_PROXY_CHANNEL_ID
|
||||
))[needs.meta.outputs.run-kind]
|
||||
}}
|
||||
with:
|
||||
method: chat.postMessage
|
||||
token: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
payload: |
|
||||
channel: ${{ env.CHANNEL }}
|
||||
channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }}
|
||||
text: |
|
||||
🔴 ${{ env.TEAM_ONCALL }}: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
|
||||
🔴 <!subteam^S06CJ87UMNY|@oncall-storage>: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
|
||||
|
||||
# The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
|
||||
promote-compatibility-data:
|
||||
|
||||
@@ -117,7 +117,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
|
||||
2
.github/workflows/cloud-extensions.yml
vendored
2
.github/workflows/cloud-extensions.yml
vendored
@@ -68,7 +68,7 @@ jobs:
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
region_id: ${{ inputs.region_id || 'aws-us-east-2' }}
|
||||
region_id: ${{ inputs.region_id }}
|
||||
postgres_version: ${{ matrix.pg-version }}
|
||||
project_settings: ${{ steps.project-settings.outputs.settings }}
|
||||
# We need these settings to get the expected output results.
|
||||
|
||||
6
.github/workflows/cloud-regress.yml
vendored
6
.github/workflows/cloud-regress.yml
vendored
@@ -89,7 +89,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create a new branch
|
||||
id: create-branch
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
test_selection: cloud_regress
|
||||
pg_version: ${{matrix.pg-version}}
|
||||
extra_params: -m remote_cluster
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}}
|
||||
|
||||
@@ -122,7 +122,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
|
||||
10
.github/workflows/ingest_benchmark.yml
vendored
10
.github/workflows/ingest_benchmark.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
fail-fast: false # allow other variants to continue even if one fails
|
||||
matrix:
|
||||
include:
|
||||
- target_project: new_empty_project_stripe_size_2048
|
||||
- target_project: new_empty_project_stripe_size_2048
|
||||
stripe_size: 2048 # 16 MiB
|
||||
postgres_version: 16
|
||||
disable_sharding: false
|
||||
@@ -98,7 +98,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
|
||||
@@ -110,10 +110,10 @@ jobs:
|
||||
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }}
|
||||
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
|
||||
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
|
||||
shard_count: 8
|
||||
stripe_size: ${{ matrix.stripe_size }}
|
||||
disable_sharding: ${{ matrix.disable_sharding }}
|
||||
disable_sharding: ${{ matrix.disable_sharding }}
|
||||
|
||||
- name: Initialize Neon project
|
||||
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
|
||||
@@ -171,7 +171,7 @@ jobs:
|
||||
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
|
||||
pg_version: v${{ matrix.postgres_version }}
|
||||
save_perf_report: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
|
||||
TARGET_PROJECT_TYPE: ${{ matrix.target_project }}
|
||||
|
||||
18
.github/workflows/large_oltp_benchmark.yml
vendored
18
.github/workflows/large_oltp_benchmark.yml
vendored
@@ -33,9 +33,9 @@ jobs:
|
||||
fail-fast: false # allow other variants to continue even if one fails
|
||||
matrix:
|
||||
include:
|
||||
- target: new_branch
|
||||
- target: new_branch
|
||||
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
||||
- target: reuse_branch
|
||||
- target: reuse_branch
|
||||
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
||||
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
|
||||
permissions:
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h
|
||||
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }}
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
PG_VERSION: 16 # pre-determined by pre-determined project
|
||||
@@ -85,7 +85,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Branch for large tenant
|
||||
if: ${{ matrix.target == 'new_branch' }}
|
||||
@@ -129,7 +129,7 @@ jobs:
|
||||
${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';"
|
||||
echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs"
|
||||
|
||||
- name: Benchmark pgbench with custom-scripts
|
||||
- name: Benchmark pgbench with custom-scripts
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
@@ -138,7 +138,7 @@ jobs:
|
||||
save_perf_report: true
|
||||
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -153,7 +153,7 @@ jobs:
|
||||
save_perf_report: true
|
||||
extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
@@ -179,8 +179,8 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
|
||||
|
||||
6
.github/workflows/neon_extra_builds.yml
vendored
6
.github/workflows/neon_extra_builds.yml
vendored
@@ -53,7 +53,7 @@ jobs:
|
||||
submodules: true
|
||||
|
||||
- name: Check for Postgres changes
|
||||
uses: step-security/paths-filter@v3
|
||||
uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3
|
||||
id: files_changed
|
||||
with:
|
||||
token: ${{ github.token }}
|
||||
@@ -69,6 +69,10 @@ jobs:
|
||||
|
||||
check-macos-build:
|
||||
needs: [ check-permissions, files-changed ]
|
||||
if: |
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
uses: ./.github/workflows/build-macos.yml
|
||||
with:
|
||||
pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
|
||||
|
||||
2
.github/workflows/periodic_pagebench.yml
vendored
2
.github/workflows/periodic_pagebench.yml
vendored
@@ -147,7 +147,7 @@ jobs:
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
|
||||
12
.github/workflows/pg-clients.yml
vendored
12
.github/workflows/pg-clients.yml
vendored
@@ -103,7 +103,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
@@ -122,7 +122,7 @@ jobs:
|
||||
run_in_parallel: false
|
||||
extra_params: -m remote_cluster
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
|
||||
@@ -139,7 +139,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
@@ -178,7 +178,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
@@ -195,7 +195,7 @@ jobs:
|
||||
run_in_parallel: false
|
||||
extra_params: -m remote_cluster
|
||||
pg_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
|
||||
@@ -212,7 +212,7 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
|
||||
6
.github/workflows/random-ops-test.yml
vendored
6
.github/workflows/random-ops-test.yml
vendored
@@ -66,7 +66,7 @@ jobs:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Run tests
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
@@ -76,7 +76,7 @@ jobs:
|
||||
run_in_parallel: false
|
||||
extra_params: -m remote_cluster
|
||||
pg_version: ${{ matrix.pg-version }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
RANDOM_SEED: ${{ inputs.random_seed }}
|
||||
@@ -88,6 +88,6 @@ jobs:
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
12
.github/workflows/release-compute.yml
vendored
12
.github/workflows/release-compute.yml
vendored
@@ -1,12 +0,0 @@
|
||||
name: Create compute release PR
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 7 * * FRI'
|
||||
|
||||
jobs:
|
||||
create-release-pr:
|
||||
uses: ./.github/workflows/release.yml
|
||||
with:
|
||||
component: compute
|
||||
secrets: inherit
|
||||
12
.github/workflows/release-proxy.yml
vendored
12
.github/workflows/release-proxy.yml
vendored
@@ -1,12 +0,0 @@
|
||||
name: Create proxy release PR
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 6 * * TUE'
|
||||
|
||||
jobs:
|
||||
create-release-pr:
|
||||
uses: ./.github/workflows/release.yml
|
||||
with:
|
||||
component: proxy
|
||||
secrets: inherit
|
||||
12
.github/workflows/release-storage.yml
vendored
12
.github/workflows/release-storage.yml
vendored
@@ -1,12 +0,0 @@
|
||||
name: Create storage release PR
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: '0 6 * * FRI'
|
||||
|
||||
jobs:
|
||||
create-release-pr:
|
||||
uses: ./.github/workflows/release.yml
|
||||
with:
|
||||
component: storage
|
||||
secrets: inherit
|
||||
93
.github/workflows/release.yml
vendored
93
.github/workflows/release.yml
vendored
@@ -1,34 +1,25 @@
|
||||
name: Create release PR
|
||||
name: Create Release Branch
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# It should be kept in sync with if-condition in jobs
|
||||
- cron: '0 6 * * TUE' # Proxy release
|
||||
- cron: '0 6 * * FRI' # Storage release
|
||||
- cron: '0 7 * * FRI' # Compute release
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
component:
|
||||
description: "Component to release"
|
||||
required: true
|
||||
type: choice
|
||||
options:
|
||||
- compute
|
||||
- proxy
|
||||
- storage
|
||||
cherry-pick:
|
||||
description: "Commits to cherry-pick (space separated, makes this a hotfix based on previous release)"
|
||||
create-storage-release-branch:
|
||||
type: boolean
|
||||
description: 'Create Storage release PR'
|
||||
required: false
|
||||
type: string
|
||||
default: ''
|
||||
|
||||
workflow_call:
|
||||
inputs:
|
||||
component:
|
||||
description: "Component to release"
|
||||
required: true
|
||||
type: string
|
||||
cherry-pick:
|
||||
description: "Commits to cherry-pick (space separated, makes this a hotfix based on previous release)"
|
||||
create-proxy-release-branch:
|
||||
type: boolean
|
||||
description: 'Create Proxy release PR'
|
||||
required: false
|
||||
create-compute-release-branch:
|
||||
type: boolean
|
||||
description: 'Create Compute release PR'
|
||||
required: false
|
||||
type: string
|
||||
default: ''
|
||||
|
||||
|
||||
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
|
||||
permissions: {}
|
||||
@@ -38,31 +29,41 @@ defaults:
|
||||
shell: bash -euo pipefail {0}
|
||||
|
||||
jobs:
|
||||
create-release-pr:
|
||||
runs-on: ubuntu-22.04
|
||||
create-storage-release-branch:
|
||||
if: ${{ github.event.schedule == '0 6 * * FRI' || inputs.create-storage-release-branch }}
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
uses: ./.github/workflows/_create-release-pr.yml
|
||||
with:
|
||||
component-name: 'Storage'
|
||||
source-branch: ${{ github.ref_name }}
|
||||
secrets:
|
||||
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
create-proxy-release-branch:
|
||||
if: ${{ github.event.schedule == '0 6 * * TUE' || inputs.create-proxy-release-branch }}
|
||||
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
- name: Create release PR
|
||||
uses: neondatabase/dev-actions/release-pr@290dec821d86fa8a93f019e8c69720f5865b5677
|
||||
with:
|
||||
component: ${{ inputs.component }}
|
||||
cherry-pick: ${{ inputs.cherry-pick }}
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
uses: ./.github/workflows/_create-release-pr.yml
|
||||
with:
|
||||
component-name: 'Proxy'
|
||||
source-branch: ${{ github.ref_name }}
|
||||
secrets:
|
||||
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
create-compute-release-branch:
|
||||
if: ${{ github.event.schedule == '0 7 * * FRI' || inputs.create-compute-release-branch }}
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
uses: ./.github/workflows/_create-release-pr.yml
|
||||
with:
|
||||
component-name: 'Compute'
|
||||
source-branch: ${{ github.ref_name }}
|
||||
secrets:
|
||||
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
|
||||
|
||||
@@ -297,6 +297,7 @@ RUN ./autogen.sh && \
|
||||
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
make staged-install && \
|
||||
cd extensions/postgis && \
|
||||
make clean && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
@@ -1083,34 +1084,6 @@ ARG PG_VERSION
|
||||
RUN cargo install --locked --version 0.12.9 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "rust extensions pgrx14"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx14
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN cargo install --locked --version 0.14.1 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "rust extensions pgrx14"
|
||||
#
|
||||
# Version 14 is now required by a few
|
||||
# This layer should be used as a base for new pgrx extensions,
|
||||
# and eventually get merged with `rust-extensions-build`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx14
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN cargo install --locked --version 0.14.1 cargo-pgrx && \
|
||||
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
|
||||
|
||||
USER root
|
||||
|
||||
#########################################################################################
|
||||
@@ -1128,11 +1101,11 @@ RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.
|
||||
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
|
||||
echo "#nothing to test here" > neon-test.sh
|
||||
|
||||
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.1.tar.gz -O pgrag.tar.gz && \
|
||||
echo "087b2ecd11ba307dc968042ef2e9e43dc04d9ba60e8306e882c407bbe1350a50 pgrag.tar.gz" | sha256sum --check && \
|
||||
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \
|
||||
echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \
|
||||
mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C .
|
||||
|
||||
FROM rust-extensions-build-pgrx14 AS pgrag-build
|
||||
FROM rust-extensions-build-pgrx12 AS pgrag-build
|
||||
COPY --from=pgrag-src /ext-src/ /ext-src/
|
||||
|
||||
# Install build-time dependencies
|
||||
@@ -1152,19 +1125,19 @@ RUN . venv/bin/activate && \
|
||||
|
||||
WORKDIR /ext-src/pgrag-src
|
||||
RUN cd exts/rag && \
|
||||
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control
|
||||
|
||||
RUN cd exts/rag_bge_small_en_v15 && \
|
||||
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
|
||||
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
|
||||
cargo pgrx install --release --features remote_onnx && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control
|
||||
|
||||
RUN cd exts/rag_jina_reranker_v1_tiny_en && \
|
||||
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
|
||||
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
|
||||
cargo pgrx install --release --features remote_onnx && \
|
||||
@@ -1347,39 +1320,6 @@ COPY --from=pg_session_jwt-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src/pg_session_jwt-src
|
||||
RUN cargo pgrx install --release
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-anon-pg-build"
|
||||
# compile anon extension
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build AS pg_anon-src
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
WORKDIR /ext-src
|
||||
COPY compute/patches/anon_v2.patch .
|
||||
|
||||
# This is an experimental extension, never got to real production.
|
||||
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
|
||||
ENV PATH="/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/latest/postgresql_anonymizer-latest.tar.gz -O pg_anon.tar.gz && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt && \
|
||||
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "=0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
patch -p1 < /ext-src/anon_v2.patch
|
||||
|
||||
FROM rust-extensions-build-pgrx14 AS pg-anon-pg-build
|
||||
ARG PG_VERSION
|
||||
COPY --from=pg_anon-src /ext-src/ /ext-src/
|
||||
WORKDIR /ext-src
|
||||
RUN cd pg_anon-src && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) extension PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
|
||||
chmod -R a+r ../pg_anon-src && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control;
|
||||
|
||||
########################################################################################
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "wal2json-build"
|
||||
@@ -1676,7 +1616,6 @@ COPY --from=pg_uuidv7-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_roaringbitmap-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql
|
||||
COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
@@ -1819,10 +1758,25 @@ RUN make PG_VERSION="${PG_VERSION:?}" -C compute
|
||||
|
||||
FROM pg-build AS extension-tests
|
||||
ARG PG_VERSION
|
||||
# This is required for the PostGIS test
|
||||
RUN apt-get update && case $DEBIAN_VERSION in \
|
||||
bullseye) \
|
||||
apt-get install -y libproj19 libgdal28 time; \
|
||||
;; \
|
||||
bookworm) \
|
||||
apt-get install -y libgdal32 libproj25 time; \
|
||||
;; \
|
||||
*) \
|
||||
echo "Unknown Debian version ${DEBIAN_VERSION}" && exit 1 \
|
||||
;; \
|
||||
esac
|
||||
|
||||
COPY docker-compose/ext-src/ /ext-src/
|
||||
|
||||
COPY --from=pg-build /postgres /postgres
|
||||
#COPY --from=postgis-src /ext-src/ /ext-src/
|
||||
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
COPY --from=postgis-build /ext-src/postgis-src /ext-src/postgis-src
|
||||
COPY --from=postgis-build /sfcgal/* /usr
|
||||
COPY --from=plv8-src /ext-src/ /ext-src/
|
||||
#COPY --from=h3-pg-src /ext-src/ /ext-src/
|
||||
COPY --from=postgresql-unit-src /ext-src/ /ext-src/
|
||||
|
||||
@@ -23,8 +23,6 @@
|
||||
import 'sql_exporter/getpage_prefetch_requests_total.libsonnet',
|
||||
import 'sql_exporter/getpage_prefetches_buffered.libsonnet',
|
||||
import 'sql_exporter/getpage_sync_requests_total.libsonnet',
|
||||
import 'sql_exporter/compute_getpage_stuck_requests_total.libsonnet',
|
||||
import 'sql_exporter/compute_getpage_max_inflight_stuck_time_ms.libsonnet',
|
||||
import 'sql_exporter/getpage_wait_seconds_bucket.libsonnet',
|
||||
import 'sql_exporter/getpage_wait_seconds_count.libsonnet',
|
||||
import 'sql_exporter/getpage_wait_seconds_sum.libsonnet',
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
metric_name: 'compute_getpage_max_inflight_stuck_time_ms',
|
||||
type: 'gauge',
|
||||
help: 'Max wait time for stuck requests among all backends. Includes only active stuck requests, terminated or disconnected ones are not accounted for',
|
||||
values: [
|
||||
'compute_getpage_max_inflight_stuck_time_ms',
|
||||
],
|
||||
query_ref: 'neon_perf_counters',
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
metric_name: 'compute_getpage_stuck_requests_total',
|
||||
type: 'counter',
|
||||
help: 'Total number of Getpage requests left without an answer for more than pageserver_response_log_timeout but less than pageserver_response_disconnect_timeout',
|
||||
values: [
|
||||
'compute_getpage_stuck_requests_total',
|
||||
],
|
||||
query_ref: 'neon_perf_counters',
|
||||
}
|
||||
@@ -9,8 +9,6 @@ SELECT d.* FROM pg_catalog.jsonb_to_record((SELECT jb FROM c)) AS d(
|
||||
getpage_wait_seconds_sum numeric,
|
||||
getpage_prefetch_requests_total numeric,
|
||||
getpage_sync_requests_total numeric,
|
||||
compute_getpage_stuck_requests_total numeric,
|
||||
compute_getpage_max_inflight_stuck_time_ms numeric,
|
||||
getpage_prefetch_misses_total numeric,
|
||||
getpage_prefetch_discards_total numeric,
|
||||
getpage_prefetches_buffered numeric,
|
||||
|
||||
@@ -1,129 +0,0 @@
|
||||
diff --git a/sql/anon.sql b/sql/anon.sql
|
||||
index 0cdc769..f6cc950 100644
|
||||
--- a/sql/anon.sql
|
||||
+++ b/sql/anon.sql
|
||||
@@ -1141,3 +1141,8 @@ $$
|
||||
-- TODO : https://en.wikipedia.org/wiki/L-diversity
|
||||
|
||||
-- TODO : https://en.wikipedia.org/wiki/T-closeness
|
||||
+
|
||||
+-- NEON Patches
|
||||
+
|
||||
+GRANT ALL ON SCHEMA anon to neon_superuser;
|
||||
+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
|
||||
diff --git a/sql/init.sql b/sql/init.sql
|
||||
index 7da6553..9b6164b 100644
|
||||
--- a/sql/init.sql
|
||||
+++ b/sql/init.sql
|
||||
@@ -74,50 +74,49 @@ $$
|
||||
|
||||
SECURITY LABEL FOR anon ON FUNCTION anon.load_csv IS 'UNTRUSTED';
|
||||
|
||||
--- load fake data from a given path
|
||||
-CREATE OR REPLACE FUNCTION anon.init(
|
||||
- datapath TEXT
|
||||
-)
|
||||
+CREATE OR REPLACE FUNCTION anon.load_fake_data()
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
DECLARE
|
||||
- datapath_check TEXT;
|
||||
success BOOLEAN;
|
||||
+ sharedir TEXT;
|
||||
+ datapath TEXT;
|
||||
BEGIN
|
||||
|
||||
- IF anon.is_initialized() THEN
|
||||
- RAISE NOTICE 'The anon extension is already initialized.';
|
||||
- RETURN TRUE;
|
||||
- END IF;
|
||||
+ datapath := '/extension/anon/';
|
||||
+ -- find the local extension directory
|
||||
+ SELECT setting INTO sharedir
|
||||
+ FROM pg_catalog.pg_config
|
||||
+ WHERE name = 'SHAREDIR';
|
||||
|
||||
SELECT bool_or(results) INTO success
|
||||
FROM unnest(array[
|
||||
- anon.load_csv('anon.identifiers_category',datapath||'/identifiers_category.csv'),
|
||||
- anon.load_csv('anon.identifier',datapath ||'/identifier.csv'),
|
||||
- anon.load_csv('anon.address',datapath ||'/address.csv'),
|
||||
- anon.load_csv('anon.city',datapath ||'/city.csv'),
|
||||
- anon.load_csv('anon.company',datapath ||'/company.csv'),
|
||||
- anon.load_csv('anon.country',datapath ||'/country.csv'),
|
||||
- anon.load_csv('anon.email', datapath ||'/email.csv'),
|
||||
- anon.load_csv('anon.first_name',datapath ||'/first_name.csv'),
|
||||
- anon.load_csv('anon.iban',datapath ||'/iban.csv'),
|
||||
- anon.load_csv('anon.last_name',datapath ||'/last_name.csv'),
|
||||
- anon.load_csv('anon.postcode',datapath ||'/postcode.csv'),
|
||||
- anon.load_csv('anon.siret',datapath ||'/siret.csv'),
|
||||
- anon.load_csv('anon.lorem_ipsum',datapath ||'/lorem_ipsum.csv')
|
||||
+ anon.load_csv('anon.identifiers_category',sharedir || datapath || '/identifiers_category.csv'),
|
||||
+ anon.load_csv('anon.identifier',sharedir || datapath || '/identifier.csv'),
|
||||
+ anon.load_csv('anon.address',sharedir || datapath || '/address.csv'),
|
||||
+ anon.load_csv('anon.city',sharedir || datapath || '/city.csv'),
|
||||
+ anon.load_csv('anon.company',sharedir || datapath || '/company.csv'),
|
||||
+ anon.load_csv('anon.country',sharedir || datapath || '/country.csv'),
|
||||
+ anon.load_csv('anon.email', sharedir || datapath || '/email.csv'),
|
||||
+ anon.load_csv('anon.first_name',sharedir || datapath || '/first_name.csv'),
|
||||
+ anon.load_csv('anon.iban',sharedir || datapath || '/iban.csv'),
|
||||
+ anon.load_csv('anon.last_name',sharedir || datapath || '/last_name.csv'),
|
||||
+ anon.load_csv('anon.postcode',sharedir || datapath || '/postcode.csv'),
|
||||
+ anon.load_csv('anon.siret',sharedir || datapath || '/siret.csv'),
|
||||
+ anon.load_csv('anon.lorem_ipsum',sharedir || datapath || '/lorem_ipsum.csv')
|
||||
]) results;
|
||||
RETURN success;
|
||||
-
|
||||
END;
|
||||
$$
|
||||
- LANGUAGE PLPGSQL
|
||||
+ LANGUAGE plpgsql
|
||||
VOLATILE
|
||||
RETURNS NULL ON NULL INPUT
|
||||
- PARALLEL UNSAFE -- because load_csv is unsafe
|
||||
- SECURITY INVOKER
|
||||
+ PARALLEL UNSAFE -- because of the EXCEPTION
|
||||
+ SECURITY DEFINER
|
||||
SET search_path=''
|
||||
;
|
||||
-SECURITY LABEL FOR anon ON FUNCTION anon.init(TEXT) IS 'UNTRUSTED';
|
||||
+
|
||||
+SECURITY LABEL FOR anon ON FUNCTION anon.load_fake_data IS 'UNTRUSTED';
|
||||
|
||||
-- People tend to forget the anon.init() step
|
||||
-- This is a friendly notice for them
|
||||
@@ -144,7 +143,7 @@ SECURITY LABEL FOR anon ON FUNCTION anon.notice_if_not_init IS 'UNTRUSTED';
|
||||
CREATE OR REPLACE FUNCTION anon.load(TEXT)
|
||||
RETURNS BOOLEAN AS
|
||||
$$
|
||||
- SELECT anon.init($1);
|
||||
+ SELECT anon.init();
|
||||
$$
|
||||
LANGUAGE SQL
|
||||
VOLATILE
|
||||
@@ -159,16 +158,16 @@ SECURITY LABEL FOR anon ON FUNCTION anon.load(TEXT) IS 'UNTRUSTED';
|
||||
CREATE OR REPLACE FUNCTION anon.init()
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
- WITH conf AS (
|
||||
- -- find the local extension directory
|
||||
- SELECT setting AS sharedir
|
||||
- FROM pg_catalog.pg_config
|
||||
- WHERE name = 'SHAREDIR'
|
||||
- )
|
||||
- SELECT anon.init(conf.sharedir || '/extension/anon/')
|
||||
- FROM conf;
|
||||
+BEGIN
|
||||
+ IF anon.is_initialized() THEN
|
||||
+ RAISE NOTICE 'The anon extension is already initialized.';
|
||||
+ RETURN TRUE;
|
||||
+ END IF;
|
||||
+
|
||||
+ RETURN anon.load_fake_data();
|
||||
+END;
|
||||
$$
|
||||
- LANGUAGE SQL
|
||||
+ LANGUAGE plpgsql
|
||||
VOLATILE
|
||||
PARALLEL UNSAFE -- because init is unsafe
|
||||
SECURITY INVOKER
|
||||
@@ -22,7 +22,7 @@ commands:
|
||||
- name: local_proxy
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: 'RUST_LOG="error" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||
- name: postgres-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -22,7 +22,7 @@ commands:
|
||||
- name: local_proxy
|
||||
user: postgres
|
||||
sysvInitAction: respawn
|
||||
shell: 'RUST_LOG="error" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
|
||||
- name: postgres-exporter
|
||||
user: nobody
|
||||
sysvInitAction: respawn
|
||||
|
||||
@@ -424,10 +424,10 @@ pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
|
||||
experimental,
|
||||
};
|
||||
|
||||
let span = span!(Level::INFO, "compute_monitor");
|
||||
thread::Builder::new()
|
||||
.name("compute-monitor".into())
|
||||
.spawn(move || {
|
||||
let span = span!(Level::INFO, "compute_monitor");
|
||||
let _enter = span.enter();
|
||||
monitor.run();
|
||||
})
|
||||
|
||||
@@ -112,7 +112,7 @@ impl SafekeeperNode {
|
||||
}
|
||||
|
||||
/// Initializes a safekeeper node by creating all necessary files,
|
||||
/// e.g. SSL certificates and JWT token file.
|
||||
/// e.g. SSL certificates.
|
||||
pub fn initialize(&self) -> anyhow::Result<()> {
|
||||
if self.env.generate_local_ssl_certs {
|
||||
self.env.generate_ssl_cert(
|
||||
@@ -120,17 +120,6 @@ impl SafekeeperNode {
|
||||
&self.datadir_path().join("server.key"),
|
||||
)?;
|
||||
}
|
||||
|
||||
// Generate a token file for authentication with other safekeepers
|
||||
if self.conf.auth_enabled {
|
||||
let token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
|
||||
|
||||
let token_path = self.datadir_path().join("peer_jwt_token");
|
||||
std::fs::write(token_path, token)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -229,26 +218,14 @@ impl SafekeeperNode {
|
||||
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
||||
}
|
||||
|
||||
if self.conf.auth_enabled {
|
||||
let token_path = self.datadir_path().join("peer_jwt_token");
|
||||
let token_path_str = token_path
|
||||
.to_str()
|
||||
.with_context(|| {
|
||||
format!("Token path {token_path:?} cannot be represented as a unicode string")
|
||||
})?
|
||||
.to_owned();
|
||||
args.extend(["--auth-token-path".to_owned(), token_path_str]);
|
||||
}
|
||||
|
||||
args.extend_from_slice(extra_opts);
|
||||
|
||||
let env_variables = Vec::new();
|
||||
background_process::start_process(
|
||||
&format!("safekeeper-{id}"),
|
||||
&datadir,
|
||||
&self.env.safekeeper_bin(),
|
||||
&args,
|
||||
env_variables,
|
||||
self.safekeeper_env_variables()?,
|
||||
background_process::InitialPidFile::Expect(self.pid_file()),
|
||||
retry_timeout,
|
||||
|| async {
|
||||
@@ -262,6 +239,18 @@ impl SafekeeperNode {
|
||||
.await
|
||||
}
|
||||
|
||||
fn safekeeper_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
|
||||
// Generate a token to connect from safekeeper to peers
|
||||
if self.conf.auth_enabled {
|
||||
let token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
|
||||
Ok(vec![("SAFEKEEPER_AUTH_TOKEN".to_owned(), token)])
|
||||
} else {
|
||||
Ok(Vec::new())
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Stop the server.
|
||||
///
|
||||
|
||||
@@ -13,6 +13,6 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
|
||||
jq \
|
||||
netcat-openbsd
|
||||
#This is required for the pg_hintplan test
|
||||
RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw
|
||||
RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src/ && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src
|
||||
|
||||
USER postgres
|
||||
|
||||
@@ -186,13 +186,14 @@ services:
|
||||
|
||||
neon-test-extensions:
|
||||
profiles: ["test-extensions"]
|
||||
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
|
||||
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-${PG_VERSION:-16}}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
|
||||
environment:
|
||||
- PGPASSWORD=cloud_admin
|
||||
- PGUSER=${PGUSER:-cloud_admin}
|
||||
- PGPASSWORD=${PGPASSWORD:-cloud_admin}
|
||||
entrypoint:
|
||||
- "/bin/bash"
|
||||
- "-c"
|
||||
command:
|
||||
- sleep 1800
|
||||
- sleep 360000
|
||||
depends_on:
|
||||
- compute
|
||||
|
||||
@@ -54,6 +54,15 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
|
||||
echo Adding dummy config
|
||||
docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
|
||||
# Prepare for the PostGIS test
|
||||
docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
|
||||
TMPDIR=$(mktemp -d)
|
||||
docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
|
||||
docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
|
||||
docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
|
||||
docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
|
||||
docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
|
||||
rm -rf "${TMPDIR}"
|
||||
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
|
||||
TMPDIR=$(mktemp -d)
|
||||
docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
|
||||
@@ -68,7 +77,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
|
||||
docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
|
||||
# We are running tests now
|
||||
rm -f testout.txt testout_contrib.txt
|
||||
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
|
||||
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
|
||||
neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
|
||||
docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
|
||||
neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
|
||||
|
||||
9
docker-compose/ext-src/postgis-src/neon-test.sh
Executable file
9
docker-compose/ext-src/postgis-src/neon-test.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname "$0")"
|
||||
if [[ ${PG_VERSION} = v17 ]]; then
|
||||
sed -i '/computed_columns/d' regress/core/tests.mk
|
||||
fi
|
||||
patch -p1 <postgis-no-upgrade-test.patch
|
||||
trap 'echo Cleaning up; patch -R -p1 <postgis-no-upgrade-test.patch' EXIT
|
||||
make installcheck-base
|
||||
@@ -0,0 +1,21 @@
|
||||
diff --git a/regress/runtest.mk b/regress/runtest.mk
|
||||
index c051f03..010e493 100644
|
||||
--- a/regress/runtest.mk
|
||||
+++ b/regress/runtest.mk
|
||||
@@ -24,16 +24,6 @@ check-regress:
|
||||
|
||||
POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(RUNTESTFLAGS_INTERNAL) $(TESTS)
|
||||
|
||||
- @if echo "$(RUNTESTFLAGS)" | grep -vq -- --upgrade; then \
|
||||
- echo "Running upgrade test as RUNTESTFLAGS did not contain that"; \
|
||||
- POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl \
|
||||
- --upgrade \
|
||||
- $(RUNTESTFLAGS) \
|
||||
- $(RUNTESTFLAGS_INTERNAL) \
|
||||
- $(TESTS); \
|
||||
- else \
|
||||
- echo "Skipping upgrade test as RUNTESTFLAGS already requested upgrades"; \
|
||||
- fi
|
||||
|
||||
check-long:
|
||||
$(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(TESTS) $(TESTS_SLOW)
|
||||
218
docker-compose/ext-src/postgis-src/postgis-regular-v17.patch
Normal file
218
docker-compose/ext-src/postgis-src/postgis-regular-v17.patch
Normal file
@@ -0,0 +1,218 @@
|
||||
diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk
|
||||
index 00918e1..7e2b6cd 100644
|
||||
--- a/raster/test/regress/tests.mk
|
||||
+++ b/raster/test/regress/tests.mk
|
||||
@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \
|
||||
$(RUNTESTFLAGS_INTERNAL) \
|
||||
--after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql
|
||||
|
||||
-RASTER_TEST_FIRST = \
|
||||
- $(top_srcdir)/raster/test/regress/check_gdal \
|
||||
- $(top_srcdir)/raster/test/regress/loader/load_outdb
|
||||
+RASTER_TEST_FIRST =
|
||||
|
||||
RASTER_TEST_LAST = \
|
||||
$(top_srcdir)/raster/test/regress/clean
|
||||
@@ -33,9 +31,7 @@ RASTER_TEST_IO = \
|
||||
|
||||
RASTER_TEST_BASIC_FUNC = \
|
||||
$(top_srcdir)/raster/test/regress/rt_bytea \
|
||||
- $(top_srcdir)/raster/test/regress/rt_wkb \
|
||||
$(top_srcdir)/raster/test/regress/box3d \
|
||||
- $(top_srcdir)/raster/test/regress/rt_addband \
|
||||
$(top_srcdir)/raster/test/regress/rt_band \
|
||||
$(top_srcdir)/raster/test/regress/rt_tile
|
||||
|
||||
@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \
|
||||
$(top_srcdir)/raster/test/regress/rt_neighborhood \
|
||||
$(top_srcdir)/raster/test/regress/rt_nearestvalue \
|
||||
$(top_srcdir)/raster/test/regress/rt_pixelofvalue \
|
||||
- $(top_srcdir)/raster/test/regress/rt_polygon \
|
||||
- $(top_srcdir)/raster/test/regress/rt_setbandpath
|
||||
+ $(top_srcdir)/raster/test/regress/rt_polygon
|
||||
|
||||
RASTER_TEST_UTILITY = \
|
||||
$(top_srcdir)/raster/test/regress/rt_utility \
|
||||
- $(top_srcdir)/raster/test/regress/rt_fromgdalraster \
|
||||
- $(top_srcdir)/raster/test/regress/rt_asgdalraster \
|
||||
- $(top_srcdir)/raster/test/regress/rt_astiff \
|
||||
- $(top_srcdir)/raster/test/regress/rt_asjpeg \
|
||||
- $(top_srcdir)/raster/test/regress/rt_aspng \
|
||||
$(top_srcdir)/raster/test/regress/rt_reclass \
|
||||
$(top_srcdir)/raster/test/regress/rt_gdalwarp \
|
||||
$(top_srcdir)/raster/test/regress/rt_gdalcontour \
|
||||
@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \
|
||||
|
||||
RASTER_TEST_BUGS = \
|
||||
$(top_srcdir)/raster/test/regress/bug_test_car5 \
|
||||
- $(top_srcdir)/raster/test/regress/permitted_gdal_drivers \
|
||||
$(top_srcdir)/raster/test/regress/tickets
|
||||
|
||||
RASTER_TEST_LOADER = \
|
||||
$(top_srcdir)/raster/test/regress/loader/Basic \
|
||||
$(top_srcdir)/raster/test/regress/loader/Projected \
|
||||
$(top_srcdir)/raster/test/regress/loader/BasicCopy \
|
||||
- $(top_srcdir)/raster/test/regress/loader/BasicFilename \
|
||||
- $(top_srcdir)/raster/test/regress/loader/BasicOutDB \
|
||||
- $(top_srcdir)/raster/test/regress/loader/Tiled10x10 \
|
||||
- $(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \
|
||||
- $(top_srcdir)/raster/test/regress/loader/Tiled8x8 \
|
||||
- $(top_srcdir)/raster/test/regress/loader/TiledAuto \
|
||||
- $(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \
|
||||
- $(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn
|
||||
+ $(top_srcdir)/raster/test/regress/loader/BasicFilename
|
||||
|
||||
RASTER_TESTS := $(RASTER_TEST_FIRST) \
|
||||
$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \
|
||||
diff --git a/regress/core/binary.sql b/regress/core/binary.sql
|
||||
index 7a36b65..ad78fc7 100644
|
||||
--- a/regress/core/binary.sql
|
||||
+++ b/regress/core/binary.sql
|
||||
@@ -1,4 +1,5 @@
|
||||
SET client_min_messages TO warning;
|
||||
+
|
||||
CREATE SCHEMA tm;
|
||||
|
||||
CREATE TABLE tm.geoms (id serial, g geometry);
|
||||
@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;
|
||||
INSERT INTO tm.geoms(g)
|
||||
SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;
|
||||
|
||||
-COPY tm.geoms TO :tmpfile WITH BINARY;
|
||||
+-- define temp file path
|
||||
+\set tmpfile '/tmp/postgis_binary_test.dat'
|
||||
+
|
||||
+-- export
|
||||
+\set command '\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'
|
||||
+:command
|
||||
+
|
||||
+-- import
|
||||
CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;
|
||||
-COPY tm.geoms_in FROM :tmpfile WITH BINARY;
|
||||
-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id
|
||||
- AND ST_OrderingEquals(i.g, o.g);
|
||||
+\set command '\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'
|
||||
+:command
|
||||
+
|
||||
+SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o
|
||||
+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);
|
||||
|
||||
CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms
|
||||
WHERE geometrytype(g) NOT LIKE '%CURVE%'
|
||||
AND geometrytype(g) NOT LIKE '%CIRCULAR%'
|
||||
AND geometrytype(g) NOT LIKE '%SURFACE%'
|
||||
AND geometrytype(g) NOT LIKE 'TRIANGLE%'
|
||||
- AND geometrytype(g) NOT LIKE 'TIN%'
|
||||
-;
|
||||
+ AND geometrytype(g) NOT LIKE 'TIN%';
|
||||
|
||||
-COPY tm.geogs TO :tmpfile WITH BINARY;
|
||||
+-- export
|
||||
+\set command '\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'
|
||||
+:command
|
||||
+
|
||||
+-- import
|
||||
CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;
|
||||
-COPY tm.geogs_in FROM :tmpfile WITH BINARY;
|
||||
-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id
|
||||
- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
|
||||
+\set command '\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'
|
||||
+:command
|
||||
+
|
||||
+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o
|
||||
+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
|
||||
|
||||
DROP SCHEMA tm CASCADE;
|
||||
+
|
||||
diff --git a/regress/core/tests.mk b/regress/core/tests.mk
|
||||
index 9e05244..a63a3e1 100644
|
||||
--- a/regress/core/tests.mk
|
||||
+++ b/regress/core/tests.mk
|
||||
@@ -16,14 +16,13 @@ POSTGIS_PGSQL_VERSION=170
|
||||
POSTGIS_GEOS_VERSION=31101
|
||||
HAVE_JSON=yes
|
||||
HAVE_SPGIST=yes
|
||||
-INTERRUPTTESTS=yes
|
||||
+INTERRUPTTESTS=no
|
||||
|
||||
current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
|
||||
RUNTESTFLAGS_INTERNAL += \
|
||||
--before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \
|
||||
--after-upgrade-script $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \
|
||||
- --after-create-script $(top_srcdir)/regress/hooks/hook-after-create.sql \
|
||||
--before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql
|
||||
|
||||
TESTS += \
|
||||
@@ -40,7 +39,6 @@ TESTS += \
|
||||
$(top_srcdir)/regress/core/dumppoints \
|
||||
$(top_srcdir)/regress/core/dumpsegments \
|
||||
$(top_srcdir)/regress/core/empty \
|
||||
- $(top_srcdir)/regress/core/estimatedextent \
|
||||
$(top_srcdir)/regress/core/forcecurve \
|
||||
$(top_srcdir)/regress/core/flatgeobuf \
|
||||
$(top_srcdir)/regress/core/frechet \
|
||||
@@ -60,7 +58,6 @@ TESTS += \
|
||||
$(top_srcdir)/regress/core/out_marc21 \
|
||||
$(top_srcdir)/regress/core/in_encodedpolyline \
|
||||
$(top_srcdir)/regress/core/iscollection \
|
||||
- $(top_srcdir)/regress/core/legacy \
|
||||
$(top_srcdir)/regress/core/letters \
|
||||
$(top_srcdir)/regress/core/lwgeom_regress \
|
||||
$(top_srcdir)/regress/core/measures \
|
||||
@@ -119,7 +116,6 @@ TESTS += \
|
||||
$(top_srcdir)/regress/core/temporal_knn \
|
||||
$(top_srcdir)/regress/core/tickets \
|
||||
$(top_srcdir)/regress/core/twkb \
|
||||
- $(top_srcdir)/regress/core/typmod \
|
||||
$(top_srcdir)/regress/core/wkb \
|
||||
$(top_srcdir)/regress/core/wkt \
|
||||
$(top_srcdir)/regress/core/wmsservers \
|
||||
@@ -143,8 +139,7 @@ TESTS += \
|
||||
$(top_srcdir)/regress/core/oriented_envelope \
|
||||
$(top_srcdir)/regress/core/point_coordinates \
|
||||
$(top_srcdir)/regress/core/out_geojson \
|
||||
- $(top_srcdir)/regress/core/wrapx \
|
||||
- $(top_srcdir)/regress/core/computed_columns
|
||||
+ $(top_srcdir)/regress/core/wrapx
|
||||
|
||||
# Slow slow tests
|
||||
TESTS_SLOW = \
|
||||
diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
|
||||
index ac4f8ad..4bad4fc 100644
|
||||
--- a/regress/loader/tests.mk
|
||||
+++ b/regress/loader/tests.mk
|
||||
@@ -38,7 +38,5 @@ TESTS += \
|
||||
$(top_srcdir)/regress/loader/Latin1 \
|
||||
$(top_srcdir)/regress/loader/Latin1-implicit \
|
||||
$(top_srcdir)/regress/loader/mfile \
|
||||
- $(top_srcdir)/regress/loader/TestSkipANALYZE \
|
||||
- $(top_srcdir)/regress/loader/TestANALYZE \
|
||||
$(top_srcdir)/regress/loader/CharNoWidth \
|
||||
|
||||
diff --git a/regress/run_test.pl b/regress/run_test.pl
|
||||
index cac4b2e..4c7c82b 100755
|
||||
--- a/regress/run_test.pl
|
||||
+++ b/regress/run_test.pl
|
||||
@@ -238,7 +238,6 @@ $ENV{"LANG"} = "C";
|
||||
# Add locale info to the psql options
|
||||
# Add pg12 precision suppression
|
||||
my $PGOPTIONS = $ENV{"PGOPTIONS"};
|
||||
-$PGOPTIONS .= " -c lc_messages=C";
|
||||
$PGOPTIONS .= " -c client_min_messages=NOTICE";
|
||||
$PGOPTIONS .= " -c extra_float_digits=0";
|
||||
$ENV{"PGOPTIONS"} = $PGOPTIONS;
|
||||
diff --git a/topology/test/tests.mk b/topology/test/tests.mk
|
||||
index cbe2633..2c7c18f 100644
|
||||
--- a/topology/test/tests.mk
|
||||
+++ b/topology/test/tests.mk
|
||||
@@ -46,9 +46,7 @@ TESTS += \
|
||||
$(top_srcdir)/topology/test/regress/legacy_query.sql \
|
||||
$(top_srcdir)/topology/test/regress/legacy_validate.sql \
|
||||
$(top_srcdir)/topology/test/regress/polygonize.sql \
|
||||
- $(top_srcdir)/topology/test/regress/populate_topology_layer.sql \
|
||||
$(top_srcdir)/topology/test/regress/removeunusedprimitives.sql \
|
||||
- $(top_srcdir)/topology/test/regress/renametopogeometrycolumn.sql \
|
||||
$(top_srcdir)/topology/test/regress/renametopology.sql \
|
||||
$(top_srcdir)/topology/test/regress/share_sequences.sql \
|
||||
$(top_srcdir)/topology/test/regress/sqlmm.sql \
|
||||
46
docker-compose/ext-src/postgis-src/raster_outdb_template.sql
Normal file
46
docker-compose/ext-src/postgis-src/raster_outdb_template.sql
Normal file
File diff suppressed because one or more lines are too long
19
docker-compose/ext-src/postgis-src/regular-test.sh
Executable file
19
docker-compose/ext-src/postgis-src/regular-test.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
cd "$(dirname "${0}")"
|
||||
dropdb --if-exist contrib_regression
|
||||
createdb contrib_regression
|
||||
psql -d contrib_regression -c "ALTER DATABASE contrib_regression SET TimeZone='UTC'" \
|
||||
-c "ALTER DATABASE contrib_regression SET DateStyle='ISO, MDY'" \
|
||||
-c "CREATE EXTENSION postgis SCHEMA public" \
|
||||
-c "CREATE EXTENSION postgis_topology" \
|
||||
-c "CREATE EXTENSION postgis_tiger_geocoder CASCADE" \
|
||||
-c "CREATE EXTENSION postgis_raster SCHEMA public" \
|
||||
-c "CREATE EXTENSION postgis_sfcgal SCHEMA public"
|
||||
patch -p1 <postgis-no-upgrade-test.patch
|
||||
patch -p1 <"postgis-regular-${PG_VERSION}.patch"
|
||||
psql -d contrib_regression -f raster_outdb_template.sql
|
||||
trap 'patch -R -p1 <postgis-no-upgrade-test.patch && patch -R -p1 <"postgis-regular-${PG_VERSION}.patch"' EXIT
|
||||
#XXX remove after fix of using system libpq problem
|
||||
export LD_LIBRARY_PATH=/usr/local/pgsql/lib
|
||||
POSTGIS_REGRESS_DB=contrib_regression RUNTESTFLAGS=--nocreate make installcheck-base
|
||||
@@ -3,5 +3,3 @@ pg_distrib_dir='/usr/local/'
|
||||
listen_pg_addr='0.0.0.0:6400'
|
||||
listen_http_addr='0.0.0.0:9898'
|
||||
remote_storage={ endpoint='http://minio:9000', bucket_name='neon', bucket_region='eu-north-1', prefix_in_bucket='/pageserver' }
|
||||
control_plane_api='http://0.0.0.0:6666' # No storage controller in docker compose, specify a junk address
|
||||
control_plane_emergency_mode=true
|
||||
|
||||
@@ -38,6 +38,11 @@ Currently, the following metrics are collected:
|
||||
Amount of WAL produced , by a timeline, i.e. last_record_lsn
|
||||
This is an absolute, per-timeline metric.
|
||||
|
||||
- `resident_size`
|
||||
|
||||
Size of all the layer files in the tenant's directory on disk on the pageserver.
|
||||
This is an absolute, per-tenant metric.
|
||||
|
||||
- `remote_storage_size`
|
||||
|
||||
Size of the remote storage (S3) directory.
|
||||
|
||||
@@ -841,10 +841,6 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
|
||||
|
||||
let expected_end = match &end {
|
||||
ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF | Cancelled => true,
|
||||
// The timeline doesn't exist and we have been requested to not auto-create it.
|
||||
// Compute requests for timelines that haven't been created yet
|
||||
// might reach us before the storcon request to create those timelines.
|
||||
TimelineNoCreate => true,
|
||||
CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error))
|
||||
if is_expected_io_error(io_error) =>
|
||||
{
|
||||
@@ -1063,8 +1059,6 @@ pub enum CopyStreamHandlerEnd {
|
||||
Terminate,
|
||||
#[error("EOF on COPY stream")]
|
||||
EOF,
|
||||
#[error("timeline not found, and allow_timeline_creation is false")]
|
||||
TimelineNoCreate,
|
||||
/// The connection was lost
|
||||
#[error("connection error: {0}")]
|
||||
Disconnected(#[from] ConnectionError),
|
||||
|
||||
@@ -303,8 +303,7 @@ pub struct PullTimelineRequest {
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PullTimelineResponse {
|
||||
/// Donor safekeeper host.
|
||||
/// None if no pull happened because the timeline already exists.
|
||||
pub safekeeper_host: Option<String>,
|
||||
// Donor safekeeper host
|
||||
pub safekeeper_host: String,
|
||||
// TODO: add more fields?
|
||||
}
|
||||
|
||||
@@ -77,9 +77,7 @@ impl StorageModel {
|
||||
}
|
||||
|
||||
SizeResult {
|
||||
// If total_size is 0, it means that the tenant has all timelines offloaded; we need to report 1
|
||||
// here so that the data point shows up in the s3 files.
|
||||
total_size: total_size.max(1),
|
||||
total_size,
|
||||
segments: segment_results,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@ use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer, delta_layer, ima
|
||||
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
|
||||
use pageserver::virtual_file::api::IoMode;
|
||||
use pageserver::{page_cache, virtual_file};
|
||||
use pageserver_api::key::Key;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use crate::layer_map_analyzer::parse_filename;
|
||||
@@ -28,7 +27,6 @@ pub(crate) enum LayerCmd {
|
||||
path: PathBuf,
|
||||
tenant: String,
|
||||
timeline: String,
|
||||
key: Option<Key>,
|
||||
},
|
||||
/// Dump all information of a layer file
|
||||
DumpLayer {
|
||||
@@ -102,7 +100,6 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
path,
|
||||
tenant,
|
||||
timeline,
|
||||
key,
|
||||
} => {
|
||||
let timeline_path = path
|
||||
.join(TENANTS_SEGMENT_NAME)
|
||||
@@ -110,37 +107,21 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
.join(TIMELINES_SEGMENT_NAME)
|
||||
.join(timeline);
|
||||
let mut idx = 0;
|
||||
let mut to_print = Vec::default();
|
||||
for layer in fs::read_dir(timeline_path)? {
|
||||
let layer = layer?;
|
||||
if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {
|
||||
if let Some(key) = key {
|
||||
if layer_file.key_range.start <= *key && *key < layer_file.key_range.end {
|
||||
to_print.push((idx, layer_file));
|
||||
}
|
||||
} else {
|
||||
to_print.push((idx, layer_file));
|
||||
}
|
||||
println!(
|
||||
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
|
||||
idx,
|
||||
layer_file.key_range.start,
|
||||
layer_file.key_range.end,
|
||||
layer_file.lsn_range.start,
|
||||
layer_file.lsn_range.end,
|
||||
layer_file.is_delta,
|
||||
);
|
||||
idx += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if key.is_some() {
|
||||
to_print
|
||||
.sort_by_key(|(_idx, layer_file)| std::cmp::Reverse(layer_file.lsn_range.end));
|
||||
}
|
||||
|
||||
for (idx, layer_file) in to_print {
|
||||
println!(
|
||||
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
|
||||
idx,
|
||||
layer_file.key_range.start,
|
||||
layer_file.key_range.end,
|
||||
layer_file.lsn_range.start,
|
||||
layer_file.lsn_range.end,
|
||||
layer_file.is_delta,
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
LayerCmd::DumpLayer {
|
||||
|
||||
@@ -504,7 +504,7 @@ fn start_pageserver(
|
||||
// Set up deletion queue
|
||||
let (deletion_queue, deletion_workers) = DeletionQueue::new(
|
||||
remote_storage.clone(),
|
||||
StorageControllerUpcallClient::new(conf, &shutdown_pageserver),
|
||||
StorageControllerUpcallClient::new(conf, &shutdown_pageserver)?,
|
||||
conf,
|
||||
);
|
||||
deletion_workers.spawn_with(BACKGROUND_RUNTIME.handle());
|
||||
|
||||
@@ -150,7 +150,7 @@ pub struct PageServerConf {
|
||||
/// not terrible.
|
||||
pub background_task_maximum_delay: Duration,
|
||||
|
||||
pub control_plane_api: Url,
|
||||
pub control_plane_api: Option<Url>,
|
||||
|
||||
/// JWT token for use with the control plane API.
|
||||
pub control_plane_api_token: Option<SecretString>,
|
||||
@@ -438,8 +438,7 @@ impl PageServerConf {
|
||||
test_remote_failures,
|
||||
ondemand_download_behavior_treat_error_as_warn,
|
||||
background_task_maximum_delay,
|
||||
control_plane_api: control_plane_api
|
||||
.ok_or_else(|| anyhow::anyhow!("`control_plane_api` must be set"))?,
|
||||
control_plane_api,
|
||||
control_plane_emergency_mode,
|
||||
heatmap_upload_concurrency,
|
||||
secondary_download_concurrency,
|
||||
@@ -574,7 +573,6 @@ impl PageServerConf {
|
||||
background_task_maximum_delay: Duration::ZERO,
|
||||
load_previous_heatmap: Some(true),
|
||||
generate_unarchival_heatmap: Some(true),
|
||||
control_plane_api: Some(Url::parse("http://localhost:6666").unwrap()),
|
||||
..Default::default()
|
||||
};
|
||||
PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap()
|
||||
@@ -643,12 +641,9 @@ mod tests {
|
||||
use super::PageServerConf;
|
||||
|
||||
#[test]
|
||||
fn test_minimal_config_toml_is_valid() {
|
||||
// The minimal valid config for running a pageserver:
|
||||
// - control_plane_api is mandatory, as pageservers cannot run in isolation
|
||||
// - we use Default impl of everything else in this situation
|
||||
fn test_empty_config_toml_is_valid() {
|
||||
// we use Default impl of everything in this situation
|
||||
let input = r#"
|
||||
control_plane_api = "http://localhost:6666"
|
||||
"#;
|
||||
let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
|
||||
.expect("empty config is valid");
|
||||
|
||||
@@ -30,6 +30,9 @@ pub(super) enum Name {
|
||||
/// Tenant remote size
|
||||
#[serde(rename = "remote_storage_size")]
|
||||
RemoteSize,
|
||||
/// Tenant resident size
|
||||
#[serde(rename = "resident_size")]
|
||||
ResidentSize,
|
||||
/// Tenant synthetic size
|
||||
#[serde(rename = "synthetic_storage_size")]
|
||||
SyntheticSize,
|
||||
@@ -184,6 +187,18 @@ impl MetricsKey {
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// Sum of [`Timeline::resident_physical_size`] for each `Tenant`.
|
||||
///
|
||||
/// [`Timeline::resident_physical_size`]: crate::tenant::Timeline::resident_physical_size
|
||||
const fn resident_size(tenant_id: TenantId) -> AbsoluteValueFactory {
|
||||
MetricsKey {
|
||||
tenant_id,
|
||||
timeline_id: None,
|
||||
metric: Name::ResidentSize,
|
||||
}
|
||||
.absolute_values()
|
||||
}
|
||||
|
||||
/// [`TenantShard::cached_synthetic_size`] as refreshed by [`calculate_synthetic_size_worker`].
|
||||
///
|
||||
/// [`TenantShard::cached_synthetic_size`]: crate::tenant::TenantShard::cached_synthetic_size
|
||||
@@ -246,7 +261,10 @@ where
|
||||
let mut tenants = std::pin::pin!(tenants);
|
||||
|
||||
while let Some((tenant_id, tenant)) = tenants.next().await {
|
||||
let mut tenant_resident_size = 0;
|
||||
|
||||
let timelines = tenant.list_timelines();
|
||||
let timelines_len = timelines.len();
|
||||
for timeline in timelines {
|
||||
let timeline_id = timeline.timeline_id;
|
||||
|
||||
@@ -269,9 +287,16 @@ where
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
tenant_resident_size += timeline.resident_physical_size();
|
||||
}
|
||||
|
||||
let snap = TenantSnapshot::collect(&tenant);
|
||||
if timelines_len == 0 {
|
||||
// Force set it to 1 byte to avoid not being reported -- all timelines are offloaded.
|
||||
tenant_resident_size = 1;
|
||||
}
|
||||
|
||||
let snap = TenantSnapshot::collect(&tenant, tenant_resident_size);
|
||||
snap.to_metrics(tenant_id, Utc::now(), cache, &mut current_metrics);
|
||||
}
|
||||
|
||||
@@ -280,14 +305,19 @@ where
|
||||
|
||||
/// In-between abstraction to allow testing metrics without actual Tenants.
|
||||
struct TenantSnapshot {
|
||||
resident_size: u64,
|
||||
remote_size: u64,
|
||||
synthetic_size: u64,
|
||||
}
|
||||
|
||||
impl TenantSnapshot {
|
||||
/// Collect tenant status to have metrics created out of it.
|
||||
fn collect(t: &Arc<crate::tenant::TenantShard>) -> Self {
|
||||
///
|
||||
/// `resident_size` is calculated of the timelines we had access to for other metrics, so we
|
||||
/// cannot just list timelines here.
|
||||
fn collect(t: &Arc<crate::tenant::TenantShard>, resident_size: u64) -> Self {
|
||||
TenantSnapshot {
|
||||
resident_size,
|
||||
remote_size: t.remote_size(),
|
||||
// Note that this metric is calculated in a separate bgworker
|
||||
// Here we only use cached value, which may lag behind the real latest one
|
||||
@@ -304,6 +334,8 @@ impl TenantSnapshot {
|
||||
) {
|
||||
let remote_size = MetricsKey::remote_storage_size(tenant_id).at(now, self.remote_size);
|
||||
|
||||
let resident_size = MetricsKey::resident_size(tenant_id).at(now, self.resident_size);
|
||||
|
||||
let synthetic_size = {
|
||||
let factory = MetricsKey::synthetic_size(tenant_id);
|
||||
let mut synthetic_size = self.synthetic_size;
|
||||
@@ -323,7 +355,11 @@ impl TenantSnapshot {
|
||||
}
|
||||
};
|
||||
|
||||
metrics.extend([Some(remote_size), synthetic_size].into_iter().flatten());
|
||||
metrics.extend(
|
||||
[Some(remote_size), Some(resident_size), synthetic_size]
|
||||
.into_iter()
|
||||
.flatten(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -224,6 +224,7 @@ fn post_restart_synthetic_size_uses_cached_if_available() {
|
||||
let tenant_id = TenantId::generate();
|
||||
|
||||
let ts = TenantSnapshot {
|
||||
resident_size: 1000,
|
||||
remote_size: 1000,
|
||||
// not yet calculated
|
||||
synthetic_size: 0,
|
||||
@@ -244,6 +245,7 @@ fn post_restart_synthetic_size_uses_cached_if_available() {
|
||||
metrics,
|
||||
&[
|
||||
MetricsKey::remote_storage_size(tenant_id).at(now, 1000),
|
||||
MetricsKey::resident_size(tenant_id).at(now, 1000),
|
||||
MetricsKey::synthetic_size(tenant_id).at(now, 1000),
|
||||
]
|
||||
);
|
||||
@@ -254,6 +256,7 @@ fn post_restart_synthetic_size_is_not_sent_when_not_cached() {
|
||||
let tenant_id = TenantId::generate();
|
||||
|
||||
let ts = TenantSnapshot {
|
||||
resident_size: 1000,
|
||||
remote_size: 1000,
|
||||
// not yet calculated
|
||||
synthetic_size: 0,
|
||||
@@ -271,6 +274,7 @@ fn post_restart_synthetic_size_is_not_sent_when_not_cached() {
|
||||
metrics,
|
||||
&[
|
||||
MetricsKey::remote_storage_size(tenant_id).at(now, 1000),
|
||||
MetricsKey::resident_size(tenant_id).at(now, 1000),
|
||||
// no synthetic size here
|
||||
]
|
||||
);
|
||||
@@ -291,13 +295,14 @@ pub(crate) const fn metric_examples_old(
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
before: DateTime<Utc>,
|
||||
) -> [RawMetric; 5] {
|
||||
) -> [RawMetric; 6] {
|
||||
[
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id)
|
||||
.from_until_old_format(before, now, 0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at_old_format(now, 0),
|
||||
MetricsKey::remote_storage_size(tenant_id).at_old_format(now, 0),
|
||||
MetricsKey::resident_size(tenant_id).at_old_format(now, 0),
|
||||
MetricsKey::synthetic_size(tenant_id).at_old_format(now, 1),
|
||||
]
|
||||
}
|
||||
@@ -307,12 +312,13 @@ pub(crate) const fn metric_examples(
|
||||
timeline_id: TimelineId,
|
||||
now: DateTime<Utc>,
|
||||
before: DateTime<Utc>,
|
||||
) -> [NewRawMetric; 5] {
|
||||
) -> [NewRawMetric; 6] {
|
||||
[
|
||||
MetricsKey::written_size(tenant_id, timeline_id).at(now, 0),
|
||||
MetricsKey::written_size_delta(tenant_id, timeline_id).from_until(before, now, 0),
|
||||
MetricsKey::timeline_logical_size(tenant_id, timeline_id).at(now, 0),
|
||||
MetricsKey::remote_storage_size(tenant_id).at(now, 0),
|
||||
MetricsKey::resident_size(tenant_id).at(now, 0),
|
||||
MetricsKey::synthetic_size(tenant_id).at(now, 1),
|
||||
]
|
||||
}
|
||||
|
||||
@@ -521,6 +521,10 @@ mod tests {
|
||||
line!(),
|
||||
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"remote_storage_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000"}"#,
|
||||
),
|
||||
(
|
||||
line!(),
|
||||
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"resident_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":0,"tenant_id":"00000000000000000000000000000000"}"#,
|
||||
),
|
||||
(
|
||||
line!(),
|
||||
r#"{"type":"absolute","time":"2023-09-15T00:00:00.123456789Z","metric":"synthetic_storage_size","idempotency_key":"2023-09-15 00:00:00.123456789 UTC-1-0000","value":1,"tenant_id":"00000000000000000000000000000000"}"#,
|
||||
@@ -560,7 +564,7 @@ mod tests {
|
||||
assert_eq!(upgraded_samples, new_samples);
|
||||
}
|
||||
|
||||
fn metric_samples_old() -> [RawMetric; 5] {
|
||||
fn metric_samples_old() -> [RawMetric; 6] {
|
||||
let tenant_id = TenantId::from_array([0; 16]);
|
||||
let timeline_id = TimelineId::from_array([0xff; 16]);
|
||||
|
||||
@@ -572,7 +576,7 @@ mod tests {
|
||||
super::super::metrics::metric_examples_old(tenant_id, timeline_id, now, before)
|
||||
}
|
||||
|
||||
fn metric_samples() -> [NewRawMetric; 5] {
|
||||
fn metric_samples() -> [NewRawMetric; 6] {
|
||||
let tenant_id = TenantId::from_array([0; 16]);
|
||||
let timeline_id = TimelineId::from_array([0xff; 16]);
|
||||
|
||||
|
||||
@@ -58,8 +58,14 @@ pub trait StorageControllerUpcallApi {
|
||||
impl StorageControllerUpcallClient {
|
||||
/// A None return value indicates that the input `conf` object does not have control
|
||||
/// plane API enabled.
|
||||
pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Self {
|
||||
let mut url = conf.control_plane_api.clone();
|
||||
pub fn new(
|
||||
conf: &'static PageServerConf,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<Option<Self>, reqwest::Error> {
|
||||
let mut url = match conf.control_plane_api.as_ref() {
|
||||
Some(u) => u.clone(),
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
if let Ok(mut segs) = url.path_segments_mut() {
|
||||
// This ensures that `url` ends with a slash if it doesn't already.
|
||||
@@ -79,17 +85,15 @@ impl StorageControllerUpcallClient {
|
||||
}
|
||||
|
||||
for cert in &conf.ssl_ca_certs {
|
||||
client = client.add_root_certificate(
|
||||
Certificate::from_der(cert.contents()).expect("Invalid certificate in config"),
|
||||
);
|
||||
client = client.add_root_certificate(Certificate::from_der(cert.contents())?);
|
||||
}
|
||||
|
||||
Self {
|
||||
http_client: client.build().expect("Failed to construct HTTP client"),
|
||||
Ok(Some(Self {
|
||||
http_client: client.build()?,
|
||||
base_url: url,
|
||||
node_id: conf.id,
|
||||
cancel: cancel.clone(),
|
||||
}
|
||||
}))
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip_all)]
|
||||
|
||||
@@ -585,7 +585,7 @@ impl DeletionQueue {
|
||||
/// we don't spawn those inside new() so that the caller can use their runtime/spans of choice.
|
||||
pub fn new<C>(
|
||||
remote_storage: GenericRemoteStorage,
|
||||
controller_upcall_client: C,
|
||||
controller_upcall_client: Option<C>,
|
||||
conf: &'static PageServerConf,
|
||||
) -> (Self, DeletionQueueWorkers<C>)
|
||||
where
|
||||
@@ -701,7 +701,7 @@ mod test {
|
||||
async fn restart(&mut self) {
|
||||
let (deletion_queue, workers) = DeletionQueue::new(
|
||||
self.storage.clone(),
|
||||
self.mock_control_plane.clone(),
|
||||
Some(self.mock_control_plane.clone()),
|
||||
self.harness.conf,
|
||||
);
|
||||
|
||||
@@ -821,8 +821,11 @@ mod test {
|
||||
|
||||
let mock_control_plane = MockStorageController::new();
|
||||
|
||||
let (deletion_queue, worker) =
|
||||
DeletionQueue::new(storage.clone(), mock_control_plane.clone(), harness.conf);
|
||||
let (deletion_queue, worker) = DeletionQueue::new(
|
||||
storage.clone(),
|
||||
Some(mock_control_plane.clone()),
|
||||
harness.conf,
|
||||
);
|
||||
|
||||
let worker_join = worker.spawn_with(&tokio::runtime::Handle::current());
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ where
|
||||
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||
|
||||
// Client for calling into control plane API for validation of deletes
|
||||
controller_upcall_client: C,
|
||||
controller_upcall_client: Option<C>,
|
||||
|
||||
// DeletionLists which are waiting generation validation. Not safe to
|
||||
// execute until [`validate`] has processed them.
|
||||
@@ -86,7 +86,7 @@ where
|
||||
conf: &'static PageServerConf,
|
||||
rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
|
||||
tx: tokio::sync::mpsc::Sender<DeleterMessage>,
|
||||
controller_upcall_client: C,
|
||||
controller_upcall_client: Option<C>,
|
||||
lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
|
||||
cancel: CancellationToken,
|
||||
) -> Self {
|
||||
@@ -137,16 +137,20 @@ where
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let tenants_valid = match self
|
||||
.controller_upcall_client
|
||||
.validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
|
||||
.await
|
||||
{
|
||||
Ok(tenants) => tenants,
|
||||
Err(RetryForeverError::ShuttingDown) => {
|
||||
// The only way a validation call returns an error is when the cancellation token fires
|
||||
return Err(DeletionQueueError::ShuttingDown);
|
||||
let tenants_valid = if let Some(controller_upcall_client) = &self.controller_upcall_client {
|
||||
match controller_upcall_client
|
||||
.validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
|
||||
.await
|
||||
{
|
||||
Ok(tenants) => tenants,
|
||||
Err(RetryForeverError::ShuttingDown) => {
|
||||
// The only way a validation call returns an error is when the cancellation token fires
|
||||
return Err(DeletionQueueError::ShuttingDown);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Control plane API disabled. In legacy mode we consider everything valid.
|
||||
tenant_generations.keys().map(|k| (*k, true)).collect()
|
||||
};
|
||||
|
||||
let mut validated_sequence: Option<u64> = None;
|
||||
|
||||
@@ -497,24 +497,6 @@ pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy<IntCounter> = Lazy::n
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static ONDEMAND_DOWNLOAD_BYTES: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_ondemand_download_bytes_total",
|
||||
"Total bytes of layers on-demand downloaded",
|
||||
&["task_kind"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static ONDEMAND_DOWNLOAD_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
register_int_counter_vec!(
|
||||
"pageserver_ondemand_download_count",
|
||||
"Total count of layers on-demand downloaded",
|
||||
&["task_kind"]
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) mod wait_ondemand_download_time {
|
||||
use super::*;
|
||||
const WAIT_ONDEMAND_DOWNLOAD_TIME_BUCKETS: &[f64] = &[
|
||||
@@ -1792,12 +1774,8 @@ static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
/// Per-timeline smgr histogram buckets should be the same as the compute buckets, such that the
|
||||
/// metrics are comparable across compute and Pageserver. See also:
|
||||
/// <https://github.com/neondatabase/neon/blob/1a87975d956a8ad17ec8b85da32a137ec4893fcc/pgxn/neon/neon_perf_counters.h#L18-L27>
|
||||
/// <https://github.com/neondatabase/flux-fleet/blob/556182a939edda87ff1d85a6b02e5cec901e0e9e/apps/base/compute-metrics/scrape-compute-sql-exporter.yaml#L29-L35>
|
||||
static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] =
|
||||
&[0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.1, 1.0, 3.0];
|
||||
// Alias so all histograms recording per-timeline smgr timings use the same buckets.
|
||||
static SMGR_QUERY_TIME_PER_TENANT_TIMELINE_BUCKETS: &[f64] = CRITICAL_OP_BUCKETS;
|
||||
|
||||
static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
|
||||
register_histogram_vec!(
|
||||
@@ -2198,10 +2176,6 @@ impl BasebackupQueryTimeOngoingRecording<'_> {
|
||||
// If you want to change categorize of a specific error, also change it in `log_query_error`.
|
||||
let metric = match res {
|
||||
Ok(_) => &self.parent.ok,
|
||||
Err(QueryError::Shutdown) => {
|
||||
// Do not observe ok/err for shutdown
|
||||
return;
|
||||
}
|
||||
Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
|
||||
if is_expected_io_error(io_error) =>
|
||||
{
|
||||
|
||||
@@ -1035,25 +1035,10 @@ impl PageServerHandler {
|
||||
// avoid a somewhat costly Span::record() by constructing the entire span in one go.
|
||||
macro_rules! mkspan {
|
||||
(before shard routing) => {{
|
||||
tracing::info_span!(
|
||||
parent: &parent_span,
|
||||
"handle_get_page_request",
|
||||
rel = %req.rel,
|
||||
blkno = %req.blkno,
|
||||
req_lsn = %req.hdr.request_lsn,
|
||||
not_modified_since_lsn = %req.hdr.not_modified_since
|
||||
)
|
||||
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn)
|
||||
}};
|
||||
($shard_id:expr) => {{
|
||||
tracing::info_span!(
|
||||
parent: &parent_span,
|
||||
"handle_get_page_request",
|
||||
rel = %req.rel,
|
||||
blkno = %req.blkno,
|
||||
req_lsn = %req.hdr.request_lsn,
|
||||
not_modified_since_lsn = %req.hdr.not_modified_since,
|
||||
shard_id = %$shard_id
|
||||
)
|
||||
tracing::info_span!(parent: &parent_span, "handle_get_page_request", rel = %req.rel, blkno = %req.blkno, req_lsn = %req.hdr.request_lsn, shard_id = %$shard_id)
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -1117,7 +1102,6 @@ impl PageServerHandler {
|
||||
shard_id = %shard.get_shard_identity().shard_slug(),
|
||||
timeline_id = %timeline_id,
|
||||
lsn = %req.hdr.request_lsn,
|
||||
not_modified_since_lsn = %req.hdr.not_modified_since,
|
||||
request_id = %req.hdr.reqid,
|
||||
key = %key,
|
||||
)
|
||||
|
||||
@@ -1084,17 +1084,8 @@ impl Timeline {
|
||||
let mut result = HashMap::new();
|
||||
for (k, v) in kv {
|
||||
let v = v?;
|
||||
if v.is_empty() {
|
||||
// This is a tombstone -- we can skip it.
|
||||
// Originally, the replorigin code uses `Lsn::INVALID` to represent a tombstone. However, as it part of
|
||||
// the sparse keyspace and the sparse keyspace uses an empty image to universally represent a tombstone,
|
||||
// we also need to consider that. Such tombstones might be written on the detach ancestor code path to
|
||||
// avoid the value going into the child branch. (See [`crate::tenant::timeline::detach_ancestor::generate_tombstone_image_layer`] for more details.)
|
||||
continue;
|
||||
}
|
||||
let origin_id = k.field6 as RepOriginId;
|
||||
let origin_lsn = Lsn::des(&v)
|
||||
.with_context(|| format!("decode replorigin value for {}: {v:?}", origin_id))?;
|
||||
let origin_lsn = Lsn::des(&v).unwrap();
|
||||
if origin_lsn != Lsn::INVALID {
|
||||
result.insert(origin_id, origin_lsn);
|
||||
}
|
||||
@@ -2587,11 +2578,6 @@ impl DatadirModification<'_> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn put_for_unit_test(&mut self, key: Key, val: Value) {
|
||||
self.put(key, val);
|
||||
}
|
||||
|
||||
fn put(&mut self, key: Key, val: Value) {
|
||||
if Self::is_data_key(&key) {
|
||||
self.put_data(key.to_compact(), val)
|
||||
|
||||
@@ -4254,7 +4254,9 @@ impl TenantShard {
|
||||
deletion_queue_client: DeletionQueueClient,
|
||||
l0_flush_global_state: L0FlushGlobalState,
|
||||
) -> TenantShard {
|
||||
assert!(!attached_conf.location.generation.is_none());
|
||||
debug_assert!(
|
||||
!attached_conf.location.generation.is_none() || conf.control_plane_api.is_none()
|
||||
);
|
||||
|
||||
let (state, mut rx) = watch::channel(state);
|
||||
|
||||
@@ -5947,9 +5949,7 @@ mod tests {
|
||||
use itertools::Itertools;
|
||||
#[cfg(feature = "testing")]
|
||||
use models::CompactLsnRange;
|
||||
use pageserver_api::key::{
|
||||
AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX, repl_origin_key,
|
||||
};
|
||||
use pageserver_api::key::{AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
#[cfg(feature = "testing")]
|
||||
use pageserver_api::keyspace::KeySpaceRandomAccum;
|
||||
@@ -8185,54 +8185,6 @@ mod tests {
|
||||
assert_eq!(files.get("pg_logical/mappings/test2"), None);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_repl_origin_tombstones() {
|
||||
let harness = TenantHarness::create("test_repl_origin_tombstones")
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let (tenant, ctx) = harness.load().await;
|
||||
let io_concurrency = IoConcurrency::spawn_for_test();
|
||||
|
||||
let mut lsn = Lsn(0x08);
|
||||
|
||||
let tline: Arc<Timeline> = tenant
|
||||
.create_test_timeline(TIMELINE_ID, lsn, DEFAULT_PG_VERSION, &ctx)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let repl_lsn = Lsn(0x10);
|
||||
{
|
||||
lsn += 8;
|
||||
let mut modification = tline.begin_modification(lsn);
|
||||
modification.put_for_unit_test(repl_origin_key(2), Value::Image(Bytes::new()));
|
||||
modification.set_replorigin(1, repl_lsn).await.unwrap();
|
||||
modification.commit(&ctx).await.unwrap();
|
||||
}
|
||||
|
||||
// we can read everything from the storage
|
||||
let repl_origins = tline
|
||||
.get_replorigins(lsn, &ctx, io_concurrency.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(repl_origins.len(), 1);
|
||||
assert_eq!(repl_origins[&1], lsn);
|
||||
|
||||
{
|
||||
lsn += 8;
|
||||
let mut modification = tline.begin_modification(lsn);
|
||||
modification.put_for_unit_test(
|
||||
repl_origin_key(3),
|
||||
Value::Image(Bytes::copy_from_slice(b"cannot_decode_this")),
|
||||
);
|
||||
modification.commit(&ctx).await.unwrap();
|
||||
}
|
||||
let result = tline
|
||||
.get_replorigins(lsn, &ctx, io_concurrency.clone())
|
||||
.await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_metadata_image_creation() -> anyhow::Result<()> {
|
||||
let harness = TenantHarness::create("test_metadata_image_creation").await?;
|
||||
|
||||
@@ -346,8 +346,7 @@ async fn init_load_generations(
|
||||
"Emergency mode! Tenants will be attached unsafely using their last known generation"
|
||||
);
|
||||
emergency_generations(tenant_confs)
|
||||
} else {
|
||||
let client = StorageControllerUpcallClient::new(conf, cancel);
|
||||
} else if let Some(client) = StorageControllerUpcallClient::new(conf, cancel)? {
|
||||
info!("Calling {} API to re-attach tenants", client.base_url());
|
||||
// If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
|
||||
match client.re_attach(conf).await {
|
||||
@@ -361,6 +360,9 @@ async fn init_load_generations(
|
||||
anyhow::bail!("Shut down while waiting for control plane re-attach response")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
info!("Control plane API not configured, tenant generations are disabled");
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
// The deletion queue needs to know about the startup attachment state to decide which (if any) stored
|
||||
@@ -1151,8 +1153,17 @@ impl TenantManager {
|
||||
// Testing hack: if we are configured with no control plane, then drop the generation
|
||||
// from upserts. This enables creating generation-less tenants even though neon_local
|
||||
// always uses generations when calling the location conf API.
|
||||
let attached_conf = AttachedTenantConf::try_from(new_location_config)
|
||||
.map_err(UpsertLocationError::BadRequest)?;
|
||||
let attached_conf = if cfg!(feature = "testing") {
|
||||
let mut conf = AttachedTenantConf::try_from(new_location_config)
|
||||
.map_err(UpsertLocationError::BadRequest)?;
|
||||
if self.conf.control_plane_api.is_none() {
|
||||
conf.location.generation = Generation::none();
|
||||
}
|
||||
conf
|
||||
} else {
|
||||
AttachedTenantConf::try_from(new_location_config)
|
||||
.map_err(UpsertLocationError::BadRequest)?
|
||||
};
|
||||
|
||||
let tenant = tenant_spawn(
|
||||
self.conf,
|
||||
|
||||
@@ -4,7 +4,6 @@ use std::sync::{Arc, Weak};
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use crate::PERF_TRACE_TARGET;
|
||||
use crate::metrics::{ONDEMAND_DOWNLOAD_BYTES, ONDEMAND_DOWNLOAD_COUNT};
|
||||
use anyhow::Context;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
@@ -1256,14 +1255,6 @@ impl LayerInner {
|
||||
|
||||
self.access_stats.record_residence_event();
|
||||
|
||||
let task_kind: &'static str = ctx.task_kind().into();
|
||||
ONDEMAND_DOWNLOAD_BYTES
|
||||
.with_label_values(&[task_kind])
|
||||
.inc_by(self.desc.file_size);
|
||||
ONDEMAND_DOWNLOAD_COUNT
|
||||
.with_label_values(&[task_kind])
|
||||
.inc();
|
||||
|
||||
Ok(self.initialize_after_layer_is_on_disk(permit))
|
||||
}
|
||||
Err(e) => {
|
||||
|
||||
@@ -178,7 +178,7 @@ impl Attempt {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn generate_tombstone_image_layer(
|
||||
async fn generate_tombstone_image_layer(
|
||||
detached: &Arc<Timeline>,
|
||||
ancestor: &Arc<Timeline>,
|
||||
ancestor_lsn: Lsn,
|
||||
|
||||
@@ -163,7 +163,8 @@ pub async fn doit(
|
||||
// Ensure at-least-once delivery of the upcall to storage controller
|
||||
// before we mark the task as done and never come here again.
|
||||
//
|
||||
let storcon_client = StorageControllerUpcallClient::new(timeline.conf, &cancel);
|
||||
let storcon_client = StorageControllerUpcallClient::new(timeline.conf, &cancel)?
|
||||
.expect("storcon configured");
|
||||
storcon_client
|
||||
.put_timeline_import_status(
|
||||
timeline.tenant_shard_id,
|
||||
|
||||
@@ -36,8 +36,6 @@ DATA = \
|
||||
neon--1.2--1.3.sql \
|
||||
neon--1.3--1.4.sql \
|
||||
neon--1.4--1.5.sql \
|
||||
neon--1.5--1.6.sql \
|
||||
neon--1.6--1.5.sql \
|
||||
neon--1.5--1.4.sql \
|
||||
neon--1.4--1.3.sql \
|
||||
neon--1.3--1.2.sql \
|
||||
|
||||
@@ -88,6 +88,9 @@ typedef PGAlignedBlock PGIOAlignedBlock;
|
||||
|
||||
page_server_api *page_server;
|
||||
|
||||
static uint32 local_request_counter;
|
||||
#define GENERATE_REQUEST_ID() (((NeonRequestId)MyProcPid << 32) | ++local_request_counter)
|
||||
|
||||
/*
|
||||
* Various settings related to prompt (fast) handling of PageStream responses
|
||||
* at any CHECK_FOR_INTERRUPTS point.
|
||||
@@ -687,14 +690,8 @@ prefetch_wait_for(uint64 ring_index)
|
||||
END_PREFETCH_RECEIVE_WORK();
|
||||
CHECK_FOR_INTERRUPTS();
|
||||
}
|
||||
if (result)
|
||||
{
|
||||
/* Check that slot is actually received (srver can be disconnected in prefetch_pump_state called from CHECK_FOR_INTERRUPTS */
|
||||
PrefetchRequest *slot = GetPrfSlot(ring_index);
|
||||
return slot->status == PRFS_RECEIVED;
|
||||
}
|
||||
return false;
|
||||
;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -791,27 +788,6 @@ prefetch_read(PrefetchRequest *slot)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Wait completion of previosly registered prefetch request.
|
||||
* Prefetch result should be placed in LFC by prefetch_wait_for.
|
||||
*/
|
||||
bool
|
||||
communicator_prefetch_receive(BufferTag tag)
|
||||
{
|
||||
PrfHashEntry *entry;
|
||||
PrefetchRequest hashkey;
|
||||
|
||||
hashkey.buftag = tag;
|
||||
entry = prfh_lookup(MyPState->prf_hash, &hashkey);
|
||||
if (entry != NULL && prefetch_wait_for(entry->slot->my_ring_index))
|
||||
{
|
||||
prefetch_set_unused(entry->slot->my_ring_index);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Disconnect hook - drop prefetches when the connection drops
|
||||
*
|
||||
@@ -930,6 +906,7 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
|
||||
|
||||
NeonGetPageRequest request = {
|
||||
.hdr.tag = T_NeonGetPageRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
/* lsn and not_modified_since are filled in below */
|
||||
.rinfo = BufTagGetNRelFileInfo(slot->buftag),
|
||||
.forknum = slot->buftag.forkNum,
|
||||
@@ -938,6 +915,8 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
|
||||
|
||||
Assert(mySlotNo == MyPState->ring_unused);
|
||||
|
||||
slot->reqid = request.hdr.reqid;
|
||||
|
||||
if (force_request_lsns)
|
||||
slot->request_lsns = *force_request_lsns;
|
||||
else
|
||||
@@ -955,7 +934,6 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
|
||||
Assert(mySlotNo == MyPState->ring_unused);
|
||||
/* loop */
|
||||
}
|
||||
slot->reqid = request.hdr.reqid;
|
||||
|
||||
/* update prefetch state */
|
||||
MyPState->n_requests_inflight += 1;
|
||||
@@ -1959,6 +1937,7 @@ communicator_exists(NRelFileInfo rinfo, ForkNumber forkNum, neon_request_lsns *r
|
||||
{
|
||||
NeonExistsRequest request = {
|
||||
.hdr.tag = T_NeonExistsRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
.hdr.lsn = request_lsns->request_lsn,
|
||||
.hdr.not_modified_since = request_lsns->not_modified_since,
|
||||
.rinfo = rinfo,
|
||||
@@ -2233,6 +2212,7 @@ communicator_nblocks(NRelFileInfo rinfo, ForkNumber forknum, neon_request_lsns *
|
||||
{
|
||||
NeonNblocksRequest request = {
|
||||
.hdr.tag = T_NeonNblocksRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
.hdr.lsn = request_lsns->request_lsn,
|
||||
.hdr.not_modified_since = request_lsns->not_modified_since,
|
||||
.rinfo = rinfo,
|
||||
@@ -2305,6 +2285,7 @@ communicator_dbsize(Oid dbNode, neon_request_lsns *request_lsns)
|
||||
{
|
||||
NeonDbSizeRequest request = {
|
||||
.hdr.tag = T_NeonDbSizeRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
.hdr.lsn = request_lsns->request_lsn,
|
||||
.hdr.not_modified_since = request_lsns->not_modified_since,
|
||||
.dbNode = dbNode,
|
||||
@@ -2372,6 +2353,7 @@ communicator_read_slru_segment(SlruKind kind, int64 segno, neon_request_lsns *re
|
||||
|
||||
request = (NeonGetSlruSegmentRequest) {
|
||||
.hdr.tag = T_NeonGetSlruSegmentRequest,
|
||||
.hdr.reqid = GENERATE_REQUEST_ID(),
|
||||
.hdr.lsn = request_lsns->request_lsn,
|
||||
.hdr.not_modified_since = request_lsns->not_modified_since,
|
||||
.kind = kind,
|
||||
|
||||
@@ -37,8 +37,6 @@ extern int communicator_prefetch_lookupv(NRelFileInfo rinfo, ForkNumber forknum,
|
||||
BlockNumber nblocks, void **buffers, bits8 *mask);
|
||||
extern void communicator_prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
|
||||
BlockNumber nblocks, const bits8 *mask);
|
||||
extern bool communicator_prefetch_receive(BufferTag tag);
|
||||
|
||||
extern int communicator_read_slru_segment(SlruKind kind, int64 segno,
|
||||
neon_request_lsns *request_lsns,
|
||||
void *buffer);
|
||||
|
||||
@@ -25,7 +25,6 @@
|
||||
#include "pgstat.h"
|
||||
#include "port/pg_iovec.h"
|
||||
#include "postmaster/bgworker.h"
|
||||
#include "postmaster/interrupt.h"
|
||||
#include RELFILEINFO_HDR
|
||||
#include "storage/buf_internals.h"
|
||||
#include "storage/fd.h"
|
||||
@@ -33,8 +32,6 @@
|
||||
#include "storage/latch.h"
|
||||
#include "storage/lwlock.h"
|
||||
#include "storage/pg_shmem.h"
|
||||
#include "storage/procsignal.h"
|
||||
#include "tcop/tcopprot.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/dynahash.h"
|
||||
#include "utils/guc.h"
|
||||
@@ -49,8 +46,6 @@
|
||||
#include "neon.h"
|
||||
#include "neon_lwlsncache.h"
|
||||
#include "neon_perf_counters.h"
|
||||
#include "pagestore_client.h"
|
||||
#include "communicator.h"
|
||||
|
||||
#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "LFC: assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)
|
||||
|
||||
@@ -92,13 +87,14 @@
|
||||
* 1Mb chunks can reduce hash map size to 320Mb.
|
||||
* 2. Improve access locality, subsequent pages will be allocated together improving seqscan speed
|
||||
*/
|
||||
#define MAX_BLOCKS_PER_CHUNK_LOG 7 /* 1Mb chunk */
|
||||
#define MAX_BLOCKS_PER_CHUNK (1 << MAX_BLOCKS_PER_CHUNK_LOG)
|
||||
|
||||
#define BLOCKS_PER_CHUNK 128 /* 1Mb chunk */
|
||||
/*
|
||||
* Smaller chunk seems to be better for OLTP workload
|
||||
*/
|
||||
// #define BLOCKS_PER_CHUNK 8 /* 64kb chunk */
|
||||
#define MB ((uint64)1024*1024)
|
||||
|
||||
#define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ >> lfc_chunk_size_log))
|
||||
#define BLOCK_TO_CHUNK_OFF(blkno) ((blkno) & (lfc_blocks_per_chunk-1))
|
||||
#define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ / BLOCKS_PER_CHUNK))
|
||||
|
||||
/*
|
||||
* Blocks are read or written to LFC file outside LFC critical section.
|
||||
@@ -123,26 +119,16 @@ typedef struct FileCacheEntry
|
||||
uint32 hash;
|
||||
uint32 offset;
|
||||
uint32 access_count;
|
||||
uint32 state[(BLOCKS_PER_CHUNK + 31) / 32 * 2]; /* two bits per block */
|
||||
dlist_node list_node; /* LRU/holes list node */
|
||||
uint32 state[FLEXIBLE_ARRAY_MEMBER]; /* two bits per block */
|
||||
} FileCacheEntry;
|
||||
|
||||
#define FILE_CACHE_ENRTY_SIZE MAXALIGN(offsetof(FileCacheEntry, state) + (lfc_blocks_per_chunk*2+31)/32*4)
|
||||
#define GET_STATE(entry, i) (((entry)->state[(i) / 16] >> ((i) % 16 * 2)) & 3)
|
||||
#define SET_STATE(entry, i, new_state) (entry)->state[(i) / 16] = ((entry)->state[(i) / 16] & ~(3 << ((i) % 16 * 2))) | ((new_state) << ((i) % 16 * 2))
|
||||
|
||||
#define N_COND_VARS 64
|
||||
#define CV_WAIT_TIMEOUT 10
|
||||
|
||||
#define MAX_PREWARM_WORKERS 8
|
||||
|
||||
typedef struct PrewarmWorkerState
|
||||
{
|
||||
uint32 prewarmed_pages;
|
||||
uint32 skipped_pages;
|
||||
TimestampTz completed;
|
||||
} PrewarmWorkerState;
|
||||
|
||||
typedef struct FileCacheControl
|
||||
{
|
||||
uint64 generation; /* generation is needed to handle correct hash
|
||||
@@ -150,7 +136,6 @@ typedef struct FileCacheControl
|
||||
uint32 size; /* size of cache file in chunks */
|
||||
uint32 used; /* number of used chunks */
|
||||
uint32 used_pages; /* number of used pages */
|
||||
uint32 pinned; /* number of pinned chunks */
|
||||
uint32 limit; /* shared copy of lfc_size_limit */
|
||||
uint64 hits;
|
||||
uint64 misses;
|
||||
@@ -164,43 +149,23 @@ typedef struct FileCacheControl
|
||||
dlist_head holes; /* double linked list of punched holes */
|
||||
HyperLogLogState wss_estimation; /* estimation of working set size */
|
||||
ConditionVariable cv[N_COND_VARS]; /* turnstile of condition variables */
|
||||
PrewarmWorkerState prewarm_workers[MAX_PREWARM_WORKERS];
|
||||
size_t n_prewarm_workers;
|
||||
size_t n_prewarm_entries;
|
||||
size_t total_prewarm_pages;
|
||||
size_t prewarm_batch;
|
||||
bool prewarm_active;
|
||||
bool prewarm_canceled;
|
||||
dsm_handle prewarm_lfc_state_handle;
|
||||
} FileCacheControl;
|
||||
|
||||
#define FILE_CACHE_STATE_MAGIC 0xfcfcfcfc
|
||||
|
||||
#define FILE_CACHE_STATE_BITMAP(fcs) ((uint8*)&(fcs)->chunks[(fcs)->n_chunks])
|
||||
#define FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_chunks) (sizeof(FileCacheState) + (n_chunks)*sizeof(BufferTag) + (((n_chunks) * lfc_blocks_per_chunk)+7)/8)
|
||||
#define FILE_CACHE_STATE_SIZE(fcs) (sizeof(FileCacheState) + (fcs->n_chunks)*sizeof(BufferTag) + (((fcs->n_chunks) << fcs->chunk_size_log)+7)/8)
|
||||
bool lfc_store_prefetch_result;
|
||||
|
||||
static HTAB *lfc_hash;
|
||||
static int lfc_desc = -1;
|
||||
static LWLockId lfc_lock;
|
||||
static int lfc_max_size;
|
||||
static int lfc_size_limit;
|
||||
static int lfc_prewarm_limit;
|
||||
static int lfc_prewarm_batch;
|
||||
static int lfc_chunk_size_log = MAX_BLOCKS_PER_CHUNK_LOG;
|
||||
static int lfc_blocks_per_chunk = MAX_BLOCKS_PER_CHUNK;
|
||||
static char *lfc_path;
|
||||
static uint64 lfc_generation;
|
||||
static FileCacheControl *lfc_ctl;
|
||||
static bool lfc_do_prewarm;
|
||||
static shmem_startup_hook_type prev_shmem_startup_hook;
|
||||
#if PG_VERSION_NUM>=150000
|
||||
static shmem_request_hook_type prev_shmem_request_hook;
|
||||
#endif
|
||||
|
||||
bool lfc_store_prefetch_result;
|
||||
bool lfc_prewarm_update_ws_estimation;
|
||||
|
||||
#define LFC_ENABLED() (lfc_ctl->limit != 0)
|
||||
|
||||
/*
|
||||
@@ -241,9 +206,7 @@ lfc_switch_off(void)
|
||||
}
|
||||
lfc_ctl->generation += 1;
|
||||
lfc_ctl->size = 0;
|
||||
lfc_ctl->pinned = 0;
|
||||
lfc_ctl->used = 0;
|
||||
lfc_ctl->used_pages = 0;
|
||||
lfc_ctl->limit = 0;
|
||||
dlist_init(&lfc_ctl->lru);
|
||||
dlist_init(&lfc_ctl->holes);
|
||||
@@ -333,7 +296,7 @@ lfc_shmem_startup(void)
|
||||
|
||||
lfc_lock = (LWLockId) GetNamedLWLockTranche("lfc_lock");
|
||||
info.keysize = sizeof(BufferTag);
|
||||
info.entrysize = FILE_CACHE_ENRTY_SIZE;
|
||||
info.entrysize = sizeof(FileCacheEntry);
|
||||
|
||||
/*
|
||||
* n_chunks+1 because we add new element to hash table before eviction
|
||||
@@ -379,7 +342,7 @@ lfc_shmem_request(void)
|
||||
prev_shmem_request_hook();
|
||||
#endif
|
||||
|
||||
RequestAddinShmemSpace(sizeof(FileCacheControl) + hash_estimate_size(SIZE_MB_TO_CHUNKS(lfc_max_size) + 1, FILE_CACHE_ENRTY_SIZE));
|
||||
RequestAddinShmemSpace(sizeof(FileCacheControl) + hash_estimate_size(SIZE_MB_TO_CHUNKS(lfc_max_size) + 1, sizeof(FileCacheEntry)));
|
||||
RequestNamedLWLockTranche("lfc_lock", 1);
|
||||
}
|
||||
|
||||
@@ -396,24 +359,6 @@ is_normal_backend(void)
|
||||
return lfc_ctl && MyProc && UsedShmemSegAddr && !IsParallelWorker();
|
||||
}
|
||||
|
||||
static bool
|
||||
lfc_check_chunk_size(int *newval, void **extra, GucSource source)
|
||||
{
|
||||
if (*newval & (*newval - 1))
|
||||
{
|
||||
elog(ERROR, "LFC chunk size should be power of two");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
lfc_change_chunk_size(int newval, void* extra)
|
||||
{
|
||||
lfc_chunk_size_log = pg_ceil_log2_32(newval);
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
lfc_check_limit_hook(int *newval, void **extra, GucSource source)
|
||||
{
|
||||
@@ -470,11 +415,11 @@ lfc_change_limit_hook(int newval, void *extra)
|
||||
|
||||
CriticalAssert(victim->access_count == 0);
|
||||
#ifdef FALLOC_FL_PUNCH_HOLE
|
||||
if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, (off_t) victim->offset * lfc_blocks_per_chunk * BLCKSZ, lfc_blocks_per_chunk * BLCKSZ) < 0)
|
||||
if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, (off_t) victim->offset * BLOCKS_PER_CHUNK * BLCKSZ, BLOCKS_PER_CHUNK * BLCKSZ) < 0)
|
||||
neon_log(LOG, "Failed to punch hole in file: %m");
|
||||
#endif
|
||||
/* We remove the old entry, and re-enter a hole to the hash table */
|
||||
for (int i = 0; i < lfc_blocks_per_chunk; i++)
|
||||
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||
{
|
||||
bool is_page_cached = GET_STATE(victim, i) == AVAILABLE;
|
||||
lfc_ctl->used_pages -= is_page_cached;
|
||||
@@ -526,17 +471,6 @@ lfc_init(void)
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
DefineCustomBoolVariable("neon.prewarm_update_ws_estimation",
|
||||
"Consider prewarmed pages for working set estimation",
|
||||
NULL,
|
||||
&lfc_prewarm_update_ws_estimation,
|
||||
true,
|
||||
PGC_SUSET,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
DefineCustomIntVariable("neon.max_file_cache_size",
|
||||
"Maximal size of Neon local file cache",
|
||||
NULL,
|
||||
@@ -574,45 +508,6 @@ lfc_init(void)
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
DefineCustomIntVariable("neon.file_cache_chunk_size",
|
||||
"LFC chunk size in blocks (should be power of two)",
|
||||
NULL,
|
||||
&lfc_blocks_per_chunk,
|
||||
MAX_BLOCKS_PER_CHUNK,
|
||||
1,
|
||||
MAX_BLOCKS_PER_CHUNK,
|
||||
PGC_POSTMASTER,
|
||||
GUC_UNIT_BLOCKS,
|
||||
lfc_check_chunk_size,
|
||||
lfc_change_chunk_size,
|
||||
NULL);
|
||||
|
||||
DefineCustomIntVariable("neon.file_cache_prewarm_limit",
|
||||
"Maximal number of prewarmed chunks",
|
||||
NULL,
|
||||
&lfc_prewarm_limit,
|
||||
INT_MAX, /* no limit by default */
|
||||
0,
|
||||
INT_MAX,
|
||||
PGC_SIGHUP,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
DefineCustomIntVariable("neon.file_cache_prewarm_batch",
|
||||
"Number of pages retrivied by prewarm from page server",
|
||||
NULL,
|
||||
&lfc_prewarm_batch,
|
||||
64,
|
||||
1,
|
||||
INT_MAX,
|
||||
PGC_SIGHUP,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
if (lfc_max_size == 0)
|
||||
return;
|
||||
|
||||
@@ -626,317 +521,6 @@ lfc_init(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
FileCacheState*
|
||||
lfc_get_state(size_t max_entries)
|
||||
{
|
||||
FileCacheState* fcs = NULL;
|
||||
|
||||
if (lfc_maybe_disabled() || max_entries == 0) /* fast exit if file cache is disabled */
|
||||
return NULL;
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||
|
||||
if (LFC_ENABLED())
|
||||
{
|
||||
dlist_iter iter;
|
||||
size_t i = 0;
|
||||
uint8* bitmap;
|
||||
size_t n_pages = 0;
|
||||
size_t n_entries = Min(max_entries, lfc_ctl->used - lfc_ctl->pinned);
|
||||
size_t state_size = FILE_CACHE_STATE_SIZE_FOR_CHUNKS(n_entries);
|
||||
fcs = (FileCacheState*)palloc0(state_size);
|
||||
SET_VARSIZE(fcs, state_size);
|
||||
fcs->magic = FILE_CACHE_STATE_MAGIC;
|
||||
fcs->chunk_size_log = lfc_chunk_size_log;
|
||||
fcs->n_chunks = n_entries;
|
||||
bitmap = FILE_CACHE_STATE_BITMAP(fcs);
|
||||
|
||||
dlist_reverse_foreach(iter, &lfc_ctl->lru)
|
||||
{
|
||||
FileCacheEntry *entry = dlist_container(FileCacheEntry, list_node, iter.cur);
|
||||
fcs->chunks[i] = entry->key;
|
||||
for (int j = 0; j < lfc_blocks_per_chunk; j++)
|
||||
{
|
||||
if (GET_STATE(entry, j) != UNAVAILABLE)
|
||||
{
|
||||
BITMAP_SET(bitmap, i*lfc_blocks_per_chunk + j);
|
||||
n_pages += 1;
|
||||
}
|
||||
}
|
||||
if (++i == n_entries)
|
||||
break;
|
||||
}
|
||||
Assert(i == n_entries);
|
||||
fcs->n_pages = n_pages;
|
||||
Assert(pg_popcount((char*)bitmap, ((n_entries << lfc_chunk_size_log) + 7)/8) == n_pages);
|
||||
elog(LOG, "LFC: save state of %d chunks %d pages", (int)n_entries, (int)n_pages);
|
||||
}
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
return fcs;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prewarm LFC cache to the specified state. It uses lfc_prefetch function to load prewarmed page without hoilding shared buffer lock
|
||||
* and avoid race conditions with other backends.
|
||||
*/
|
||||
void
|
||||
lfc_prewarm(FileCacheState* fcs, uint32 n_workers)
|
||||
{
|
||||
size_t fcs_chunk_size_log;
|
||||
size_t n_entries;
|
||||
size_t prewarm_batch = Min(lfc_prewarm_batch, readahead_buffer_size);
|
||||
size_t fcs_size;
|
||||
dsm_segment *seg;
|
||||
BackgroundWorkerHandle* bgw_handle[MAX_PREWARM_WORKERS];
|
||||
|
||||
|
||||
if (!lfc_ensure_opened())
|
||||
return;
|
||||
|
||||
if (prewarm_batch == 0 || lfc_prewarm_limit == 0 || n_workers == 0)
|
||||
{
|
||||
elog(LOG, "LFC: prewarm is disabled");
|
||||
return;
|
||||
}
|
||||
|
||||
if (n_workers > MAX_PREWARM_WORKERS)
|
||||
{
|
||||
elog(ERROR, "LFC: Too much prewarm workers, maximum is %d", MAX_PREWARM_WORKERS);
|
||||
}
|
||||
|
||||
if (fcs == NULL || fcs->n_chunks == 0)
|
||||
{
|
||||
elog(LOG, "LFC: nothing to prewarm");
|
||||
return;
|
||||
}
|
||||
|
||||
if (fcs->magic != FILE_CACHE_STATE_MAGIC)
|
||||
{
|
||||
elog(ERROR, "LFC: Invalid file cache state magic: %X", fcs->magic);
|
||||
}
|
||||
|
||||
fcs_size = VARSIZE(fcs);
|
||||
if (FILE_CACHE_STATE_SIZE(fcs) != fcs_size)
|
||||
{
|
||||
elog(ERROR, "LFC: Invalid file cache state size: %u vs. %u", (unsigned)FILE_CACHE_STATE_SIZE(fcs), VARSIZE(fcs));
|
||||
}
|
||||
|
||||
fcs_chunk_size_log = fcs->chunk_size_log;
|
||||
if (fcs_chunk_size_log > MAX_BLOCKS_PER_CHUNK_LOG)
|
||||
{
|
||||
elog(ERROR, "LFC: Invalid chunk size log: %u", fcs->chunk_size_log);
|
||||
}
|
||||
|
||||
n_entries = Min(fcs->n_chunks, lfc_prewarm_limit);
|
||||
Assert(n_entries != 0);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
|
||||
/* Do not prewarm more entries than LFC limit */
|
||||
if (lfc_ctl->limit <= lfc_ctl->size)
|
||||
{
|
||||
elog(LOG, "LFC: skip prewarm because LFC is already filled");
|
||||
LWLockRelease(lfc_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
if (lfc_ctl->prewarm_active)
|
||||
{
|
||||
LWLockRelease(lfc_lock);
|
||||
elog(ERROR, "LFC: skip prewarm because another prewarm is still active");
|
||||
}
|
||||
lfc_ctl->n_prewarm_entries = n_entries;
|
||||
lfc_ctl->n_prewarm_workers = n_workers;
|
||||
lfc_ctl->prewarm_active = true;
|
||||
lfc_ctl->prewarm_canceled = false;
|
||||
lfc_ctl->prewarm_batch = prewarm_batch;
|
||||
memset(lfc_ctl->prewarm_workers, 0, n_workers*sizeof(PrewarmWorkerState));
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
/* Calculate total number of pages to be prewarmed */
|
||||
lfc_ctl->total_prewarm_pages = fcs->n_pages;
|
||||
|
||||
seg = dsm_create(fcs_size, 0);
|
||||
memcpy(dsm_segment_address(seg), fcs, fcs_size);
|
||||
lfc_ctl->prewarm_lfc_state_handle = dsm_segment_handle(seg);
|
||||
|
||||
/* Spawn background workers */
|
||||
for (uint32 i = 0; i < n_workers; i++)
|
||||
{
|
||||
BackgroundWorker worker = {0};
|
||||
|
||||
worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
|
||||
worker.bgw_start_time = BgWorkerStart_ConsistentState;
|
||||
worker.bgw_restart_time = BGW_NEVER_RESTART;
|
||||
strcpy(worker.bgw_library_name, "neon");
|
||||
strcpy(worker.bgw_function_name, "lfc_prewarm_main");
|
||||
snprintf(worker.bgw_name, BGW_MAXLEN, "LFC prewarm worker %d", i+1);
|
||||
strcpy(worker.bgw_type, "LFC prewarm worker");
|
||||
worker.bgw_main_arg = Int32GetDatum(i);
|
||||
/* must set notify PID to wait for shutdown */
|
||||
worker.bgw_notify_pid = MyProcPid;
|
||||
|
||||
if (!RegisterDynamicBackgroundWorker(&worker, &bgw_handle[i]))
|
||||
{
|
||||
ereport(LOG,
|
||||
(errcode(ERRCODE_INSUFFICIENT_RESOURCES),
|
||||
errmsg("LFC: registering dynamic bgworker prewarm failed"),
|
||||
errhint("Consider increasing the configuration parameter \"%s\".", "max_worker_processes")));
|
||||
n_workers = i;
|
||||
lfc_ctl->prewarm_canceled = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32 i = 0; i < n_workers; i++)
|
||||
{
|
||||
bool interrupted;
|
||||
do
|
||||
{
|
||||
interrupted = false;
|
||||
PG_TRY();
|
||||
{
|
||||
BgwHandleStatus status = WaitForBackgroundWorkerShutdown(bgw_handle[i]);
|
||||
if (status != BGWH_STOPPED && status != BGWH_POSTMASTER_DIED)
|
||||
{
|
||||
elog(LOG, "LFC: Unexpected status of prewarm worker termination: %d", status);
|
||||
}
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
elog(LOG, "LFC: cancel prewarm");
|
||||
lfc_ctl->prewarm_canceled = true;
|
||||
interrupted = true;
|
||||
}
|
||||
PG_END_TRY();
|
||||
} while (interrupted);
|
||||
|
||||
if (!lfc_ctl->prewarm_workers[i].completed)
|
||||
{
|
||||
/* Background worker doesn't set completion time: it means that it was abnormally terminated */
|
||||
elog(LOG, "LFC: prewarm worker %d failed", i+1);
|
||||
/* Set completion time to prevent get_prewarm_info from considering this worker as active */
|
||||
lfc_ctl->prewarm_workers[i].completed = GetCurrentTimestamp();
|
||||
}
|
||||
}
|
||||
dsm_detach(seg);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
|
||||
lfc_ctl->prewarm_active = false;
|
||||
LWLockRelease(lfc_lock);
|
||||
}
|
||||
|
||||
void
|
||||
lfc_prewarm_main(Datum main_arg)
|
||||
{
|
||||
size_t snd_idx = 0, rcv_idx = 0;
|
||||
size_t n_sent = 0, n_received = 0;
|
||||
size_t fcs_chunk_size_log;
|
||||
size_t max_prefetch_pages;
|
||||
size_t prewarm_batch;
|
||||
size_t n_workers;
|
||||
dsm_segment *seg;
|
||||
FileCacheState* fcs;
|
||||
uint8* bitmap;
|
||||
BufferTag tag;
|
||||
PrewarmWorkerState* ws;
|
||||
uint32 worker_id = DatumGetInt32(main_arg);
|
||||
|
||||
pqsignal(SIGTERM, die);
|
||||
BackgroundWorkerUnblockSignals();
|
||||
|
||||
seg = dsm_attach(lfc_ctl->prewarm_lfc_state_handle);
|
||||
if (seg == NULL)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
|
||||
errmsg("could not map dynamic shared memory segment")));
|
||||
|
||||
fcs = (FileCacheState*) dsm_segment_address(seg);
|
||||
prewarm_batch = lfc_ctl->prewarm_batch;
|
||||
fcs_chunk_size_log = fcs->chunk_size_log;
|
||||
n_workers = lfc_ctl->n_prewarm_workers;
|
||||
max_prefetch_pages = lfc_ctl->n_prewarm_entries << fcs_chunk_size_log;
|
||||
ws = &lfc_ctl->prewarm_workers[worker_id];
|
||||
bitmap = FILE_CACHE_STATE_BITMAP(fcs);
|
||||
|
||||
/* enable prefetch in LFC */
|
||||
lfc_store_prefetch_result = true;
|
||||
lfc_do_prewarm = true; /* Flag for lfc_prefetch preventing replacement of existed entries if LFC cache is full */
|
||||
|
||||
elog(LOG, "LFC: worker %d start prewarming", worker_id);
|
||||
while (!lfc_ctl->prewarm_canceled)
|
||||
{
|
||||
if (snd_idx < max_prefetch_pages)
|
||||
{
|
||||
if ((snd_idx >> fcs_chunk_size_log) % n_workers != worker_id)
|
||||
{
|
||||
/* If there are multiple workers, split chunks between them */
|
||||
snd_idx += 1 << fcs_chunk_size_log;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (BITMAP_ISSET(bitmap, snd_idx))
|
||||
{
|
||||
tag = fcs->chunks[snd_idx >> fcs_chunk_size_log];
|
||||
tag.blockNum += snd_idx & ((1 << fcs_chunk_size_log) - 1);
|
||||
if (!lfc_cache_contains(BufTagGetNRelFileInfo(tag), tag.forkNum, tag.blockNum))
|
||||
{
|
||||
(void)communicator_prefetch_register_bufferv(tag, NULL, 1, NULL);
|
||||
n_sent += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ws->skipped_pages += 1;
|
||||
BITMAP_CLR(bitmap, snd_idx);
|
||||
}
|
||||
}
|
||||
snd_idx += 1;
|
||||
}
|
||||
}
|
||||
if (n_sent >= n_received + prewarm_batch || snd_idx == max_prefetch_pages)
|
||||
{
|
||||
if (n_received == n_sent && snd_idx == max_prefetch_pages)
|
||||
{
|
||||
break;
|
||||
}
|
||||
if ((rcv_idx >> fcs_chunk_size_log) % n_workers != worker_id)
|
||||
{
|
||||
/* Skip chunks processed by other workers */
|
||||
rcv_idx += 1 << fcs_chunk_size_log;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Locate next block to prefetch */
|
||||
while (!BITMAP_ISSET(bitmap, rcv_idx))
|
||||
{
|
||||
rcv_idx += 1;
|
||||
}
|
||||
tag = fcs->chunks[rcv_idx >> fcs_chunk_size_log];
|
||||
tag.blockNum += rcv_idx & ((1 << fcs_chunk_size_log) - 1);
|
||||
if (communicator_prefetch_receive(tag))
|
||||
{
|
||||
ws->prewarmed_pages += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
ws->skipped_pages += 1;
|
||||
}
|
||||
rcv_idx += 1;
|
||||
n_received += 1;
|
||||
}
|
||||
}
|
||||
/* No need to perform prefetch cleanup here because prewarm worker will be terminated and
|
||||
* connection to PS dropped just after return from this function.
|
||||
*/
|
||||
Assert(n_sent == n_received || lfc_ctl->prewarm_canceled);
|
||||
elog(LOG, "LFC: worker %d complete prewarming: loaded %ld pages", worker_id, (long)n_received);
|
||||
lfc_ctl->prewarm_workers[worker_id].completed = GetCurrentTimestamp();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Check if page is present in the cache.
|
||||
* Returns true if page is found in local cache.
|
||||
@@ -946,7 +530,7 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
{
|
||||
BufferTag tag;
|
||||
FileCacheEntry *entry;
|
||||
int chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
|
||||
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
bool found = false;
|
||||
uint32 hash;
|
||||
|
||||
@@ -955,7 +539,7 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
|
||||
|
||||
CopyNRelFileInfoToBufTag(tag, rinfo);
|
||||
tag.forkNum = forkNum;
|
||||
tag.blockNum = blkno - chunk_offs;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
|
||||
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
@@ -993,9 +577,9 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
|
||||
chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
|
||||
tag.blockNum = blkno - chunk_offs;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||
|
||||
@@ -1006,12 +590,12 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
}
|
||||
while (true)
|
||||
{
|
||||
int this_chunk = Min(nblocks - i, lfc_blocks_per_chunk - chunk_offs);
|
||||
int this_chunk = Min(nblocks - i, BLOCKS_PER_CHUNK - chunk_offs);
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
|
||||
|
||||
if (entry != NULL)
|
||||
{
|
||||
for (; chunk_offs < lfc_blocks_per_chunk && i < nblocks; chunk_offs++, i++)
|
||||
for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
|
||||
{
|
||||
if (GET_STATE(entry, chunk_offs) != UNAVAILABLE)
|
||||
{
|
||||
@@ -1035,9 +619,9 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
* Prepare for the next iteration. We don't unlock here, as that'd
|
||||
* probably be more expensive than the gains it'd get us.
|
||||
*/
|
||||
chunk_offs = BLOCK_TO_CHUNK_OFF(blkno + i);
|
||||
tag.blockNum = (blkno + i) - chunk_offs;
|
||||
tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
|
||||
}
|
||||
|
||||
LWLockRelease(lfc_lock);
|
||||
@@ -1112,9 +696,9 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
while (nblocks > 0)
|
||||
{
|
||||
struct iovec iov[PG_IOV_MAX];
|
||||
uint8 chunk_mask[MAX_BLOCKS_PER_CHUNK / 8] = {0};
|
||||
int chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
|
||||
int blocks_in_chunk = Min(nblocks, lfc_blocks_per_chunk - chunk_offs);
|
||||
int8 chunk_mask[BLOCKS_PER_CHUNK / 8] = {0};
|
||||
int chunk_offs = (blkno & (BLOCKS_PER_CHUNK - 1));
|
||||
int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
|
||||
int iteration_hits = 0;
|
||||
int iteration_misses = 0;
|
||||
uint64 io_time_us = 0;
|
||||
@@ -1202,10 +786,8 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
/* Unlink entry from LRU list to pin it for the duration of IO operation */
|
||||
if (entry->access_count++ == 0)
|
||||
{
|
||||
lfc_ctl->pinned += 1;
|
||||
dlist_delete(&entry->list_node);
|
||||
}
|
||||
|
||||
generation = lfc_ctl->generation;
|
||||
entry_offset = entry->offset;
|
||||
|
||||
@@ -1254,7 +836,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
if (iteration_hits != 0)
|
||||
{
|
||||
/* chunk offset (# of pages) into the LFC file */
|
||||
off_t first_read_offset = (off_t) entry_offset * lfc_blocks_per_chunk;
|
||||
off_t first_read_offset = (off_t) entry_offset * BLOCKS_PER_CHUNK;
|
||||
int nwrite = iov_last_used - first_block_in_chunk_read;
|
||||
/* offset of first IOV */
|
||||
first_read_offset += chunk_offs + first_block_in_chunk_read;
|
||||
@@ -1302,10 +884,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
CriticalAssert(entry->access_count > 0);
|
||||
if (--entry->access_count == 0)
|
||||
{
|
||||
lfc_ctl->pinned -= 1;
|
||||
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1375,17 +954,14 @@ lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)
|
||||
* If we can't (e.g. because all other slots are being accessed)
|
||||
* then we will remove this entry from the hash and continue
|
||||
* on to the next chunk, as we may not exceed the limit.
|
||||
*
|
||||
* While prewarming LFC we do not want to replace existed entries,
|
||||
* so we just stop prewarm is LFC cache is full.
|
||||
*/
|
||||
else if (!dlist_is_empty(&lfc_ctl->lru) && !lfc_do_prewarm)
|
||||
else if (!dlist_is_empty(&lfc_ctl->lru))
|
||||
{
|
||||
/* Cache overflow: evict least recently used chunk */
|
||||
FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node,
|
||||
dlist_pop_head_node(&lfc_ctl->lru));
|
||||
|
||||
for (int i = 0; i < lfc_blocks_per_chunk; i++)
|
||||
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||
{
|
||||
bool is_page_cached = GET_STATE(victim, i) == AVAILABLE;
|
||||
lfc_ctl->used_pages -= is_page_cached;
|
||||
@@ -1403,15 +979,14 @@ lfc_init_new_entry(FileCacheEntry* entry, uint32 hash)
|
||||
/* Can't add this chunk - we don't have the space for it */
|
||||
hash_search_with_hash_value(lfc_hash, &entry->key, hash,
|
||||
HASH_REMOVE, NULL);
|
||||
lfc_ctl->prewarm_canceled = true; /* cancel prewarm if LFC limit is reached */
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
entry->access_count = 1;
|
||||
entry->hash = hash;
|
||||
lfc_ctl->pinned += 1;
|
||||
|
||||
for (int i = 0; i < lfc_blocks_per_chunk; i++)
|
||||
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||
SET_STATE(entry, i, UNAVAILABLE);
|
||||
|
||||
return true;
|
||||
@@ -1456,7 +1031,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
FileCacheBlockState state;
|
||||
XLogRecPtr lwlsn;
|
||||
|
||||
int chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
|
||||
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
|
||||
if (lfc_maybe_disabled()) /* fast exit if file cache is disabled */
|
||||
return false;
|
||||
@@ -1466,7 +1041,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
|
||||
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
|
||||
|
||||
tag.blockNum = blkno - chunk_offs;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
cv = &lfc_ctl->cv[hash % N_COND_VARS];
|
||||
|
||||
@@ -1477,7 +1052,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
LWLockRelease(lfc_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
lwlsn = neon_get_lwlsn(rinfo, forknum, blkno);
|
||||
|
||||
if (lwlsn > lsn)
|
||||
@@ -1490,11 +1065,9 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
|
||||
entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
|
||||
|
||||
if (lfc_prewarm_update_ws_estimation)
|
||||
{
|
||||
tag.blockNum = blkno;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
}
|
||||
tag.blockNum = blkno;
|
||||
addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
|
||||
|
||||
if (found)
|
||||
{
|
||||
state = GET_STATE(entry, chunk_offs);
|
||||
@@ -1508,10 +1081,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
* operation
|
||||
*/
|
||||
if (entry->access_count++ == 0)
|
||||
{
|
||||
lfc_ctl->pinned += 1;
|
||||
dlist_delete(&entry->list_node);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1536,7 +1106,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE);
|
||||
INSTR_TIME_SET_CURRENT(io_start);
|
||||
rc = pwrite(lfc_desc, buffer, BLCKSZ,
|
||||
((off_t) entry_offset * lfc_blocks_per_chunk + chunk_offs) * BLCKSZ);
|
||||
((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
|
||||
INSTR_TIME_SET_CURRENT(io_end);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
@@ -1562,10 +1132,7 @@ lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
inc_page_cache_write_wait(time_spent_us);
|
||||
|
||||
if (--entry->access_count == 0)
|
||||
{
|
||||
lfc_ctl->pinned -= 1;
|
||||
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
|
||||
}
|
||||
|
||||
state = GET_STATE(entry, chunk_offs);
|
||||
if (state == REQUESTED) {
|
||||
@@ -1632,8 +1199,8 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
while (nblocks > 0)
|
||||
{
|
||||
struct iovec iov[PG_IOV_MAX];
|
||||
int chunk_offs = BLOCK_TO_CHUNK_OFF(blkno);
|
||||
int blocks_in_chunk = Min(nblocks, lfc_blocks_per_chunk - chunk_offs);
|
||||
int chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
|
||||
int blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
|
||||
instr_time io_start, io_end;
|
||||
ConditionVariable* cv;
|
||||
|
||||
@@ -1645,7 +1212,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
iov[i].iov_len = BLCKSZ;
|
||||
}
|
||||
|
||||
tag.blockNum = blkno - chunk_offs;
|
||||
tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
|
||||
hash = get_hash_value(lfc_hash, &tag);
|
||||
cv = &lfc_ctl->cv[hash % N_COND_VARS];
|
||||
|
||||
@@ -1665,10 +1232,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
* operation
|
||||
*/
|
||||
if (entry->access_count++ == 0)
|
||||
{
|
||||
lfc_ctl->pinned += 1;
|
||||
dlist_delete(&entry->list_node);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1721,7 +1285,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_WRITE);
|
||||
INSTR_TIME_SET_CURRENT(io_start);
|
||||
rc = pwritev(lfc_desc, iov, blocks_in_chunk,
|
||||
((off_t) entry_offset * lfc_blocks_per_chunk + chunk_offs) * BLCKSZ);
|
||||
((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
|
||||
INSTR_TIME_SET_CURRENT(io_end);
|
||||
pgstat_report_wait_end();
|
||||
|
||||
@@ -1748,10 +1312,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
inc_page_cache_write_wait(time_spent_us);
|
||||
|
||||
if (--entry->access_count == 0)
|
||||
{
|
||||
lfc_ctl->pinned -= 1;
|
||||
dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
|
||||
}
|
||||
|
||||
for (int i = 0; i < blocks_in_chunk; i++)
|
||||
{
|
||||
@@ -1877,12 +1438,7 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
|
||||
break;
|
||||
case 8:
|
||||
key = "file_cache_chunk_size_pages";
|
||||
value = lfc_blocks_per_chunk;
|
||||
break;
|
||||
case 9:
|
||||
key = "file_cache_chunks_pinned";
|
||||
if (lfc_ctl)
|
||||
value = lfc_ctl->pinned;
|
||||
value = BLOCKS_PER_CHUNK;
|
||||
break;
|
||||
default:
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
@@ -2010,7 +1566,7 @@ local_cache_pages(PG_FUNCTION_ARGS)
|
||||
/* Skip hole tags */
|
||||
if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0)
|
||||
{
|
||||
for (int i = 0; i < lfc_blocks_per_chunk; i++)
|
||||
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||
n_pages += GET_STATE(entry, i) == AVAILABLE;
|
||||
}
|
||||
}
|
||||
@@ -2038,13 +1594,13 @@ local_cache_pages(PG_FUNCTION_ARGS)
|
||||
hash_seq_init(&status, lfc_hash);
|
||||
while ((entry = hash_seq_search(&status)) != NULL)
|
||||
{
|
||||
for (int i = 0; i < lfc_blocks_per_chunk; i++)
|
||||
for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
|
||||
{
|
||||
if (NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)) != 0)
|
||||
{
|
||||
if (GET_STATE(entry, i) == AVAILABLE)
|
||||
{
|
||||
fctx->record[n].pageoffs = entry->offset * lfc_blocks_per_chunk + i;
|
||||
fctx->record[n].pageoffs = entry->offset * BLOCKS_PER_CHUNK + i;
|
||||
fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key));
|
||||
fctx->record[n].reltablespace = NInfoGetSpcOid(BufTagGetNRelFileInfo(entry->key));
|
||||
fctx->record[n].reldatabase = NInfoGetDbOid(BufTagGetNRelFileInfo(entry->key));
|
||||
@@ -2128,82 +1684,3 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
|
||||
}
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(get_local_cache_state);
|
||||
|
||||
Datum
|
||||
get_local_cache_state(PG_FUNCTION_ARGS)
|
||||
{
|
||||
size_t max_entries = PG_ARGISNULL(0) ? lfc_prewarm_limit : PG_GETARG_INT32(0);
|
||||
FileCacheState* fcs = lfc_get_state(max_entries);
|
||||
if (fcs != NULL)
|
||||
PG_RETURN_BYTEA_P((bytea*)fcs);
|
||||
else
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(prewarm_local_cache);
|
||||
|
||||
Datum
|
||||
prewarm_local_cache(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bytea* state = PG_GETARG_BYTEA_PP(0);
|
||||
uint32 n_workers = PG_GETARG_INT32(1);
|
||||
FileCacheState* fcs = (FileCacheState*)state;
|
||||
|
||||
lfc_prewarm(fcs, n_workers);
|
||||
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(get_prewarm_info);
|
||||
|
||||
Datum
|
||||
get_prewarm_info(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Datum values[4];
|
||||
bool nulls[4];
|
||||
TupleDesc tupdesc;
|
||||
uint32 prewarmed_pages = 0;
|
||||
uint32 skipped_pages = 0;
|
||||
uint32 active_workers = 0;
|
||||
uint32 total_pages;
|
||||
size_t n_workers;
|
||||
|
||||
if (lfc_size_limit == 0)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||
if (!lfc_ctl || lfc_ctl->n_prewarm_workers == 0)
|
||||
{
|
||||
LWLockRelease(lfc_lock);
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
n_workers = lfc_ctl->n_prewarm_workers;
|
||||
total_pages = lfc_ctl->total_prewarm_pages;
|
||||
for (size_t i = 0; i < n_workers; i++)
|
||||
{
|
||||
PrewarmWorkerState* ws = &lfc_ctl->prewarm_workers[i];
|
||||
prewarmed_pages += ws->prewarmed_pages;
|
||||
skipped_pages += ws->skipped_pages;
|
||||
active_workers += ws->completed != 0;
|
||||
}
|
||||
LWLockRelease(lfc_lock);
|
||||
|
||||
tupdesc = CreateTemplateTupleDesc(4);
|
||||
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "total_pages", INT4OID, -1, 0);
|
||||
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "prewarmed_pages", INT4OID, -1, 0);
|
||||
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "skipped_pages", INT4OID, -1, 0);
|
||||
TupleDescInitEntry(tupdesc, (AttrNumber) 4, "active_workers", INT4OID, -1, 0);
|
||||
tupdesc = BlessTupleDesc(tupdesc);
|
||||
|
||||
MemSet(nulls, 0, sizeof(nulls));
|
||||
|
||||
values[0] = Int32GetDatum(total_pages);
|
||||
values[1] = Int32GetDatum(prewarmed_pages);
|
||||
values[2] = Int32GetDatum(skipped_pages);
|
||||
values[3] = Int32GetDatum(active_workers);
|
||||
|
||||
PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
|
||||
}
|
||||
|
||||
|
||||
@@ -13,17 +13,6 @@
|
||||
|
||||
#include "neon_pgversioncompat.h"
|
||||
|
||||
typedef struct FileCacheState
|
||||
{
|
||||
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||
uint32 magic;
|
||||
uint32 n_chunks;
|
||||
uint32 n_pages;
|
||||
uint16 chunk_size_log;
|
||||
BufferTag chunks[FLEXIBLE_ARRAY_MEMBER];
|
||||
/* followed by bitmap */
|
||||
} FileCacheState;
|
||||
|
||||
/* GUCs */
|
||||
extern bool lfc_store_prefetch_result;
|
||||
|
||||
@@ -43,10 +32,7 @@ extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,
|
||||
extern void lfc_init(void);
|
||||
extern bool lfc_prefetch(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blkno,
|
||||
const void* buffer, XLogRecPtr lsn);
|
||||
extern FileCacheState* lfc_get_state(size_t max_entries);
|
||||
extern void lfc_prewarm(FileCacheState* fcs, uint32 n_workers);
|
||||
|
||||
PGDLLEXPORT void lfc_prewarm_main(Datum main_arg);
|
||||
|
||||
static inline bool
|
||||
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include "portability/instr_time.h"
|
||||
#include "postmaster/interrupt.h"
|
||||
#include "storage/buf_internals.h"
|
||||
#include "storage/fd.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/lwlock.h"
|
||||
#include "storage/pg_shmem.h"
|
||||
@@ -49,6 +48,7 @@
|
||||
#define MIN_RECONNECT_INTERVAL_USEC 1000
|
||||
#define MAX_RECONNECT_INTERVAL_USEC 1000000
|
||||
|
||||
|
||||
enum NeonComputeMode {
|
||||
CP_MODE_PRIMARY = 0,
|
||||
CP_MODE_REPLICA,
|
||||
@@ -80,7 +80,6 @@ int neon_protocol_version = 3;
|
||||
static int neon_compute_mode = 0;
|
||||
static int max_reconnect_attempts = 60;
|
||||
static int stripe_size;
|
||||
static int max_sockets;
|
||||
|
||||
static int pageserver_response_log_timeout = 10000;
|
||||
/* 2.5 minutes. A bit higher than highest default TCP retransmission timeout */
|
||||
@@ -168,9 +167,6 @@ typedef struct
|
||||
WaitEventSet *wes_read;
|
||||
} PageServer;
|
||||
|
||||
static uint32 local_request_counter;
|
||||
#define GENERATE_REQUEST_ID() (((NeonRequestId)MyProcPid << 32) | ++local_request_counter)
|
||||
|
||||
static PageServer page_servers[MAX_SHARDS];
|
||||
|
||||
static bool pageserver_flush(shardno_t shard_no);
|
||||
@@ -338,13 +334,6 @@ load_shard_map(shardno_t shard_no, char *connstr_p, shardno_t *num_shards_p)
|
||||
pageserver_disconnect(i);
|
||||
}
|
||||
pagestore_local_counter = end_update_counter;
|
||||
|
||||
/* Reserve file descriptors for sockets */
|
||||
while (max_sockets < num_shards)
|
||||
{
|
||||
max_sockets += 1;
|
||||
ReserveExternalFD();
|
||||
}
|
||||
}
|
||||
|
||||
if (num_shards_p)
|
||||
@@ -745,8 +734,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
|
||||
default:
|
||||
neon_shard_log(shard_no, ERROR, "libpagestore: invalid connection state %d", shard->state);
|
||||
}
|
||||
|
||||
pg_unreachable();
|
||||
/* This shouldn't be hit */
|
||||
Assert(false);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -886,7 +875,6 @@ retry:
|
||||
int port;
|
||||
int sndbuf;
|
||||
int recvbuf;
|
||||
uint64* max_wait;
|
||||
|
||||
get_local_port(PQsocket(pageserver_conn), &port);
|
||||
get_socket_stats(PQsocket(pageserver_conn), &sndbuf, &recvbuf);
|
||||
@@ -897,10 +885,7 @@ retry:
|
||||
shard->nrequests_sent, shard->nresponses_received, port, sndbuf, recvbuf,
|
||||
pageserver_conn->inStart, pageserver_conn->inEnd);
|
||||
shard->receive_last_log_time = now;
|
||||
MyNeonCounters->compute_getpage_stuck_requests_total += !shard->receive_logged;
|
||||
shard->receive_logged = true;
|
||||
max_wait = &MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms;
|
||||
*max_wait = Max(*max_wait, INSTR_TIME_GET_MILLISEC(since_start));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -923,7 +908,6 @@ retry:
|
||||
get_local_port(PQsocket(pageserver_conn), &port);
|
||||
neon_shard_log(shard_no, LOG, "no response from pageserver for %0.3f s, disconnecting (socket port=%d)",
|
||||
INSTR_TIME_GET_DOUBLE(since_start), port);
|
||||
MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms = 0;
|
||||
pageserver_disconnect(shard_no);
|
||||
return -1;
|
||||
}
|
||||
@@ -947,7 +931,6 @@ retry:
|
||||
INSTR_TIME_SET_ZERO(shard->receive_start_time);
|
||||
INSTR_TIME_SET_ZERO(shard->receive_last_log_time);
|
||||
shard->receive_logged = false;
|
||||
MyNeonCounters->compute_getpage_max_inflight_stuck_time_ms = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1011,7 +994,6 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
|
||||
pageserver_conn = NULL;
|
||||
}
|
||||
|
||||
request->reqid = GENERATE_REQUEST_ID();
|
||||
req_buff = nm_pack_request(request);
|
||||
|
||||
/*
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
\echo Use "ALTER EXTENSION neon UPDATE TO '1.6'" to load this file. \quit
|
||||
|
||||
CREATE FUNCTION get_prewarm_info(out total_pages integer, out prewarmed_pages integer, out skipped_pages integer, out active_workers integer)
|
||||
RETURNS record
|
||||
AS 'MODULE_PATHNAME', 'get_prewarm_info'
|
||||
LANGUAGE C STRICT
|
||||
PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION get_local_cache_state(max_chunks integer default null)
|
||||
RETURNS bytea
|
||||
AS 'MODULE_PATHNAME', 'get_local_cache_state'
|
||||
LANGUAGE C
|
||||
PARALLEL UNSAFE;
|
||||
|
||||
CREATE FUNCTION prewarm_local_cache(state bytea, n_workers integer default 1)
|
||||
RETURNS void
|
||||
AS 'MODULE_PATHNAME', 'prewarm_local_cache'
|
||||
LANGUAGE C STRICT
|
||||
PARALLEL UNSAFE;
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
DROP FUNCTION IF EXISTS get_prewarm_info(out total_pages integer, out prewarmed_pages integer, out skipped_pages integer, out active_workers integer);
|
||||
|
||||
DROP FUNCTION IF EXISTS get_local_cache_state(max_chunks integer);
|
||||
|
||||
DROP FUNCTION IF EXISTS prewarm_local_cache(state bytea, n_workers integer default 1);
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "miscadmin.h"
|
||||
#include "access/xlog.h"
|
||||
#include "access/xlog_internal.h"
|
||||
#include "storage/ipc.h"
|
||||
#include "storage/shmem.h"
|
||||
#include "storage/buf_internals.h"
|
||||
@@ -400,7 +399,6 @@ neon_set_lwlsn_block_range(XLogRecPtr lsn, NRelFileInfo rlocator, ForkNumber for
|
||||
if (lsn == InvalidXLogRecPtr || n_blocks == 0 || LwLsnCache->lastWrittenLsnCacheSize == 0)
|
||||
return lsn;
|
||||
|
||||
Assert(lsn >= WalSegMinSize);
|
||||
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
|
||||
lsn = SetLastWrittenLSNForBlockRangeInternal(lsn, rlocator, forknum, from, n_blocks);
|
||||
LWLockRelease(LastWrittenLsnLock);
|
||||
@@ -437,6 +435,7 @@ neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode,
|
||||
NInfoGetRelNumber(relfilenode) == InvalidOid)
|
||||
return InvalidXLogRecPtr;
|
||||
|
||||
|
||||
BufTagInit(key, relNumber, forknum, blockno, spcOid, dbOid);
|
||||
|
||||
LWLockAcquire(LastWrittenLsnLock, LW_EXCLUSIVE);
|
||||
@@ -445,10 +444,6 @@ neon_set_lwlsn_block_v(const XLogRecPtr *lsns, NRelFileInfo relfilenode,
|
||||
{
|
||||
XLogRecPtr lsn = lsns[i];
|
||||
|
||||
if (lsn == InvalidXLogRecPtr)
|
||||
continue;
|
||||
|
||||
Assert(lsn >= WalSegMinSize);
|
||||
key.blockNum = blockno + i;
|
||||
entry = hash_search(lastWrittenLsnCache, &key, HASH_ENTER, &found);
|
||||
if (found)
|
||||
@@ -510,5 +505,4 @@ neon_set_lwlsn_db(XLogRecPtr lsn)
|
||||
{
|
||||
NRelFileInfo dummyNode = {InvalidOid, InvalidOid, InvalidOid};
|
||||
return neon_set_lwlsn_block(lsn, dummyNode, MAIN_FORKNUM, 0);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -148,7 +148,7 @@ histogram_to_metrics(IOHistogram histogram,
|
||||
static metric_t *
|
||||
neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
|
||||
{
|
||||
#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + 12)
|
||||
#define NUM_METRICS ((2 + NUM_IO_WAIT_BUCKETS) * 3 + 10)
|
||||
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
|
||||
int i = 0;
|
||||
|
||||
@@ -166,8 +166,6 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
|
||||
|
||||
APPEND_METRIC(getpage_prefetch_requests_total);
|
||||
APPEND_METRIC(getpage_sync_requests_total);
|
||||
APPEND_METRIC(compute_getpage_stuck_requests_total);
|
||||
APPEND_METRIC(compute_getpage_max_inflight_stuck_time_ms);
|
||||
APPEND_METRIC(getpage_prefetch_misses_total);
|
||||
APPEND_METRIC(getpage_prefetch_discards_total);
|
||||
APPEND_METRIC(pageserver_requests_sent_total);
|
||||
@@ -296,11 +294,6 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
|
||||
totals.file_cache_hits_total += counters->file_cache_hits_total;
|
||||
histogram_merge_into(&totals.file_cache_read_hist, &counters->file_cache_read_hist);
|
||||
histogram_merge_into(&totals.file_cache_write_hist, &counters->file_cache_write_hist);
|
||||
|
||||
totals.compute_getpage_stuck_requests_total += counters->compute_getpage_stuck_requests_total;
|
||||
totals.compute_getpage_max_inflight_stuck_time_ms = Max(
|
||||
totals.compute_getpage_max_inflight_stuck_time_ms,
|
||||
counters->compute_getpage_max_inflight_stuck_time_ms);
|
||||
}
|
||||
|
||||
metrics = neon_perf_counters_to_metrics(&totals);
|
||||
|
||||
@@ -57,18 +57,6 @@ typedef struct
|
||||
uint64 getpage_prefetch_requests_total;
|
||||
uint64 getpage_sync_requests_total;
|
||||
|
||||
/*
|
||||
* Total number of Getpage requests left without an answer for more than
|
||||
* pageserver_response_log_timeout but less than pageserver_response_disconnect_timeout
|
||||
*/
|
||||
uint64 compute_getpage_stuck_requests_total;
|
||||
|
||||
/*
|
||||
* Longest waiting time for active stuck requests. If a stuck request gets a
|
||||
* response or disconnects, this metric is updated
|
||||
*/
|
||||
uint64 compute_getpage_max_inflight_stuck_time_ms;
|
||||
|
||||
/*
|
||||
* Total number of readahead misses; consisting of either prefetches that
|
||||
* don't satisfy the LSN bounds, or cases where no readahead was issued
|
||||
|
||||
@@ -65,6 +65,7 @@ typedef enum {
|
||||
SLRU_MULTIXACT_OFFSETS
|
||||
} SlruKind;
|
||||
|
||||
|
||||
/*--
|
||||
* supertype of all the Neon*Request structs below.
|
||||
*
|
||||
@@ -128,7 +129,6 @@ typedef struct
|
||||
int segno;
|
||||
} NeonGetSlruSegmentRequest;
|
||||
|
||||
|
||||
/* supertype of all the Neon*Response structs below */
|
||||
typedef NeonMessage NeonResponse;
|
||||
|
||||
@@ -187,7 +187,6 @@ typedef struct
|
||||
{
|
||||
/*
|
||||
* Send this request to the PageServer associated with this shard.
|
||||
* This function assigns request_id to the request which can be extracted by caller from request struct.
|
||||
*/
|
||||
bool (*send) (shardno_t shard_no, NeonRequest * request);
|
||||
/*
|
||||
|
||||
@@ -836,7 +836,7 @@ TermsCollectedMset(WalProposer *wp, MemberSet *mset, Safekeeper **msk, StringInf
|
||||
{
|
||||
uint32 n_greeted = 0;
|
||||
|
||||
for (uint32 i = 0; i < mset->len; i++)
|
||||
for (uint32 i = 0; i < wp->mconf.members.len; i++)
|
||||
{
|
||||
Safekeeper *sk = msk[i];
|
||||
|
||||
@@ -1106,7 +1106,7 @@ VotesCollectedMset(WalProposer *wp, MemberSet *mset, Safekeeper **msk, StringInf
|
||||
{
|
||||
uint32 n_votes = 0;
|
||||
|
||||
for (uint32 i = 0; i < mset->len; i++)
|
||||
for (uint32 i = 0; i < wp->mconf.members.len; i++)
|
||||
{
|
||||
Safekeeper *sk = msk[i];
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
char *wal_acceptors_list = "";
|
||||
int wal_acceptor_reconnect_timeout = 1000;
|
||||
int wal_acceptor_connection_timeout = 10000;
|
||||
int safekeeper_proto_version = 3;
|
||||
int safekeeper_proto_version = 2;
|
||||
|
||||
/* Set to true in the walproposer bgw. */
|
||||
static bool am_walproposer;
|
||||
@@ -228,7 +228,7 @@ nwp_register_gucs(void)
|
||||
"Version of compute <-> safekeeper protocol.",
|
||||
"Used while migrating from 2 to 3.",
|
||||
&safekeeper_proto_version,
|
||||
3, 0, INT_MAX,
|
||||
2, 0, INT_MAX,
|
||||
PGC_POSTMASTER,
|
||||
0,
|
||||
NULL, NULL, NULL);
|
||||
|
||||
@@ -32,7 +32,7 @@ To play with it locally one may start proxy over a local postgres installation
|
||||
(see end of this page on how to generate certs with openssl):
|
||||
|
||||
```
|
||||
LOGFMT=text ./target/debug/proxy -c server.crt -k server.key --auth-backend=postgres --auth-endpoint=postgres://stas@127.0.0.1:5432/stas --wss 0.0.0.0:4444
|
||||
./target/debug/proxy -c server.crt -k server.key --auth-backend=postgres --auth-endpoint=postgres://stas@127.0.0.1:5432/stas --wss 0.0.0.0:4444
|
||||
```
|
||||
|
||||
If both postgres and proxy are running you may send a SQL query:
|
||||
@@ -130,7 +130,7 @@ openssl req -new -x509 -days 365 -nodes -text -out server.crt -keyout server.key
|
||||
|
||||
Then we need to build proxy with 'testing' feature and run, e.g.:
|
||||
```sh
|
||||
RUST_LOG=proxy LOGFMT=text cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' -c server.crt -k server.key
|
||||
RUST_LOG=proxy cargo run -p proxy --bin proxy --features testing -- --auth-backend postgres --auth-endpoint 'postgresql://postgres:proxy-postgres@127.0.0.1:5432/postgres' -c server.crt -k server.key
|
||||
```
|
||||
|
||||
Now from client you can start a new session:
|
||||
|
||||
@@ -7,14 +7,13 @@ use std::{net::SocketAddr, sync::Arc};
|
||||
|
||||
use anyhow::{Context, anyhow, bail, ensure};
|
||||
use clap::Arg;
|
||||
use futures::TryFutureExt;
|
||||
use futures::future::Either;
|
||||
use futures::{FutureExt, TryFutureExt};
|
||||
use itertools::Itertools;
|
||||
use rustls::crypto::ring;
|
||||
use rustls::pki_types::{DnsName, PrivateKeyDer};
|
||||
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
|
||||
use rustls::pki_types::PrivateKeyDer;
|
||||
use tokio::io::{AsyncRead, AsyncWrite};
|
||||
use tokio::net::TcpListener;
|
||||
use tokio_rustls::TlsConnector;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{Instrument, error, info};
|
||||
use utils::project_git_version;
|
||||
@@ -39,12 +38,6 @@ fn cli() -> clap::Command {
|
||||
.help("listen for incoming client connections on ip:port")
|
||||
.default_value("127.0.0.1:4432"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("listen-tls")
|
||||
.long("listen-tls")
|
||||
.help("listen for incoming client connections on ip:port, requiring TLS to compute")
|
||||
.default_value("127.0.0.1:4433"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("tls-key")
|
||||
.short('k')
|
||||
@@ -129,58 +122,31 @@ pub async fn run() -> anyhow::Result<()> {
|
||||
_ => bail!("tls-key and tls-cert must be specified"),
|
||||
};
|
||||
|
||||
let compute_tls_config =
|
||||
Arc::new(crate::tls::client_config::compute_client_config_with_root_certs()?);
|
||||
|
||||
// Start listening for incoming client connections
|
||||
let proxy_address: SocketAddr = args
|
||||
.get_one::<String>("listen")
|
||||
.expect("listen argument defined")
|
||||
.expect("string argument defined")
|
||||
.parse()?;
|
||||
let proxy_address_compute_tls: SocketAddr = args
|
||||
.get_one::<String>("listen-tls")
|
||||
.expect("listen-tls argument defined")
|
||||
.parse()?;
|
||||
|
||||
info!("Starting sni router on {proxy_address}");
|
||||
info!("Starting sni router on {proxy_address_compute_tls}");
|
||||
let proxy_listener = TcpListener::bind(proxy_address).await?;
|
||||
let proxy_listener_compute_tls = TcpListener::bind(proxy_address_compute_tls).await?;
|
||||
|
||||
let cancellation_token = CancellationToken::new();
|
||||
let dest = Arc::new(destination);
|
||||
|
||||
let main = tokio::spawn(task_main(
|
||||
dest.clone(),
|
||||
tls_config.clone(),
|
||||
None,
|
||||
Arc::new(destination),
|
||||
tls_config,
|
||||
tls_server_end_point,
|
||||
proxy_listener,
|
||||
cancellation_token.clone(),
|
||||
))
|
||||
.map(crate::error::flatten_err);
|
||||
|
||||
let main_tls = tokio::spawn(task_main(
|
||||
dest,
|
||||
tls_config,
|
||||
Some(compute_tls_config),
|
||||
tls_server_end_point,
|
||||
proxy_listener_compute_tls,
|
||||
cancellation_token.clone(),
|
||||
))
|
||||
.map(crate::error::flatten_err);
|
||||
));
|
||||
let signals_task = tokio::spawn(crate::signals::handle(cancellation_token, || {}));
|
||||
|
||||
// the signal task cant ever succeed.
|
||||
// the main task can error, or can succeed on cancellation.
|
||||
// we want to immediately exit on either of these cases
|
||||
let main = futures::future::try_join(main, main_tls);
|
||||
let signal = match futures::future::select(signals_task, main).await {
|
||||
Either::Left((res, _)) => crate::error::flatten_err(res)?,
|
||||
Either::Right((res, _)) => {
|
||||
res?;
|
||||
return Ok(());
|
||||
}
|
||||
Either::Right((res, _)) => return crate::error::flatten_err(res),
|
||||
};
|
||||
|
||||
// maintenance tasks return `Infallible` success values, this is an impossible value
|
||||
@@ -191,7 +157,6 @@ pub async fn run() -> anyhow::Result<()> {
|
||||
async fn task_main(
|
||||
dest_suffix: Arc<String>,
|
||||
tls_config: Arc<rustls::ServerConfig>,
|
||||
compute_tls_config: Option<Arc<rustls::ClientConfig>>,
|
||||
tls_server_end_point: TlsServerEndPoint,
|
||||
listener: tokio::net::TcpListener,
|
||||
cancellation_token: CancellationToken,
|
||||
@@ -210,7 +175,6 @@ async fn task_main(
|
||||
let session_id = uuid::Uuid::new_v4();
|
||||
let tls_config = Arc::clone(&tls_config);
|
||||
let dest_suffix = Arc::clone(&dest_suffix);
|
||||
let compute_tls_config = compute_tls_config.clone();
|
||||
|
||||
connections.spawn(
|
||||
async move {
|
||||
@@ -228,15 +192,7 @@ async fn task_main(
|
||||
crate::metrics::Protocol::SniRouter,
|
||||
"sni",
|
||||
);
|
||||
handle_client(
|
||||
ctx,
|
||||
dest_suffix,
|
||||
tls_config,
|
||||
compute_tls_config,
|
||||
tls_server_end_point,
|
||||
socket,
|
||||
)
|
||||
.await
|
||||
handle_client(ctx, dest_suffix, tls_config, tls_server_end_point, socket).await
|
||||
}
|
||||
.unwrap_or_else(|e| {
|
||||
// Acknowledge that the task has finished with an error.
|
||||
@@ -312,7 +268,6 @@ async fn handle_client(
|
||||
ctx: RequestContext,
|
||||
dest_suffix: Arc<String>,
|
||||
tls_config: Arc<rustls::ServerConfig>,
|
||||
compute_tls_config: Option<Arc<rustls::ClientConfig>>,
|
||||
tls_server_end_point: TlsServerEndPoint,
|
||||
stream: impl AsyncRead + AsyncWrite + Unpin,
|
||||
) -> anyhow::Result<()> {
|
||||
@@ -333,33 +288,7 @@ async fn handle_client(
|
||||
|
||||
info!("destination: {}", destination);
|
||||
|
||||
let mut client = tokio::net::TcpStream::connect(&destination).await?;
|
||||
|
||||
let client = if let Some(compute_tls_config) = compute_tls_config {
|
||||
info!("upgrading TLS");
|
||||
|
||||
// send SslRequest
|
||||
client
|
||||
.write_all(b"\x00\x00\x00\x08\x04\xd2\x16\x2f")
|
||||
.await?;
|
||||
|
||||
// wait for S/N respons
|
||||
let mut resp = b'N';
|
||||
client.read_exact(std::slice::from_mut(&mut resp)).await?;
|
||||
|
||||
// error if not S
|
||||
ensure!(resp == b'S', "compute refused TLS");
|
||||
|
||||
// upgrade to TLS.
|
||||
let domain = DnsName::try_from(destination)?;
|
||||
let domain = rustls::pki_types::ServerName::DnsName(domain);
|
||||
let client = TlsConnector::from(compute_tls_config)
|
||||
.connect(domain, client)
|
||||
.await?;
|
||||
Connection::Tls(client)
|
||||
} else {
|
||||
Connection::Raw(client)
|
||||
};
|
||||
let mut client = tokio::net::TcpStream::connect(destination).await?;
|
||||
|
||||
// doesn't yet matter as pg-sni-router doesn't report analytics logs
|
||||
ctx.set_success();
|
||||
@@ -368,19 +297,9 @@ async fn handle_client(
|
||||
// Starting from here we only proxy the client's traffic.
|
||||
info!("performing the proxy pass...");
|
||||
|
||||
let res = match client {
|
||||
Connection::Raw(mut c) => copy_bidirectional_client_compute(&mut tls_stream, &mut c).await,
|
||||
Connection::Tls(mut c) => copy_bidirectional_client_compute(&mut tls_stream, &mut c).await,
|
||||
};
|
||||
|
||||
match res {
|
||||
match copy_bidirectional_client_compute(&mut tls_stream, &mut client).await {
|
||||
Ok(_) => Ok(()),
|
||||
Err(ErrorSource::Client(err)) => Err(err).context("client"),
|
||||
Err(ErrorSource::Compute(err)) => Err(err).context("compute"),
|
||||
}
|
||||
}
|
||||
|
||||
enum Connection {
|
||||
Raw(tokio::net::TcpStream),
|
||||
Tls(tokio_rustls::client::TlsStream<tokio::net::TcpStream>),
|
||||
}
|
||||
|
||||
@@ -132,10 +132,11 @@ impl Drop for LoggingGuard {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: make JSON the default
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Default, Debug)]
|
||||
enum LogFormat {
|
||||
Text,
|
||||
#[default]
|
||||
Text = 1,
|
||||
Json,
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
//
|
||||
// Main entry point for the safekeeper executable
|
||||
//
|
||||
use std::env::{VarError, var};
|
||||
use std::fs::{self, File};
|
||||
use std::io::{ErrorKind, Write};
|
||||
use std::str::FromStr;
|
||||
@@ -353,13 +354,29 @@ async fn main() -> anyhow::Result<()> {
|
||||
};
|
||||
|
||||
// Load JWT auth token to connect to other safekeepers for pull_timeline.
|
||||
let sk_auth_token = if let Some(auth_token_path) = args.auth_token_path.as_ref() {
|
||||
info!("loading JWT token for authentication with safekeepers from {auth_token_path}");
|
||||
let auth_token = tokio::fs::read_to_string(auth_token_path).await?;
|
||||
Some(SecretString::from(auth_token.trim().to_owned()))
|
||||
} else {
|
||||
info!("no JWT token for authentication with safekeepers detected");
|
||||
None
|
||||
// First check if the env var is present, then check the arg with the path.
|
||||
// We want to deprecate and remove the env var method in the future.
|
||||
let sk_auth_token = match var("SAFEKEEPER_AUTH_TOKEN") {
|
||||
Ok(v) => {
|
||||
info!("loaded JWT token for authentication with safekeepers");
|
||||
Some(SecretString::from(v))
|
||||
}
|
||||
Err(VarError::NotPresent) => {
|
||||
if let Some(auth_token_path) = args.auth_token_path.as_ref() {
|
||||
info!(
|
||||
"loading JWT token for authentication with safekeepers from {auth_token_path}"
|
||||
);
|
||||
let auth_token = tokio::fs::read_to_string(auth_token_path).await?;
|
||||
Some(SecretString::from(auth_token.trim().to_owned()))
|
||||
} else {
|
||||
info!("no JWT token for authentication with safekeepers detected");
|
||||
None
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
warn!("JWT token for authentication with safekeepers is not unicode");
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let ssl_ca_certs = match args.ssl_ca_file.as_ref() {
|
||||
|
||||
@@ -401,10 +401,7 @@ pub async fn handle_request(
|
||||
request.timeline_id,
|
||||
));
|
||||
if existing_tli.is_ok() {
|
||||
info!("Timeline {} already exists", request.timeline_id);
|
||||
return Ok(PullTimelineResponse {
|
||||
safekeeper_host: None,
|
||||
});
|
||||
bail!("Timeline {} already exists", request.timeline_id);
|
||||
}
|
||||
|
||||
let mut http_client = reqwest::Client::builder();
|
||||
@@ -428,25 +425,8 @@ pub async fn handle_request(
|
||||
|
||||
let mut statuses = Vec::new();
|
||||
for (i, response) in responses.into_iter().enumerate() {
|
||||
match response {
|
||||
Ok(status) => {
|
||||
statuses.push((status, i));
|
||||
}
|
||||
Err(e) => {
|
||||
info!("error fetching status from {}: {e}", http_hosts[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allow missing responses from up to one safekeeper (say due to downtime)
|
||||
// e.g. if we created a timeline on PS A and B, with C being offline. Then B goes
|
||||
// offline and C comes online. Then we want a pull on C with A and B as hosts to work.
|
||||
let min_required_successful = (http_hosts.len() - 1).max(1);
|
||||
if statuses.len() < min_required_successful {
|
||||
bail!(
|
||||
"only got {} successful status responses. required: {min_required_successful}",
|
||||
statuses.len()
|
||||
)
|
||||
let status = response.context(format!("fetching status from {}", http_hosts[i]))?;
|
||||
statuses.push((status, i));
|
||||
}
|
||||
|
||||
// Find the most advanced safekeeper
|
||||
@@ -556,6 +536,6 @@ async fn pull_timeline(
|
||||
.await?;
|
||||
|
||||
Ok(PullTimelineResponse {
|
||||
safekeeper_host: Some(host),
|
||||
safekeeper_host: host,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ use crate::metrics::{
|
||||
WAL_RECEIVERS,
|
||||
};
|
||||
use crate::safekeeper::{AcceptorProposerMessage, ProposerAcceptorMessage};
|
||||
use crate::timeline::{TimelineError, WalResidentTimeline};
|
||||
use crate::timeline::WalResidentTimeline;
|
||||
|
||||
const DEFAULT_FEEDBACK_CAPACITY: usize = 8;
|
||||
|
||||
@@ -357,14 +357,9 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> NetworkReader<'_, IO> {
|
||||
.await
|
||||
.context("create timeline")?
|
||||
} else {
|
||||
let timeline_res = self.global_timelines.get(self.ttid);
|
||||
match timeline_res {
|
||||
Ok(tl) => tl,
|
||||
Err(TimelineError::NotFound(_)) => {
|
||||
return Err(CopyStreamHandlerEnd::TimelineNoCreate);
|
||||
}
|
||||
other => other.context("get_timeline")?,
|
||||
}
|
||||
self.global_timelines
|
||||
.get(self.ttid)
|
||||
.context("get timeline")?
|
||||
};
|
||||
tli.wal_residence_guard().await?
|
||||
}
|
||||
|
||||
@@ -19,8 +19,7 @@ use storage_controller::service::chaos_injector::ChaosInjector;
|
||||
use storage_controller::service::{
|
||||
Config, HEARTBEAT_INTERVAL_DEFAULT, LONG_RECONCILE_THRESHOLD_DEFAULT,
|
||||
MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT,
|
||||
PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT,
|
||||
SAFEKEEPER_RECONCILER_CONCURRENCY_DEFAULT, Service,
|
||||
PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT, Service,
|
||||
};
|
||||
use tokio::signal::unix::SignalKind;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -133,10 +132,6 @@ struct Cli {
|
||||
#[arg(long)]
|
||||
priority_reconciler_concurrency: Option<usize>,
|
||||
|
||||
/// Maximum number of safekeeper reconciliations that may run in parallel (per safekeeper)
|
||||
#[arg(long)]
|
||||
safekeeper_reconciler_concurrency: Option<usize>,
|
||||
|
||||
/// Tenant API rate limit, as requests per second per tenant.
|
||||
#[arg(long, default_value = "10")]
|
||||
tenant_rate_limit: NonZeroU32,
|
||||
@@ -408,9 +403,6 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
priority_reconciler_concurrency: args
|
||||
.priority_reconciler_concurrency
|
||||
.unwrap_or(PRIORITY_RECONCILER_CONCURRENCY_DEFAULT),
|
||||
safekeeper_reconciler_concurrency: args
|
||||
.safekeeper_reconciler_concurrency
|
||||
.unwrap_or(SAFEKEEPER_RECONCILER_CONCURRENCY_DEFAULT),
|
||||
tenant_rate_limit: args.tenant_rate_limit,
|
||||
split_threshold: args.split_threshold,
|
||||
max_split_shards: args.max_split_shards,
|
||||
|
||||
@@ -194,7 +194,6 @@ pub(crate) enum LeadershipStatus {
|
||||
|
||||
pub const RECONCILER_CONCURRENCY_DEFAULT: usize = 128;
|
||||
pub const PRIORITY_RECONCILER_CONCURRENCY_DEFAULT: usize = 256;
|
||||
pub const SAFEKEEPER_RECONCILER_CONCURRENCY_DEFAULT: usize = 32;
|
||||
|
||||
// Depth of the channel used to enqueue shards for reconciliation when they can't do it immediately.
|
||||
// This channel is finite-size to avoid using excessive memory if we get into a state where reconciles are finishing more slowly
|
||||
@@ -383,9 +382,6 @@ pub struct Config {
|
||||
/// How many high-priority Reconcilers may be spawned concurrently
|
||||
pub priority_reconciler_concurrency: usize,
|
||||
|
||||
/// How many safekeeper reconciles may happen concurrently (per safekeeper)
|
||||
pub safekeeper_reconciler_concurrency: usize,
|
||||
|
||||
/// How many API requests per second to allow per tenant, across all
|
||||
/// tenant-scoped API endpoints. Further API requests queue until ready.
|
||||
pub tenant_rate_limit: NonZeroU32,
|
||||
@@ -3663,7 +3659,7 @@ impl Service {
|
||||
locations: ShardMutationLocations,
|
||||
http_client: reqwest::Client,
|
||||
jwt: Option<String>,
|
||||
mut create_req: TimelineCreateRequest,
|
||||
create_req: TimelineCreateRequest,
|
||||
) -> Result<TimelineInfo, ApiError> {
|
||||
let latest = locations.latest.node;
|
||||
|
||||
@@ -3682,15 +3678,6 @@ impl Service {
|
||||
.await
|
||||
.map_err(|e| passthrough_api_error(&latest, e))?;
|
||||
|
||||
// If we are going to create the timeline on some stale locations for shard 0, then ask them to re-use
|
||||
// the initdb generated by the latest location, rather than generating their own. This avoids racing uploads
|
||||
// of initdb to S3 which might not be binary-identical if different pageservers have different postgres binaries.
|
||||
if tenant_shard_id.is_shard_zero() {
|
||||
if let models::TimelineCreateRequestMode::Bootstrap { existing_initdb_timeline_id, .. } = &mut create_req.mode {
|
||||
*existing_initdb_timeline_id = Some(create_req.new_timeline_id);
|
||||
}
|
||||
}
|
||||
|
||||
// We propagate timeline creations to all attached locations such that a compute
|
||||
// for the new timeline is able to start regardless of the current state of the
|
||||
// tenant shard reconciliation.
|
||||
@@ -3733,10 +3720,6 @@ impl Service {
|
||||
// Because the caller might not provide an explicit LSN, we must do the creation first on a single shard, and then
|
||||
// use whatever LSN that shard picked when creating on subsequent shards. We arbitrarily use shard zero as the shard
|
||||
// that will get the first creation request, and propagate the LSN to all the >0 shards.
|
||||
//
|
||||
// This also enables non-zero shards to use the initdb that shard 0 generated and uploaded to S3, rather than
|
||||
// independently generating their own initdb. This guarantees that shards cannot end up with different initial
|
||||
// states if e.g. they have different postgres binary versions.
|
||||
let timeline_info = create_one(
|
||||
shard_zero_tid,
|
||||
shard_zero_locations,
|
||||
@@ -3746,16 +3729,11 @@ impl Service {
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Update the create request for shards >= 0
|
||||
// Propagate the LSN that shard zero picked, if caller didn't provide one
|
||||
match &mut create_req.mode {
|
||||
models::TimelineCreateRequestMode::Branch { ancestor_start_lsn, .. } if ancestor_start_lsn.is_none() => {
|
||||
// Propagate the LSN that shard zero picked, if caller didn't provide one
|
||||
*ancestor_start_lsn = timeline_info.ancestor_lsn;
|
||||
},
|
||||
models::TimelineCreateRequestMode::Bootstrap { existing_initdb_timeline_id, .. } => {
|
||||
// For shards >= 0, do not run initdb: use the one that shard 0 uploaded to S3
|
||||
*existing_initdb_timeline_id = Some(create_req.new_timeline_id)
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,10 +3,7 @@ use std::{collections::HashMap, str::FromStr, sync::Arc, time::Duration};
|
||||
use clashmap::{ClashMap, Entry};
|
||||
use safekeeper_api::models::PullTimelineRequest;
|
||||
use safekeeper_client::mgmt_api;
|
||||
use tokio::sync::{
|
||||
Semaphore,
|
||||
mpsc::{self, UnboundedReceiver, UnboundedSender},
|
||||
};
|
||||
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::Instrument;
|
||||
use utils::{
|
||||
@@ -209,27 +206,18 @@ impl ReconcilerHandle {
|
||||
}
|
||||
|
||||
pub(crate) struct SafekeeperReconciler {
|
||||
inner: SafekeeperReconcilerInner,
|
||||
concurrency_limiter: Arc<Semaphore>,
|
||||
service: Arc<Service>,
|
||||
rx: UnboundedReceiver<(ScheduleRequest, CancellationToken)>,
|
||||
cancel: CancellationToken,
|
||||
}
|
||||
|
||||
/// Thin wrapper over `Service` to not clutter its inherent functions
|
||||
#[derive(Clone)]
|
||||
struct SafekeeperReconcilerInner {
|
||||
service: Arc<Service>,
|
||||
}
|
||||
|
||||
impl SafekeeperReconciler {
|
||||
fn spawn(cancel: CancellationToken, service: Arc<Service>) -> ReconcilerHandle {
|
||||
// We hold the ServiceInner lock so we don't want to make sending to the reconciler channel to be blocking.
|
||||
let (tx, rx) = mpsc::unbounded_channel();
|
||||
let concurrency = service.config.safekeeper_reconciler_concurrency;
|
||||
let mut reconciler = SafekeeperReconciler {
|
||||
inner: SafekeeperReconcilerInner { service },
|
||||
service,
|
||||
rx,
|
||||
concurrency_limiter: Arc::new(Semaphore::new(concurrency)),
|
||||
cancel: cancel.clone(),
|
||||
};
|
||||
let handle = ReconcilerHandle {
|
||||
@@ -242,44 +230,31 @@ impl SafekeeperReconciler {
|
||||
}
|
||||
async fn run(&mut self) {
|
||||
loop {
|
||||
// TODO add parallelism with semaphore here
|
||||
let req = tokio::select! {
|
||||
req = self.rx.recv() => req,
|
||||
_ = self.cancel.cancelled() => break,
|
||||
};
|
||||
let Some((req, req_cancel)) = req else { break };
|
||||
|
||||
let permit_res = tokio::select! {
|
||||
req = self.concurrency_limiter.clone().acquire_owned() => req,
|
||||
_ = self.cancel.cancelled() => break,
|
||||
};
|
||||
let Ok(_permit) = permit_res else { return };
|
||||
|
||||
let inner = self.inner.clone();
|
||||
if req_cancel.is_cancelled() {
|
||||
continue;
|
||||
}
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
let kind = req.kind;
|
||||
let tenant_id = req.tenant_id;
|
||||
let timeline_id = req.timeline_id;
|
||||
let node_id = req.safekeeper.skp.id;
|
||||
inner
|
||||
.reconcile_one(req, req_cancel)
|
||||
.instrument(tracing::info_span!(
|
||||
"reconcile_one",
|
||||
?kind,
|
||||
%tenant_id,
|
||||
?timeline_id,
|
||||
%node_id,
|
||||
))
|
||||
.await;
|
||||
});
|
||||
let kind = req.kind;
|
||||
let tenant_id = req.tenant_id;
|
||||
let timeline_id = req.timeline_id;
|
||||
let node_id = req.safekeeper.skp.id;
|
||||
self.reconcile_one(req, req_cancel)
|
||||
.instrument(tracing::info_span!(
|
||||
"reconcile_one",
|
||||
?kind,
|
||||
%tenant_id,
|
||||
?timeline_id,
|
||||
%node_id,
|
||||
))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SafekeeperReconcilerInner {
|
||||
async fn reconcile_one(&self, req: ScheduleRequest, req_cancel: CancellationToken) {
|
||||
let req_host = req.safekeeper.skp.host.clone();
|
||||
match req.kind {
|
||||
@@ -306,11 +281,10 @@ impl SafekeeperReconcilerInner {
|
||||
req,
|
||||
async |client| client.pull_timeline(&pull_req).await,
|
||||
|resp| {
|
||||
if let Some(host) = resp.safekeeper_host {
|
||||
tracing::info!("pulled timeline from {host} onto {req_host}");
|
||||
} else {
|
||||
tracing::info!("timeline already present on safekeeper on {req_host}");
|
||||
}
|
||||
tracing::info!(
|
||||
"pulled timeline from {} onto {req_host}",
|
||||
resp.safekeeper_host,
|
||||
);
|
||||
},
|
||||
req_cancel,
|
||||
)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
use std::time::SystemTime;
|
||||
|
||||
use futures_util::StreamExt;
|
||||
@@ -56,7 +55,7 @@ impl TimelineAnalysis {
|
||||
pub(crate) async fn branch_cleanup_and_check_errors(
|
||||
remote_client: &GenericRemoteStorage,
|
||||
id: &TenantShardTimelineId,
|
||||
tenant_objects: Arc<tokio::sync::Mutex<TenantObjectListing>>,
|
||||
tenant_objects: &mut TenantObjectListing,
|
||||
s3_active_branch: Option<&BranchData>,
|
||||
console_branch: Option<BranchData>,
|
||||
s3_data: Option<RemoteTimelineBlobData>,
|
||||
@@ -151,11 +150,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
|
||||
))
|
||||
}
|
||||
|
||||
if !tenant_objects
|
||||
.lock()
|
||||
.await
|
||||
.check_ref(id.timeline_id, &layer, &metadata)
|
||||
{
|
||||
if !tenant_objects.check_ref(id.timeline_id, &layer, &metadata) {
|
||||
let path = remote_layer_path(
|
||||
&id.tenant_shard_id.tenant_id,
|
||||
&id.timeline_id,
|
||||
|
||||
@@ -73,12 +73,8 @@ enum Command {
|
||||
node_kind: NodeKind,
|
||||
#[arg(short, long, default_value_t = false)]
|
||||
json: bool,
|
||||
/// If provided, only these tenants will be listed from the remote storage.
|
||||
#[arg(long = "tenant-id", num_args = 0..)]
|
||||
tenant_ids: Vec<TenantShardId>,
|
||||
/// If provided, we will list all tenants, but then filter with the prefix.
|
||||
#[arg(long = "tenant-id-prefix")]
|
||||
tenant_id_prefix: Option<TenantId>,
|
||||
#[arg(long = "post", default_value_t = false)]
|
||||
post_to_storcon: bool,
|
||||
#[arg(long, default_value = None)]
|
||||
@@ -182,7 +178,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
Command::ScanMetadata {
|
||||
json,
|
||||
tenant_ids,
|
||||
tenant_id_prefix,
|
||||
node_kind,
|
||||
post_to_storcon,
|
||||
dump_db_connstr,
|
||||
@@ -191,9 +186,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
verbose,
|
||||
} => {
|
||||
if let NodeKind::Safekeeper = node_kind {
|
||||
if tenant_id_prefix.is_some() {
|
||||
bail!("`tenant_id_prefix` is not supported for safekeeper node_kind");
|
||||
}
|
||||
let db_or_list = match (timeline_lsns, dump_db_connstr) {
|
||||
(Some(timeline_lsns), _) => {
|
||||
let timeline_lsns = serde_json::from_str(&timeline_lsns)
|
||||
@@ -235,7 +227,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
bucket_config,
|
||||
controller_client.as_ref(),
|
||||
tenant_ids,
|
||||
tenant_id_prefix,
|
||||
json,
|
||||
post_to_storcon,
|
||||
verbose,
|
||||
@@ -347,7 +338,6 @@ pub async fn run_cron_job(
|
||||
bucket_config,
|
||||
controller_client,
|
||||
Vec::new(),
|
||||
None,
|
||||
true,
|
||||
post_to_storcon,
|
||||
false, // default to non-verbose mode
|
||||
@@ -394,12 +384,10 @@ pub async fn pageserver_physical_gc_cmd(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn scan_pageserver_metadata_cmd(
|
||||
bucket_config: BucketConfig,
|
||||
controller_client: Option<&control_api::Client>,
|
||||
tenant_shard_ids: Vec<TenantShardId>,
|
||||
tenant_id_prefix: Option<TenantId>,
|
||||
json: bool,
|
||||
post_to_storcon: bool,
|
||||
verbose: bool,
|
||||
@@ -410,14 +398,7 @@ pub async fn scan_pageserver_metadata_cmd(
|
||||
"Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run"
|
||||
));
|
||||
}
|
||||
match scan_pageserver_metadata(
|
||||
bucket_config.clone(),
|
||||
tenant_shard_ids,
|
||||
tenant_id_prefix,
|
||||
verbose,
|
||||
)
|
||||
.await
|
||||
{
|
||||
match scan_pageserver_metadata(bucket_config.clone(), tenant_shard_ids, verbose).await {
|
||||
Err(e) => {
|
||||
tracing::error!("Failed: {e}");
|
||||
Err(e)
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::sync::Arc;
|
||||
|
||||
use futures::SinkExt;
|
||||
use futures_util::{StreamExt, TryStreamExt};
|
||||
use pageserver::tenant::remote_timeline_client::remote_layer_path;
|
||||
use pageserver_api::controller_api::MetadataHealthUpdateRequest;
|
||||
@@ -9,7 +7,6 @@ use pageserver_api::shard::TenantShardId;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use serde::Serialize;
|
||||
use tracing::{Instrument, info_span};
|
||||
use utils::generation::Generation;
|
||||
use utils::id::TenantId;
|
||||
use utils::shard::ShardCount;
|
||||
|
||||
@@ -17,12 +14,10 @@ use crate::checks::{
|
||||
BlobDataParseResult, RemoteTimelineBlobData, TenantObjectListing, TimelineAnalysis,
|
||||
branch_cleanup_and_check_errors, list_timeline_blobs,
|
||||
};
|
||||
use crate::metadata_stream::{
|
||||
stream_tenant_timelines, stream_tenants, stream_tenants_maybe_prefix,
|
||||
};
|
||||
use crate::metadata_stream::{stream_tenant_timelines, stream_tenants};
|
||||
use crate::{BucketConfig, NodeKind, RootTarget, TenantShardTimelineId, init_remote};
|
||||
|
||||
#[derive(Serialize, Default, Clone)]
|
||||
#[derive(Serialize, Default)]
|
||||
pub struct MetadataSummary {
|
||||
tenant_count: usize,
|
||||
timeline_count: usize,
|
||||
@@ -107,13 +102,13 @@ impl MetadataSummary {
|
||||
|
||||
format!(
|
||||
"Tenants: {}
|
||||
Timelines: {}
|
||||
Timeline-shards: {}
|
||||
With errors: {}
|
||||
With warnings: {}
|
||||
With orphan layers: {}
|
||||
Index versions: {version_summary}
|
||||
",
|
||||
Timelines: {}
|
||||
Timeline-shards: {}
|
||||
With errors: {}
|
||||
With warnings: {}
|
||||
With orphan layers: {}
|
||||
Index versions: {version_summary}
|
||||
",
|
||||
self.tenant_count,
|
||||
self.timeline_count,
|
||||
self.timeline_shard_count,
|
||||
@@ -143,243 +138,24 @@ impl MetadataSummary {
|
||||
pub async fn scan_pageserver_metadata(
|
||||
bucket_config: BucketConfig,
|
||||
tenant_ids: Vec<TenantShardId>,
|
||||
tenant_id_prefix: Option<TenantId>,
|
||||
verbose: bool,
|
||||
) -> anyhow::Result<MetadataSummary> {
|
||||
let (remote_client, target) = init_remote(bucket_config, NodeKind::Pageserver).await?;
|
||||
|
||||
if !tenant_ids.is_empty() && tenant_id_prefix.is_some() {
|
||||
anyhow::bail!("`tenant_id_prefix` is not supported when `tenant_ids` is provided");
|
||||
}
|
||||
let tenants = if tenant_ids.is_empty() {
|
||||
futures::future::Either::Left(stream_tenants(&remote_client, &target))
|
||||
} else {
|
||||
futures::future::Either::Right(futures::stream::iter(tenant_ids.into_iter().map(Ok)))
|
||||
};
|
||||
|
||||
let (mut list_tenants_tx, list_tenants_rx) = futures::channel::mpsc::channel(1);
|
||||
let remote_client_inner = remote_client.clone();
|
||||
let target_inner = target.clone();
|
||||
let list_tenants = tokio::spawn(async move {
|
||||
let mut cnt = 0;
|
||||
if tenant_ids.is_empty() {
|
||||
if let Some(tenant_id_prefix) = tenant_id_prefix {
|
||||
let stream = stream_tenants_maybe_prefix(
|
||||
&remote_client_inner,
|
||||
&target_inner,
|
||||
Some(tenant_id_prefix.to_string()),
|
||||
);
|
||||
let mut stream = Box::pin(stream);
|
||||
while let Some(tenant) = stream.next().await {
|
||||
let tenant = tenant?;
|
||||
list_tenants_tx.send(tenant).await?;
|
||||
cnt += 1;
|
||||
}
|
||||
} else {
|
||||
let stream = stream_tenants(&remote_client_inner, &target_inner);
|
||||
let mut stream = Box::pin(stream);
|
||||
while let Some(tenant) = stream.next().await {
|
||||
let tenant = tenant?;
|
||||
list_tenants_tx.send(tenant).await?;
|
||||
cnt += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for tenant_id in tenant_ids {
|
||||
list_tenants_tx.send(tenant_id).await?;
|
||||
cnt += 1;
|
||||
}
|
||||
}
|
||||
tracing::info!("list_tenants: collected {} tenants", cnt);
|
||||
Ok::<_, anyhow::Error>(())
|
||||
});
|
||||
// How many tenants to process in parallel. We need to be mindful of pageservers
|
||||
// accessing the same per tenant prefixes, so use a lower setting than pageservers.
|
||||
const CONCURRENCY: usize = 32;
|
||||
|
||||
let (mut list_timelines_tx, list_timelines_rx) = futures::channel::mpsc::channel(1);
|
||||
let remote_client_inner = remote_client.clone();
|
||||
let target_inner = target.clone();
|
||||
let list_timelines = tokio::spawn(async move {
|
||||
let stream = list_tenants_rx
|
||||
.map(|tenant_id| {
|
||||
stream_tenant_timelines(&remote_client_inner, &target_inner, tenant_id)
|
||||
})
|
||||
.buffered(8)
|
||||
.try_flatten();
|
||||
let mut stream = Box::pin(stream);
|
||||
while let Some(item) = stream.next().await {
|
||||
let item = item?;
|
||||
list_timelines_tx.send(item).await?;
|
||||
}
|
||||
Ok::<_, anyhow::Error>(())
|
||||
});
|
||||
|
||||
let (mut read_timelines_tx, read_timelines_rx) = futures::channel::mpsc::channel(1);
|
||||
let remote_client_inner = remote_client.clone();
|
||||
let target_inner = target.clone();
|
||||
let read_timelines = tokio::spawn(async move {
|
||||
let stream = list_timelines_rx
|
||||
.map(|ttid| report_on_timeline(&remote_client_inner, &target_inner, ttid))
|
||||
.buffered(32);
|
||||
let mut stream = Box::pin(stream);
|
||||
while let Some(item) = stream.next().await {
|
||||
let item = item?;
|
||||
read_timelines_tx.send(item).await?;
|
||||
}
|
||||
Ok::<_, anyhow::Error>(())
|
||||
});
|
||||
|
||||
let summary = Arc::new(tokio::sync::Mutex::new(MetadataSummary::new()));
|
||||
let summary_inner = summary.clone();
|
||||
|
||||
let (mut consolidate_tenants_tx, consolidate_tenants_rx) = futures::channel::mpsc::channel(32);
|
||||
let consolidate_tenants = tokio::spawn(async move {
|
||||
// We must gather all the TenantShardTimelineId->S3TimelineBlobData for each tenant, because different
|
||||
// shards in the same tenant might refer to one anothers' keys if a shard split has happened.
|
||||
|
||||
let mut tenant_id = None;
|
||||
let mut tenant_objects = TenantObjectListing::default();
|
||||
let mut tenant_timeline_results = Vec::new();
|
||||
|
||||
// Iterate through all the timeline results. These are in key-order, so
|
||||
// all results for the same tenant will be adjacent. We accumulate these,
|
||||
// and then call `analyze_tenant` to flush, when we see the next tenant ID.
|
||||
let mut highest_shard_count = ShardCount::MIN;
|
||||
let mut read_timelines_rx = read_timelines_rx;
|
||||
while let Some(i) = read_timelines_rx.next().await {
|
||||
let (ttid, data) = i;
|
||||
{
|
||||
let mut guard = summary_inner.lock().await;
|
||||
guard.update_data(&data);
|
||||
}
|
||||
|
||||
match tenant_id {
|
||||
Some(prev_tenant_id) => {
|
||||
if prev_tenant_id != ttid.tenant_shard_id.tenant_id {
|
||||
// New tenant: analyze this tenant's timelines, clear accumulated tenant_timeline_results
|
||||
let tenant_objects = std::mem::take(&mut tenant_objects);
|
||||
let timelines = std::mem::take(&mut tenant_timeline_results);
|
||||
analyze_tenant(
|
||||
summary_inner.clone(),
|
||||
Arc::new(tokio::sync::Mutex::new(tenant_objects)),
|
||||
timelines,
|
||||
highest_shard_count,
|
||||
&mut consolidate_tenants_tx,
|
||||
)
|
||||
.await?;
|
||||
tenant_id = Some(ttid.tenant_shard_id.tenant_id);
|
||||
highest_shard_count = ttid.tenant_shard_id.shard_count;
|
||||
} else {
|
||||
highest_shard_count =
|
||||
highest_shard_count.max(ttid.tenant_shard_id.shard_count);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
tenant_id = Some(ttid.tenant_shard_id.tenant_id);
|
||||
highest_shard_count = highest_shard_count.max(ttid.tenant_shard_id.shard_count);
|
||||
}
|
||||
}
|
||||
|
||||
match &data.blob_data {
|
||||
BlobDataParseResult::Parsed {
|
||||
index_part: _,
|
||||
index_part_generation: _index_part_generation,
|
||||
s3_layers,
|
||||
index_part_last_modified_time: _,
|
||||
index_part_snapshot_time: _,
|
||||
} => {
|
||||
tenant_objects.push(ttid, s3_layers.clone());
|
||||
}
|
||||
BlobDataParseResult::Relic => (),
|
||||
BlobDataParseResult::Incorrect {
|
||||
errors: _,
|
||||
s3_layers,
|
||||
} => {
|
||||
tenant_objects.push(ttid, s3_layers.clone());
|
||||
}
|
||||
}
|
||||
tenant_timeline_results.push((ttid, data));
|
||||
}
|
||||
|
||||
if !tenant_timeline_results.is_empty() {
|
||||
analyze_tenant(
|
||||
summary_inner.clone(),
|
||||
Arc::new(tokio::sync::Mutex::new(tenant_objects)),
|
||||
tenant_timeline_results,
|
||||
highest_shard_count,
|
||||
&mut consolidate_tenants_tx,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Ok::<_, anyhow::Error>(())
|
||||
});
|
||||
|
||||
let remote_client_inner = remote_client.clone();
|
||||
let summary_inner = summary.clone();
|
||||
let analyze_tenants = tokio::spawn(async move {
|
||||
let stream = consolidate_tenants_rx
|
||||
.map(|(ttid, tenant_objects, data)| {
|
||||
let remote_client_inner = remote_client_inner.clone();
|
||||
async move {
|
||||
let generation = if let BlobDataParseResult::Parsed {
|
||||
index_part: _,
|
||||
index_part_generation,
|
||||
s3_layers: _,
|
||||
index_part_last_modified_time: _,
|
||||
index_part_snapshot_time: _,
|
||||
} = &data.blob_data
|
||||
{
|
||||
Some(*index_part_generation)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let res = branch_cleanup_and_check_errors(
|
||||
&remote_client_inner,
|
||||
&ttid,
|
||||
tenant_objects.clone(),
|
||||
None,
|
||||
None,
|
||||
Some(data),
|
||||
)
|
||||
.await;
|
||||
(ttid, tenant_objects.clone(), generation, res)
|
||||
}
|
||||
})
|
||||
.buffered(32);
|
||||
let mut last_tenant = None;
|
||||
let mut last_tenant_objects = None;
|
||||
let mut timeline_generations = HashMap::new();
|
||||
let mut stream = Box::pin(stream);
|
||||
while let Some((ttid, tenant_objects, generation, res)) = stream.next().await {
|
||||
if last_tenant != Some(ttid) {
|
||||
if let Some(tenant_id) = last_tenant {
|
||||
let timeline_generations = std::mem::take(&mut timeline_generations);
|
||||
identify_orphans(
|
||||
tenant_id.tenant_shard_id.tenant_id,
|
||||
last_tenant_objects.take().unwrap(),
|
||||
summary_inner.clone(),
|
||||
&timeline_generations,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
last_tenant = Some(ttid);
|
||||
last_tenant_objects = Some(tenant_objects);
|
||||
}
|
||||
if let Some(generation) = generation {
|
||||
timeline_generations.insert(ttid, generation);
|
||||
}
|
||||
{
|
||||
let mut guard = summary_inner.lock().await;
|
||||
guard.update_analysis(&ttid, &res, verbose);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(tenant_id) = last_tenant {
|
||||
identify_orphans(
|
||||
tenant_id.tenant_shard_id.tenant_id,
|
||||
last_tenant_objects.take().unwrap(),
|
||||
summary_inner.clone(),
|
||||
&timeline_generations,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok::<_, anyhow::Error>(())
|
||||
});
|
||||
// Generate a stream of TenantTimelineId
|
||||
let timelines = tenants.map_ok(|t| stream_tenant_timelines(&remote_client, &target, t));
|
||||
let timelines = timelines.try_buffered(CONCURRENCY);
|
||||
let timelines = timelines.try_flatten();
|
||||
|
||||
// Generate a stream of S3TimelineBlobData
|
||||
async fn report_on_timeline(
|
||||
@@ -387,94 +163,93 @@ pub async fn scan_pageserver_metadata(
|
||||
target: &RootTarget,
|
||||
ttid: TenantShardTimelineId,
|
||||
) -> anyhow::Result<(TenantShardTimelineId, RemoteTimelineBlobData)> {
|
||||
tracing::info!("listing blobs for timeline: {}", ttid);
|
||||
let data = list_timeline_blobs(remote_client, ttid, target).await?;
|
||||
Ok((ttid, data))
|
||||
}
|
||||
let timelines = timelines.map_ok(|ttid| report_on_timeline(&remote_client, &target, ttid));
|
||||
let mut timelines = std::pin::pin!(timelines.try_buffered(CONCURRENCY));
|
||||
|
||||
// We must gather all the TenantShardTimelineId->S3TimelineBlobData for each tenant, because different
|
||||
// shards in the same tenant might refer to one anothers' keys if a shard split has happened.
|
||||
|
||||
let mut tenant_id = None;
|
||||
let mut tenant_objects = TenantObjectListing::default();
|
||||
let mut tenant_timeline_results = Vec::new();
|
||||
|
||||
// DO NOT call any long-running tasks in this function; always route them through the channel and let
|
||||
// other tokio tasks handle them.
|
||||
async fn analyze_tenant(
|
||||
summary: Arc<tokio::sync::Mutex<MetadataSummary>>,
|
||||
tenant_objects: Arc<tokio::sync::Mutex<TenantObjectListing>>,
|
||||
remote_client: &GenericRemoteStorage,
|
||||
tenant_id: TenantId,
|
||||
summary: &mut MetadataSummary,
|
||||
mut tenant_objects: TenantObjectListing,
|
||||
timelines: Vec<(TenantShardTimelineId, RemoteTimelineBlobData)>,
|
||||
highest_shard_count: ShardCount,
|
||||
output_tx: &mut futures::channel::mpsc::Sender<(
|
||||
TenantShardTimelineId,
|
||||
Arc<tokio::sync::Mutex<TenantObjectListing>>,
|
||||
RemoteTimelineBlobData,
|
||||
)>,
|
||||
) -> anyhow::Result<()> {
|
||||
{
|
||||
let mut guard = summary.lock().await;
|
||||
guard.tenant_count += 1;
|
||||
}
|
||||
verbose: bool,
|
||||
) {
|
||||
summary.tenant_count += 1;
|
||||
|
||||
let mut timeline_ids = HashSet::new();
|
||||
let mut timeline_generations = HashMap::new();
|
||||
for (ttid, data) in timelines {
|
||||
async {
|
||||
if ttid.tenant_shard_id.shard_count == highest_shard_count {
|
||||
// Only analyze `TenantShardId`s with highest shard count.
|
||||
if ttid.tenant_shard_id.shard_count == highest_shard_count {
|
||||
// Only analyze `TenantShardId`s with highest shard count.
|
||||
|
||||
// Stash the generation of each timeline, for later use identifying orphan layers
|
||||
|
||||
if let BlobDataParseResult::Parsed {
|
||||
index_part,
|
||||
index_part_generation: _,
|
||||
s3_layers: _,
|
||||
index_part_last_modified_time: _,
|
||||
index_part_snapshot_time: _,
|
||||
} = &data.blob_data
|
||||
{
|
||||
if index_part.deleted_at.is_some() {
|
||||
// skip deleted timeline.
|
||||
tracing::info!("Skip analysis of {} b/c timeline is already deleted", ttid);
|
||||
return Ok(());
|
||||
// Stash the generation of each timeline, for later use identifying orphan layers
|
||||
if let BlobDataParseResult::Parsed {
|
||||
index_part,
|
||||
index_part_generation,
|
||||
s3_layers: _,
|
||||
index_part_last_modified_time: _,
|
||||
index_part_snapshot_time: _,
|
||||
} = &data.blob_data
|
||||
{
|
||||
if index_part.deleted_at.is_some() {
|
||||
// skip deleted timeline.
|
||||
tracing::info!(
|
||||
"Skip analysis of {} b/c timeline is already deleted",
|
||||
ttid
|
||||
);
|
||||
return;
|
||||
}
|
||||
timeline_generations.insert(ttid, *index_part_generation);
|
||||
}
|
||||
|
||||
// Apply checks to this timeline shard's metadata, and in the process update `tenant_objects`
|
||||
// reference counts for layers across the tenant.
|
||||
let analysis = branch_cleanup_and_check_errors(
|
||||
remote_client,
|
||||
&ttid,
|
||||
&mut tenant_objects,
|
||||
None,
|
||||
None,
|
||||
Some(data),
|
||||
)
|
||||
.await;
|
||||
summary.update_analysis(&ttid, &analysis, verbose);
|
||||
|
||||
timeline_ids.insert(ttid.timeline_id);
|
||||
} else {
|
||||
tracing::info!(
|
||||
"Skip analysis of {} b/c a lower shard count than {}",
|
||||
ttid,
|
||||
highest_shard_count.0,
|
||||
);
|
||||
}
|
||||
|
||||
// Apply checks to this timeline shard's metadata, and in the process update `tenant_objects`
|
||||
// reference counts for layers across the tenant.
|
||||
|
||||
output_tx.send((ttid, tenant_objects.clone(), data)).await?;
|
||||
|
||||
timeline_ids.insert(ttid.timeline_id);
|
||||
} else {
|
||||
tracing::info!(
|
||||
"Skip analysis of {} b/c a lower shard count than {}",
|
||||
ttid,
|
||||
highest_shard_count.0,
|
||||
);
|
||||
}
|
||||
Ok::<_, anyhow::Error>(())
|
||||
}.instrument(
|
||||
info_span!("analyze-timeline", shard = %ttid.tenant_shard_id.shard_slug(), timeline = %ttid.timeline_id),
|
||||
)
|
||||
.await?;
|
||||
.instrument(
|
||||
info_span!("analyze-timeline", shard = %ttid.tenant_shard_id.shard_slug(), timeline = %ttid.timeline_id),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
{
|
||||
let mut guard = summary.lock().await;
|
||||
guard.timeline_count += timeline_ids.len();
|
||||
}
|
||||
summary.timeline_count += timeline_ids.len();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn identify_orphans(
|
||||
tenant_id: TenantId,
|
||||
tenant_objects: Arc<tokio::sync::Mutex<TenantObjectListing>>,
|
||||
summary: Arc<tokio::sync::Mutex<MetadataSummary>>,
|
||||
timeline_generations: &HashMap<TenantShardTimelineId, Generation>,
|
||||
) {
|
||||
// Identifying orphan layers must be done on a tenant-wide basis, because individual
|
||||
// shards' layers may be referenced by other shards.
|
||||
//
|
||||
// Orphan layers are not a corruption, and not an indication of a problem. They are just
|
||||
// consuming some space in remote storage, and may be cleaned up at leisure.
|
||||
|
||||
let orphans = { tenant_objects.lock().await.get_orphans() };
|
||||
for (shard_index, timeline_id, layer_file, generation) in orphans {
|
||||
for (shard_index, timeline_id, layer_file, generation) in tenant_objects.get_orphans() {
|
||||
let ttid = TenantShardTimelineId {
|
||||
tenant_shard_id: TenantShardId {
|
||||
tenant_id,
|
||||
@@ -504,20 +279,83 @@ pub async fn scan_pageserver_metadata(
|
||||
|
||||
tracing::info!("Orphan layer detected: {orphan_path}");
|
||||
|
||||
{
|
||||
let mut guard = summary.lock().await;
|
||||
guard.notify_timeline_orphan(&ttid);
|
||||
}
|
||||
summary.notify_timeline_orphan(&ttid);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: bail out early if any of the tasks fail
|
||||
list_tenants.await??;
|
||||
list_timelines.await??;
|
||||
read_timelines.await??;
|
||||
consolidate_tenants.await??;
|
||||
analyze_tenants.await??;
|
||||
// Iterate through all the timeline results. These are in key-order, so
|
||||
// all results for the same tenant will be adjacent. We accumulate these,
|
||||
// and then call `analyze_tenant` to flush, when we see the next tenant ID.
|
||||
let mut summary = MetadataSummary::new();
|
||||
let mut highest_shard_count = ShardCount::MIN;
|
||||
while let Some(i) = timelines.next().await {
|
||||
let (ttid, data) = i?;
|
||||
summary.update_data(&data);
|
||||
|
||||
let summary = summary.lock().await;
|
||||
Ok(summary.clone())
|
||||
match tenant_id {
|
||||
Some(prev_tenant_id) => {
|
||||
if prev_tenant_id != ttid.tenant_shard_id.tenant_id {
|
||||
// New tenant: analyze this tenant's timelines, clear accumulated tenant_timeline_results
|
||||
let tenant_objects = std::mem::take(&mut tenant_objects);
|
||||
let timelines = std::mem::take(&mut tenant_timeline_results);
|
||||
analyze_tenant(
|
||||
&remote_client,
|
||||
prev_tenant_id,
|
||||
&mut summary,
|
||||
tenant_objects,
|
||||
timelines,
|
||||
highest_shard_count,
|
||||
verbose,
|
||||
)
|
||||
.instrument(info_span!("analyze-tenant", tenant = %prev_tenant_id))
|
||||
.await;
|
||||
tenant_id = Some(ttid.tenant_shard_id.tenant_id);
|
||||
highest_shard_count = ttid.tenant_shard_id.shard_count;
|
||||
} else {
|
||||
highest_shard_count = highest_shard_count.max(ttid.tenant_shard_id.shard_count);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
tenant_id = Some(ttid.tenant_shard_id.tenant_id);
|
||||
highest_shard_count = highest_shard_count.max(ttid.tenant_shard_id.shard_count);
|
||||
}
|
||||
}
|
||||
|
||||
match &data.blob_data {
|
||||
BlobDataParseResult::Parsed {
|
||||
index_part: _,
|
||||
index_part_generation: _index_part_generation,
|
||||
s3_layers,
|
||||
index_part_last_modified_time: _,
|
||||
index_part_snapshot_time: _,
|
||||
} => {
|
||||
tenant_objects.push(ttid, s3_layers.clone());
|
||||
}
|
||||
BlobDataParseResult::Relic => (),
|
||||
BlobDataParseResult::Incorrect {
|
||||
errors: _,
|
||||
s3_layers,
|
||||
} => {
|
||||
tenant_objects.push(ttid, s3_layers.clone());
|
||||
}
|
||||
}
|
||||
tenant_timeline_results.push((ttid, data));
|
||||
}
|
||||
|
||||
if !tenant_timeline_results.is_empty() {
|
||||
let tenant_id = tenant_id.expect("Must be set if results are present");
|
||||
analyze_tenant(
|
||||
&remote_client,
|
||||
tenant_id,
|
||||
&mut summary,
|
||||
tenant_objects,
|
||||
tenant_timeline_results,
|
||||
highest_shard_count,
|
||||
verbose,
|
||||
)
|
||||
.instrument(info_span!("analyze-tenant", tenant = %tenant_id))
|
||||
.await;
|
||||
}
|
||||
|
||||
Ok(summary)
|
||||
}
|
||||
|
||||
@@ -1194,7 +1194,8 @@ class NeonEnv:
|
||||
else:
|
||||
cfg["broker"]["listen_addr"] = self.broker.listen_addr()
|
||||
|
||||
cfg["control_plane_api"] = self.control_plane_api
|
||||
if self.control_plane_api is not None:
|
||||
cfg["control_plane_api"] = self.control_plane_api
|
||||
|
||||
if self.control_plane_hooks_api is not None:
|
||||
cfg["control_plane_hooks_api"] = self.control_plane_hooks_api
|
||||
@@ -1279,8 +1280,7 @@ class NeonEnv:
|
||||
)
|
||||
|
||||
tenant_config = ps_cfg.setdefault("tenant_config", {})
|
||||
# This feature is pending rollout.
|
||||
# tenant_config["rel_size_v2_enabled"] = True
|
||||
tenant_config["rel_size_v2_enabled"] = True # Enable relsize_v2 by default in tests
|
||||
|
||||
if self.pageserver_remote_storage is not None:
|
||||
ps_cfg["remote_storage"] = remote_storage_to_toml_dict(
|
||||
|
||||
@@ -186,7 +186,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
"type": "interpreted",
|
||||
"args": {"format": "bincode", "compression": {"zstd": {"level": 1}}},
|
||||
},
|
||||
"rel_size_v2_enabled": True,
|
||||
"rel_size_v2_enabled": False, # test suite enables it by default as of https://github.com/neondatabase/neon/issues/11081, so, custom config means disabling it
|
||||
"gc_compaction_enabled": True,
|
||||
"gc_compaction_verification": False,
|
||||
"gc_compaction_initial_threshold_kb": 1024000,
|
||||
|
||||
@@ -229,7 +229,7 @@ def test_pageserver_gc_compaction_preempt(
|
||||
|
||||
|
||||
@skip_in_debug_build("only run with release build")
|
||||
@pytest.mark.timeout(900) # This test is slow with sanitizers enabled, especially on ARM
|
||||
@pytest.mark.timeout(600) # This test is slow with sanitizers enabled, especially on ARM
|
||||
@pytest.mark.parametrize(
|
||||
"with_branches",
|
||||
["with_branches", "no_branches"],
|
||||
|
||||
@@ -14,7 +14,7 @@ from fixtures.log_helper import log
|
||||
from fixtures.metrics import parse_metrics
|
||||
from fixtures.paths import BASE_DIR
|
||||
from fixtures.pg_config import PgConfigKey
|
||||
from fixtures.utils import WITH_SANITIZERS, subprocess_capture
|
||||
from fixtures.utils import subprocess_capture
|
||||
from werkzeug.wrappers.response import Response
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -148,15 +148,6 @@ def test_remote_extensions(
|
||||
pg_config: PgConfig,
|
||||
extension: RemoteExtension,
|
||||
):
|
||||
if WITH_SANITIZERS and extension is RemoteExtension.WITH_LIB:
|
||||
pytest.skip(
|
||||
"""
|
||||
For this test to work with sanitizers enabled, we would need to
|
||||
compile the dummy Postgres extension with the same CFLAGS that we
|
||||
compile Postgres and the neon extension with to link the sanitizers.
|
||||
"""
|
||||
)
|
||||
|
||||
# Setup a mock nginx S3 gateway which will return our test extension.
|
||||
(host, port) = httpserver_listen_address
|
||||
extensions_endpoint = f"http://{host}:{port}/pg-ext-s3-gateway"
|
||||
|
||||
@@ -1,147 +0,0 @@
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.utils import USE_LFC
|
||||
|
||||
|
||||
def check_pinned_entries(cur):
|
||||
# some LFC buffer can be temporary locked by autovacuum or background writer
|
||||
for _ in range(10):
|
||||
cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_chunks_pinned'")
|
||||
n_pinned = cur.fetchall()[0][0]
|
||||
if n_pinned == 0:
|
||||
break
|
||||
time.sleep(1)
|
||||
assert n_pinned == 0
|
||||
|
||||
|
||||
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
|
||||
def test_lfc_prewarm(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
n_records = 1000000
|
||||
|
||||
endpoint = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
config_lines=[
|
||||
"autovacuum = off",
|
||||
"shared_buffers=1MB",
|
||||
"neon.max_file_cache_size=1GB",
|
||||
"neon.file_cache_size_limit=1GB",
|
||||
"neon.file_cache_prewarm_limit=1000",
|
||||
],
|
||||
)
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("create extension neon version '1.6'")
|
||||
cur.execute("create table t(pk integer primary key, payload text default repeat('?', 128))")
|
||||
cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
|
||||
cur.execute("select get_local_cache_state()")
|
||||
lfc_state = cur.fetchall()[0][0]
|
||||
|
||||
endpoint.stop()
|
||||
endpoint.start()
|
||||
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
time.sleep(1) # wait until compute_ctl complete downgrade of extension to default version
|
||||
cur.execute("alter extension neon update to '1.6'")
|
||||
cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
|
||||
|
||||
cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_used_pages'")
|
||||
lfc_used_pages = cur.fetchall()[0][0]
|
||||
log.info(f"Used LFC size: {lfc_used_pages}")
|
||||
cur.execute("select * from get_prewarm_info()")
|
||||
prewarm_info = cur.fetchall()[0]
|
||||
log.info(f"Prewarm info: {prewarm_info}")
|
||||
log.info(f"Prewarm progress: {(prewarm_info[1] + prewarm_info[2]) * 100 // prewarm_info[0]}%")
|
||||
|
||||
assert lfc_used_pages > 10000
|
||||
assert (
|
||||
prewarm_info[0] > 0
|
||||
and prewarm_info[1] > 0
|
||||
and prewarm_info[0] == prewarm_info[1] + prewarm_info[2]
|
||||
)
|
||||
|
||||
cur.execute("select sum(pk) from t")
|
||||
assert cur.fetchall()[0][0] == n_records * (n_records + 1) / 2
|
||||
|
||||
check_pinned_entries(cur)
|
||||
|
||||
|
||||
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
|
||||
def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
n_records = 10000
|
||||
n_threads = 4
|
||||
|
||||
endpoint = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
config_lines=[
|
||||
"shared_buffers=1MB",
|
||||
"neon.max_file_cache_size=1GB",
|
||||
"neon.file_cache_size_limit=1GB",
|
||||
"neon.file_cache_prewarm_limit=1000000",
|
||||
],
|
||||
)
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("create extension neon version '1.6'")
|
||||
cur.execute(
|
||||
"create table accounts(id integer primary key, balance bigint default 0, payload text default repeat('?', 1000)) with (fillfactor=10)"
|
||||
)
|
||||
cur.execute(f"insert into accounts(id) values (generate_series(1,{n_records}))")
|
||||
cur.execute("select get_local_cache_state()")
|
||||
lfc_state = cur.fetchall()[0][0]
|
||||
|
||||
running = True
|
||||
|
||||
def workload():
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
n_transfers = 0
|
||||
while running:
|
||||
src = random.randint(1, n_records)
|
||||
dst = random.randint(1, n_records)
|
||||
cur.execute("update accounts set balance=balance-100 where id=%s", (src,))
|
||||
cur.execute("update accounts set balance=balance+100 where id=%s", (dst,))
|
||||
n_transfers += 1
|
||||
log.info(f"Number of transfers: {n_transfers}")
|
||||
|
||||
def prewarm():
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
n_prewarms = 0
|
||||
while running:
|
||||
cur.execute("alter system set neon.file_cache_size_limit='1MB'")
|
||||
cur.execute("select pg_reload_conf()")
|
||||
cur.execute("alter system set neon.file_cache_size_limit='1GB'")
|
||||
cur.execute("select pg_reload_conf()")
|
||||
cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
|
||||
n_prewarms += 1
|
||||
log.info(f"Number of prewarms: {n_prewarms}")
|
||||
|
||||
workload_threads = []
|
||||
for _ in range(n_threads):
|
||||
t = threading.Thread(target=workload)
|
||||
workload_threads.append(t)
|
||||
t.start()
|
||||
|
||||
prewarm_thread = threading.Thread(target=prewarm)
|
||||
prewarm_thread.start()
|
||||
|
||||
time.sleep(20)
|
||||
|
||||
running = False
|
||||
for t in workload_threads:
|
||||
t.join()
|
||||
prewarm_thread.join()
|
||||
|
||||
cur.execute("select sum(balance) from accounts")
|
||||
total_balance = cur.fetchall()[0][0]
|
||||
assert total_balance == 0
|
||||
|
||||
check_pinned_entries(cur)
|
||||
@@ -3,7 +3,7 @@
|
||||
Tests in this module exercise the pageserver's behavior around generation numbers,
|
||||
as defined in docs/rfcs/025-generation-numbers.md. Briefly, the behaviors we require
|
||||
of the pageserver are:
|
||||
- Do not start a tenant without a generation number
|
||||
- Do not start a tenant without a generation number if control_plane_api is set
|
||||
- Remote objects must be suffixed with generation
|
||||
- Deletions may only be executed after validating generation
|
||||
- Updates to remote_consistent_lsn may only be made visible after validating generation
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user