Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-31 09:10:38 +00:00

Compare commits: quantumish...conrad/ref (137 commits)
SHA1s:
26be13067c, 791b5d736b, 96bcfba79e, 8e95455aef, f3ef60d236, 8f627ea0ab, 6a353c33e3, 64d0008389, 53a05e8ccb, 62c0152e6b,
7fef4435c1, 43fd5b218b, 29ee273d78, 8b0f2efa57, b309cbc6e9, f0c0733a64, 8862e7c4bf, b7fc5a2fe0, 4559ba79b6, 5dd24c7ad8,
f2828bbe19, fb796229bf, 267fb49908, e2982ed3ec, 9e154a8130, 79d72c94e8, 80e5771c67, 1178f6fe7c, 8b18d8b31b, 3e4cbaed67,
c71aea0223, 87915df2fa, caca08fe78, 0c99f16c60, dd7fff655a, 809633903d, 5c934efb29, 5c9c3b3317, 921a4f2009, eb93c3e3c6,
7a7ab2a1d1, ff526a1051, 9a2456bea5, a456e818af, 3e6fdb0aa6, f8d3f86f58, f67a8a173e, 2288efae66, 4fedcbc0ac, eb830fa547,
a203f9829a, 42ab34dc36, 30b877074c, f18cc808f0, d14d8271b8, fecb707b19, 296c9190b2, a5fe67f361, ee7bb1a667, 9bba31bf68,
380d167b7c, cb991fba42, 4566b12a22, 63ca084696, 379259bdd7, 3300207523, a0a7733b5a, f4245403b3, a8db7ebffb, 154f6dc59c,
15f633922a, c34d36d8a2, cec0543b51, 8aa9540a05, b91f821e8b, 44ea17b7b2, 1b7339b53e, 3593fe195a, c5aaf1ae21, 13b5e7b26f,
dcdfe80bf0, 8630d37f5e, 2fc77c836b, 2c6b327be6, be5bbaecad, d33b3c7457, ffeede085e, bdca5b500b, f4b03ddd7b, 08b19f001c,
1a45b2ec90, 13e38a58a1, 2edd59aefb, 0b639ba608, 28f604d628, fe0ddb7169, 4bbabc092a, 12c26243fc, 2f71eda00f, 5ec82105cc,
78a6daa874, 5c0de4ee8c, bc6a756f1c, 8f3351fa91, e7d18bc188, 4ee0da0a20, 7049003cf7, 3915995530, 5ea0bb2d4f, aac1f8efb1,
43dbded8c8, c848b995b2, 4dee2bfd82, 09ff22a4d4, 8223c1ba9d, 3dad4698ec, 81e7218c27, a06c560ad0, 477ab12b69, f9b05a42d7,
29d73e1404, 8a042fb8ed, f72115d0a9, 7458d031b1, 38384c37ac, 2b2a547671, 59e393aef3, f51ed4a2c4, 4f16ab3f56, 18796fd1dd,
2f3fc7cb57, e65d5f7369, 55aef2993d, 1eef961f09, fc10bb9438, 4b5c75b52f, ca9d8761ff
@@ -33,6 +33,7 @@ workspace-members = [
"compute_api",
"consumption_metrics",
"desim",
"json",
"metrics",
"pageserver_api",
"postgres_backend",
@@ -27,4 +27,4 @@
!storage_controller/
!vendor/postgres-*/
!workspace_hack/
!build_tools/patches
!build-tools/patches
.github/actionlint.yml (vendored): 2 changes

@@ -7,6 +7,7 @@ self-hosted-runner:
- small-metal
- small-arm64
- unit-perf
- unit-perf-aws-arm
- us-east-2
config-variables:
- AWS_ECR_REGION
@@ -30,6 +31,7 @@ config-variables:
- NEON_PROD_AWS_ACCOUNT_ID
- PGREGRESS_PG16_PROJECT_ID
- PGREGRESS_PG17_PROJECT_ID
- PREWARM_PGBENCH_SIZE
- REMOTE_STORAGE_AZURE_CONTAINER
- REMOTE_STORAGE_AZURE_REGION
- SLACK_CICD_CHANNEL_ID
@@ -176,7 +176,13 @@ runs:
fi

if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
# We don't use code coverage for regression tests (the step is disabled),
# so there's no need to collect it.
# Ref https://github.com/neondatabase/neon/issues/4540
# cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
cov_prefix=()
# Explicitly set LLVM_PROFILE_FILE to /dev/null to avoid writing *.profraw files
export LLVM_PROFILE_FILE=/dev/null
else
cov_prefix=()
fi
@@ -150,7 +150,7 @@ jobs:
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
use-fallback: false
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}

- name: Cache postgres v15 build
id: cache_pg_15
@@ -162,7 +162,7 @@ jobs:
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
use-fallback: false
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}

- name: Cache postgres v16 build
id: cache_pg_16
@@ -174,7 +174,7 @@ jobs:
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
use-fallback: false
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}

- name: Cache postgres v17 build
id: cache_pg_17
@@ -186,7 +186,7 @@ jobs:
secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
use-fallback: false
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}

- name: Build all
# Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables
.github/workflows/benchmarking.yml (vendored): 72 changes

@@ -219,6 +219,7 @@ jobs:
--ignore test_runner/performance/test_cumulative_statistics_persistence.py
--ignore test_runner/performance/test_perf_many_relations.py
--ignore test_runner/performance/test_perf_oltp_large_tenant.py
--ignore test_runner/performance/test_lfc_prewarm.py
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -410,6 +411,77 @@ jobs:
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

prewarm-test:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
PGBENCH_SIZE: ${{ vars.PREWARM_PGBENCH_SIZE }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 17
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: "neon-staging"

runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init

steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit

- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours

- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

- name: Run prewarm benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_lfc_prewarm.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}

- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

generate-matrices:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
@@ -72,7 +72,7 @@ jobs:
ARCHS: ${{ inputs.archs || '["x64","arm64"]' }}
DEBIANS: ${{ inputs.debians || '["bullseye","bookworm"]' }}
IMAGE_TAG: |
${{ hashFiles('build-tools.Dockerfile',
${{ hashFiles('build-tools/Dockerfile',
'.github/workflows/build-build-tools-image.yml') }}
run: |
echo "archs=${ARCHS}" | tee -a ${GITHUB_OUTPUT}
@@ -144,7 +144,7 @@ jobs:

- uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
with:
file: build-tools.Dockerfile
file: build-tools/Dockerfile
context: .
provenance: false
push: true
.github/workflows/build_and_test.yml (vendored): 50 changes

@@ -87,6 +87,29 @@ jobs:
uses: ./.github/workflows/build-build-tools-image.yml
secrets: inherit

lint-yamls:
needs: [ meta, check-permissions, build-build-tools-image ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init

steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit

- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- run: make -C compute manifest-schema-validation
- run: make lint-openapi-spec

check-codestyle-python:
needs: [ meta, check-permissions, build-build-tools-image ]
# No need to run on `main` because we this in the merge queue. We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -199,28 +222,6 @@ jobs:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
secrets: inherit

validate-compute-manifest:
runs-on: ubuntu-22.04
needs: [ meta, check-permissions ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit

- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: '24'

- name: Validate manifest against schema
run: |
make -C compute manifest-schema-validation

build-and-test-locally:
needs: [ meta, build-build-tools-image ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -306,14 +307,14 @@ jobs:
statuses: write
contents: write
pull-requests: write
runs-on: [ self-hosted, unit-perf ]
runs-on: [ self-hosted, unit-perf-aws-arm ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 --ulimit nofile=65536:65536 --security-opt seccomp=unconfined
strategy:
fail-fast: false
matrix:
@@ -986,6 +987,7 @@ jobs:
- name: Verify docker-compose example and test extensions
timeout-minutes: 60
env:
PARALLEL_COMPUTES: 3
TAG: >-
${{
needs.meta.outputs.run-kind == 'compute-rc-pr'
.github/workflows/periodic_pagebench.yml (vendored): 4 changes

@@ -1,4 +1,4 @@
name: Periodic pagebench performance test on unit-perf hetzner runner
name: Periodic pagebench performance test on unit-perf-aws-arm runners

on:
schedule:
@@ -40,7 +40,7 @@ jobs:
statuses: write
contents: write
pull-requests: write
runs-on: [ self-hosted, unit-perf ]
runs-on: [ self-hosted, unit-perf-aws-arm ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
.github/workflows/proxy-benchmark.yml (vendored): 4 changes

@@ -1,4 +1,4 @@
name: Periodic proxy performance test on unit-perf hetzner runner
name: Periodic proxy performance test on unit-perf-aws-arm runners

on:
push: # TODO: remove after testing
@@ -32,7 +32,7 @@ jobs:
statuses: write
contents: write
pull-requests: write
runs-on: [self-hosted, unit-perf]
runs-on: [self-hosted, unit-perf-aws-arm]
timeout-minutes: 60 # 1h timeout
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
.gitignore (vendored): 5 changes

@@ -15,7 +15,7 @@ neon.iml
/.neon
/integration_tests/.neon
compaction-suite-results.*
pgxn/neon/communicator/communicator_bindings.h
docker-compose/docker-compose-parallel.yml

# Coverage
*.profraw
@@ -29,3 +29,6 @@ pgxn/neon/communicator/communicator_bindings.h

# pgindent typedef lists
*.list

# Node
**/node_modules/
.gitmodules (vendored): 8 changes

@@ -1,16 +1,16 @@
[submodule "vendor/postgres-v14"]
path = vendor/postgres-v14
url = https://github.com/neondatabase/postgres.git
url = ../postgres.git
branch = REL_14_STABLE_neon
[submodule "vendor/postgres-v15"]
path = vendor/postgres-v15
url = https://github.com/neondatabase/postgres.git
url = ../postgres.git
branch = REL_15_STABLE_neon
[submodule "vendor/postgres-v16"]
path = vendor/postgres-v16
url = https://github.com/neondatabase/postgres.git
url = ../postgres.git
branch = REL_16_STABLE_neon
[submodule "vendor/postgres-v17"]
path = vendor/postgres-v17
url = https://github.com/neondatabase/postgres.git
url = ../postgres.git
branch = REL_17_STABLE_neon
Cargo.lock (generated): 347 changes
@@ -247,32 +247,12 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atomic-take"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8ab6b55fe97976e46f91ddbed8d147d966475dc29b2032757ba47e02376fbc3"
|
||||
|
||||
[[package]]
|
||||
name = "atomic_enum"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99e1aca718ea7b89985790c94aad72d77533063fe00bc497bb79a7c2dae6a661"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
@@ -707,40 +687,13 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core 0.4.5",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"itoa",
|
||||
"matchit 0.7.3",
|
||||
"memchr",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"serde",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tower 0.5.2",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
|
||||
dependencies = [
|
||||
"axum-core 0.5.0",
|
||||
"axum-core",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"form_urlencoded",
|
||||
@@ -748,10 +701,10 @@ dependencies = [
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"itoa",
|
||||
"matchit 0.8.4",
|
||||
"matchit",
|
||||
"memchr",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
@@ -771,26 +724,6 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"mime",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.5.0"
|
||||
@@ -817,8 +750,8 @@ version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b"
|
||||
dependencies = [
|
||||
"axum 0.8.1",
|
||||
"axum-core 0.5.0",
|
||||
"axum",
|
||||
"axum-core",
|
||||
"bytes",
|
||||
"form_urlencoded",
|
||||
"futures-util",
|
||||
@@ -1096,23 +1029,9 @@ checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.23.1"
|
||||
version = "1.16.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
|
||||
dependencies = [
|
||||
"bytemuck_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck_derive"
|
||||
version = "1.9.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ecc273b49b3205b83d648f0690daa588925572cc5063745bfe547fe7ec8e1a1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
@@ -1369,31 +1288,10 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "communicator"
|
||||
version = "0.0.0"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"atomic_enum",
|
||||
"axum 0.8.1",
|
||||
"bytes",
|
||||
"cbindgen",
|
||||
"clashmap",
|
||||
"http 1.1.0",
|
||||
"libc",
|
||||
"metrics",
|
||||
"neon-shmem",
|
||||
"nix 0.30.1",
|
||||
"pageserver_api",
|
||||
"pageserver_client_grpc",
|
||||
"pageserver_page_api",
|
||||
"prometheus",
|
||||
"prost 0.13.5",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-pipe",
|
||||
"tonic 0.12.3",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"uring-common",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
@@ -1423,7 +1321,7 @@ dependencies = [
|
||||
"aws-sdk-kms",
|
||||
"aws-sdk-s3",
|
||||
"aws-smithy-types",
|
||||
"axum 0.8.1",
|
||||
"axum",
|
||||
"axum-extra",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
@@ -1450,6 +1348,7 @@ dependencies = [
|
||||
"p256 0.13.2",
|
||||
"pageserver_page_api",
|
||||
"postgres",
|
||||
"postgres-types",
|
||||
"postgres_initdb",
|
||||
"postgres_versioninfo",
|
||||
"regex",
|
||||
@@ -1727,9 +1626,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.21"
|
||||
version = "0.8.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
|
||||
|
||||
[[package]]
|
||||
name = "crossterm"
|
||||
@@ -1973,6 +1872,7 @@ dependencies = [
|
||||
"diesel_derives",
|
||||
"itoa",
|
||||
"serde_json",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2183,7 +2083,7 @@ name = "endpoint_storage"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"axum 0.8.1",
|
||||
"axum",
|
||||
"axum-extra",
|
||||
"camino",
|
||||
"camino-tempfile",
|
||||
@@ -2444,12 +2344,6 @@ version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "foldhash"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.2.1"
|
||||
@@ -2470,7 +2364,7 @@ dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"http-body-util",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"pin-project",
|
||||
"rand 0.8.5",
|
||||
@@ -2817,16 +2711,6 @@ version = "0.15.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
|
||||
|
||||
[[package]]
|
||||
name = "hashbrown"
|
||||
version = "0.15.4"
|
||||
source = "git+https://github.com/quantumish/hashbrown.git?rev=6610e6d#6610e6d2b1f288ef7b0709a3efefbc846395dc5e"
|
||||
dependencies = [
|
||||
"allocator-api2",
|
||||
"equivalent",
|
||||
"foldhash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hashlink"
|
||||
version = "0.9.1"
|
||||
@@ -3051,9 +2935,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "httparse"
|
||||
version = "1.10.1"
|
||||
version = "1.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
|
||||
checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
|
||||
|
||||
[[package]]
|
||||
name = "httpdate"
|
||||
@@ -3103,9 +2987,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "1.6.0"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
|
||||
checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
@@ -3145,7 +3029,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"rustls 0.22.4",
|
||||
"rustls-pki-types",
|
||||
@@ -3160,7 +3044,7 @@ version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793"
|
||||
dependencies = [
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
@@ -3169,21 +3053,20 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hyper-util"
|
||||
version = "0.1.14"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc2fdfdbff08affe55bb779f33b053aa1fe5dd5b54c257343c17edfa55711bdb"
|
||||
checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"hyper 1.6.0",
|
||||
"libc",
|
||||
"hyper 1.4.1",
|
||||
"pin-project-lite",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"tower 0.4.13",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
@@ -3620,6 +3503,15 @@ dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"itoa",
|
||||
"ryu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "json-structural-diff"
|
||||
version = "0.2.0"
|
||||
@@ -3772,12 +3664,6 @@ dependencies = [
|
||||
"regex-automata 0.1.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.8.4"
|
||||
@@ -3886,7 +3772,7 @@ dependencies = [
|
||||
"prometheus",
|
||||
"rand 0.8.5",
|
||||
"rand_distr 0.4.3",
|
||||
"twox-hash 1.6.3",
|
||||
"twox-hash",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3973,35 +3859,15 @@ checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
|
||||
name = "neon-shmem"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"atomic",
|
||||
"bytemuck",
|
||||
"criterion",
|
||||
"foldhash",
|
||||
"hashbrown 0.15.4",
|
||||
"libc",
|
||||
"lock_api",
|
||||
"nix 0.30.1",
|
||||
"rand 0.9.1",
|
||||
"rand_distr 0.5.1",
|
||||
"rustc-hash 2.1.1",
|
||||
"seahash",
|
||||
"tempfile",
|
||||
"thiserror 1.0.69",
|
||||
"twox-hash 2.1.1",
|
||||
"workspace_hack",
|
||||
"xxhash-rust",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "neonart"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"crossbeam-utils",
|
||||
"rand 0.9.1",
|
||||
"rand_distr 0.5.1",
|
||||
"spin",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4437,20 +4303,18 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"axum 0.8.1",
|
||||
"bytes",
|
||||
"camino",
|
||||
"clap",
|
||||
"futures",
|
||||
"hdrhistogram",
|
||||
"http 1.1.0",
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"metrics",
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"pageserver_client_grpc",
|
||||
"pageserver_page_api",
|
||||
"pprof",
|
||||
"rand 0.8.5",
|
||||
"reqwest",
|
||||
"serde",
|
||||
@@ -4479,6 +4343,7 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"remote_storage",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"svg_fmt",
|
||||
"thiserror 1.0.69",
|
||||
@@ -4496,6 +4361,7 @@ dependencies = [
|
||||
"arc-swap",
|
||||
"async-compression",
|
||||
"async-stream",
|
||||
"base64 0.22.1",
|
||||
"bincode",
|
||||
"bit_field",
|
||||
"byteorder",
|
||||
@@ -4533,7 +4399,6 @@ dependencies = [
|
||||
"pageserver_client",
|
||||
"pageserver_compaction",
|
||||
"pageserver_page_api",
|
||||
"peekable",
|
||||
"pem",
|
||||
"pin-project-lite",
|
||||
"postgres-protocol",
|
||||
@@ -4547,7 +4412,6 @@ dependencies = [
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
"procfs",
|
||||
"prost 0.13.5",
|
||||
"rand 0.8.5",
|
||||
"range-set-blaze",
|
||||
"regex",
|
||||
@@ -4584,7 +4448,7 @@ dependencies = [
|
||||
"tower 0.5.2",
|
||||
"tracing",
|
||||
"tracing-utils",
|
||||
"twox-hash 1.6.3",
|
||||
"twox-hash",
|
||||
"url",
|
||||
"utils",
|
||||
"uuid",
|
||||
@@ -4657,30 +4521,18 @@ version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"arc-swap",
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"chrono",
|
||||
"compute_api",
|
||||
"dashmap 5.5.0",
|
||||
"futures",
|
||||
"http 1.1.0",
|
||||
"hyper 1.6.0",
|
||||
"hyper-util",
|
||||
"metrics",
|
||||
"pageserver_api",
|
||||
"pageserver_page_api",
|
||||
"priority-queue",
|
||||
"rand 0.8.5",
|
||||
"scopeguard",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tonic 0.13.1",
|
||||
"tower 0.4.13",
|
||||
"tracing",
|
||||
"utils",
|
||||
"uuid",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4808,7 +4660,7 @@ dependencies = [
|
||||
"paste",
|
||||
"seq-macro",
|
||||
"thrift",
|
||||
"twox-hash 1.6.3",
|
||||
"twox-hash",
|
||||
"zstd",
|
||||
"zstd-sys",
|
||||
]
|
||||
@@ -4854,15 +4706,6 @@ dependencies = [
|
||||
"sha2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peekable"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "225f9651e475709164f871dc2f5724956be59cb9edb055372ffeeab01ec2d20b"
|
||||
dependencies = [
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pem"
|
||||
version = "3.0.3"
|
||||
@@ -5296,17 +5139,6 @@ dependencies = [
|
||||
"elliptic-curve 0.13.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "priority-queue"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5676d703dda103cbb035b653a9f11448c0a7216c7926bd35fcb5865475d0c970"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"equivalent",
|
||||
"indexmap 2.9.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.94"
|
||||
@@ -5476,6 +5308,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"atomic-take",
|
||||
"aws-config",
|
||||
"aws-credential-types",
|
||||
"aws-sdk-iam",
|
||||
"aws-sigv4",
|
||||
"base64 0.22.1",
|
||||
@@ -5507,7 +5340,7 @@ dependencies = [
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"hyper 0.14.30",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"indexmap 2.9.0",
|
||||
"ipnet",
|
||||
@@ -5515,6 +5348,7 @@ dependencies = [
|
||||
"itoa",
|
||||
"jose-jwa",
|
||||
"jose-jwk",
|
||||
"json",
|
||||
"lasso",
|
||||
"measured",
|
||||
"metrics",
|
||||
@@ -5542,7 +5376,7 @@ dependencies = [
|
||||
"reqwest-tracing",
|
||||
"rsa",
|
||||
"rstest",
|
||||
"rustc-hash 1.1.0",
|
||||
"rustc-hash 2.1.1",
|
||||
"rustls 0.23.27",
|
||||
"rustls-native-certs 0.8.0",
|
||||
"rustls-pemfile 2.1.1",
|
||||
@@ -5572,6 +5406,7 @@ dependencies = [
|
||||
"tracing-test",
|
||||
"tracing-utils",
|
||||
"try-lock",
|
||||
"type-safe-id",
|
||||
"typed-json",
|
||||
"url",
|
||||
"urlencoding",
|
||||
@@ -5940,6 +5775,8 @@ dependencies = [
|
||||
"azure_identity",
|
||||
"azure_storage",
|
||||
"azure_storage_blobs",
|
||||
"base64 0.22.1",
|
||||
"byteorder",
|
||||
"bytes",
|
||||
"camino",
|
||||
"camino-tempfile",
|
||||
@@ -5948,7 +5785,7 @@ dependencies = [
|
||||
"http-body-util",
|
||||
"http-types",
|
||||
"humantime-serde",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"itertools 0.10.5",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
@@ -5988,7 +5825,7 @@ dependencies = [
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-rustls 0.26.0",
|
||||
"hyper-util",
|
||||
"ipnet",
|
||||
@@ -6045,7 +5882,7 @@ dependencies = [
|
||||
"futures",
|
||||
"getrandom 0.2.11",
|
||||
"http 1.1.0",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"parking_lot 0.11.2",
|
||||
"reqwest",
|
||||
"reqwest-middleware",
|
||||
@@ -6066,7 +5903,7 @@ dependencies = [
|
||||
"async-trait",
|
||||
"getrandom 0.2.11",
|
||||
"http 1.1.0",
|
||||
"matchit 0.8.4",
|
||||
"matchit",
|
||||
"opentelemetry",
|
||||
"reqwest",
|
||||
"reqwest-middleware",
|
||||
@@ -6431,6 +6268,7 @@ dependencies = [
|
||||
"itertools 0.10.5",
|
||||
"jsonwebtoken",
|
||||
"metrics",
|
||||
"nix 0.30.1",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"parking_lot 0.12.1",
|
||||
@@ -6438,6 +6276,7 @@ dependencies = [
|
||||
"postgres-protocol",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"postgres_versioninfo",
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
@@ -6482,7 +6321,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"const_format",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"postgres_versioninfo",
|
||||
"pq_proto",
|
||||
"serde",
|
||||
@@ -6553,12 +6392,6 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "621e3680f3e07db4c9c2c3fb07c6223ab2fab2e54bd3c04c3ae037990f428c32"
|
||||
|
||||
[[package]]
|
||||
name = "seahash"
|
||||
version = "4.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
|
||||
|
||||
[[package]]
|
||||
name = "sec1"
|
||||
version = "0.3.0"
|
||||
@@ -7020,12 +6853,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "socket2"
|
||||
version = "0.5.10"
|
||||
version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
|
||||
checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7033,9 +6866,6 @@ name = "spin"
|
||||
version = "0.9.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
|
||||
dependencies = [
|
||||
"lock_api",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spinning_top"
|
||||
@@ -7094,7 +6924,7 @@ dependencies = [
|
||||
"http-body-util",
|
||||
"http-utils",
|
||||
"humantime",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"metrics",
|
||||
"once_cell",
|
||||
@@ -7167,6 +6997,7 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"utils",
|
||||
"uuid",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
@@ -7230,6 +7061,7 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"reqwest",
|
||||
"safekeeper_api",
|
||||
"serde_json",
|
||||
"storage_controller_client",
|
||||
"tokio",
|
||||
@@ -7703,16 +7535,6 @@ dependencies = [
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-pipe"
|
||||
version = "0.2.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f213a84bffbd61b8fa0ba8a044b4bbe35d471d0b518867181e82bd5c15542784"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.10"
|
||||
@@ -7809,6 +7631,7 @@ dependencies = [
|
||||
"futures-core",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7907,25 +7730,16 @@ version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"axum 0.7.9",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"h2 0.4.4",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"hyper 1.6.0",
|
||||
"hyper-timeout",
|
||||
"hyper-util",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"prost 0.13.5",
|
||||
"socket2",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tower 0.4.13",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
@@ -7938,7 +7752,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum 0.8.1",
|
||||
"axum",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"flate2",
|
||||
@@ -7946,7 +7760,7 @@ dependencies = [
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-timeout",
|
||||
"hyper-util",
|
||||
"percent-encoding",
|
||||
@@ -7999,16 +7813,11 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"indexmap 1.9.3",
|
||||
"pin-project",
|
||||
"pin-project-lite",
|
||||
"rand 0.8.5",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8280,12 +8089,16 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "twox-hash"
|
||||
version = "2.1.1"
|
||||
name = "type-safe-id"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
|
||||
checksum = "dd9267f90719e0433aae095640b294ff36ccbf89649ecb9ee34464ec504be157"
|
||||
dependencies = [
|
||||
"arrayvec",
|
||||
"rand 0.9.1",
|
||||
"serde",
|
||||
"thiserror 2.0.11",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8471,6 +8284,7 @@ dependencies = [
|
||||
"tracing-error",
|
||||
"tracing-subscriber",
|
||||
"tracing-utils",
|
||||
"uuid",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
@@ -8501,7 +8315,7 @@ name = "vm_monitor"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"axum 0.8.1",
|
||||
"axum",
|
||||
"cgroups-rs",
|
||||
"clap",
|
||||
"futures",
|
||||
@@ -8995,8 +8809,8 @@ dependencies = [
|
||||
"ahash",
|
||||
"anstream",
|
||||
"anyhow",
|
||||
"axum 0.8.1",
|
||||
"axum-core 0.5.0",
|
||||
"axum",
|
||||
"axum-core",
|
||||
"base64 0.21.7",
|
||||
"base64ct",
|
||||
"bytes",
|
||||
@@ -9018,8 +8832,10 @@ dependencies = [
|
||||
"fail",
|
||||
"form_urlencoded",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-executor",
|
||||
"futures-io",
|
||||
"futures-sink",
|
||||
"futures-util",
|
||||
"generic-array",
|
||||
"getrandom 0.2.11",
|
||||
@@ -9028,7 +8844,7 @@ dependencies = [
|
||||
"hex",
|
||||
"hmac",
|
||||
"hyper 0.14.30",
|
||||
"hyper 1.6.0",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"indexmap 2.9.0",
|
||||
"itertools 0.12.1",
|
||||
@@ -9088,7 +8904,6 @@ dependencies = [
|
||||
"tracing-log",
|
||||
"tracing-subscriber",
|
||||
"url",
|
||||
"uuid",
|
||||
"zeroize",
|
||||
"zstd",
|
||||
"zstd-safe",
|
||||
@@ -9153,12 +8968,6 @@ version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd"
|
||||
|
||||
[[package]]
|
||||
name = "xxhash-rust"
|
||||
version = "0.8.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
|
||||
|
||||
[[package]]
|
||||
name = "yasna"
|
||||
version = "0.5.2"
|
||||
|
||||
Cargo.toml: 14 changes

@@ -35,7 +35,6 @@ members = [
"libs/pq_proto",
"libs/tenant_size_model",
"libs/metrics",
"libs/neonart",
"libs/postgres_connection",
"libs/remote_storage",
"libs/tracing-utils",
@@ -44,6 +43,7 @@ members = [
"libs/walproposer",
"libs/wal_decoder",
"libs/postgres_initdb",
"libs/proxy/json",
"libs/proxy/postgres-protocol2",
"libs/proxy/postgres-types2",
"libs/proxy/tokio-postgres2",
@@ -92,7 +92,6 @@ clap = { version = "4.0", features = ["derive", "env"] }
clashmap = { version = "1.0", features = ["raw-api"] }
comfy-table = "7.1"
const_format = "0.2"
crossbeam-utils = "0.8.21"
crc32c = "0.6"
diatomic-waker = { version = "0.2.3" }
either = "1.8"
@@ -131,6 +130,7 @@ jemalloc_pprof = { version = "0.7", features = ["symbolize", "flamegraph"] }
jsonwebtoken = "9"
lasso = "0.7"
libc = "0.2"
lock_api = "0.4.13"
md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
@@ -151,7 +151,6 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
parquet_derive = "53"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pem = "3.0.3"
peekable = "0.3.0"
pin-project-lite = "0.2"
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
procfs = "0.16"
@@ -167,7 +166,7 @@ reqwest-middleware = "0.4"
reqwest-retry = "0.7"
routerify = "3"
rpds = "0.13"
rustc-hash = "1.1.0"
rustc-hash = "2.1.1"
rustls = { version = "0.23.16", default-features = false }
rustls-pemfile = "2"
rustls-pki-types = "1.11"
@@ -188,7 +187,6 @@ smallvec = "1.11"
smol_str = { version = "0.2.0", features = ["serde"] }
socket2 = "0.5"
spki = "0.7.3"
spin = "0.9.8"
strum = "0.26"
strum_macros = "0.26"
"subtle" = "2.5.0"
@@ -200,10 +198,11 @@ thiserror = "1.0"
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.43.1", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.12.0"
tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
tokio-stream = "0.1"
tokio-stream = { version = "0.1", features = ["sync"] }
tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
toml = "0.8"
@@ -241,9 +240,6 @@ x509-cert = { version = "0.2.5" }
env_logger = "0.11"
log = "0.4"

tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
uring-common = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }

## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }
Makefile: 19 changes

@@ -2,7 +2,7 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Where to install Postgres, default is ./pg_install, maybe useful for package
# managers.
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install

# Supported PostgreSQL versions
POSTGRES_VERSIONS = v17 v16 v15 v14
@@ -14,7 +14,7 @@ POSTGRES_VERSIONS = v17 v16 v15 v14
# it is derived from BUILD_TYPE.

# All intermediate build artifacts are stored here.
BUILD_DIR := build
BUILD_DIR := $(ROOT_PROJECT_DIR)/build

ICU_PREFIX_DIR := /usr/local/icu

@@ -212,7 +212,7 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-C $(BUILD_DIR)/neon-v17 \
-C $(BUILD_DIR)/pgxn-v17/neon \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent

@@ -220,6 +220,19 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
setup-pre-commit-hook:
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit

build-tools/node_modules: build-tools/package.json
cd build-tools && $(if $(CI),npm ci,npm install)
touch build-tools/node_modules

.PHONY: lint-openapi-spec
lint-openapi-spec: build-tools/node_modules
# operation-2xx-response: pageserver timeline delete returns 404 on success
find . -iname "openapi_spec.y*ml" -exec\
npx --prefix=build-tools/ redocly\
--skip-rule=operation-operationId --skip-rule=operation-summary --extends=minimal\
--skip-rule=no-server-example.com --skip-rule=operation-2xx-response\
lint {} \+

# Targets for building PostgreSQL are defined in postgres.mk.
#
# But if the caller has indicated that PostgreSQL is already
@@ -35,7 +35,7 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \
echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc

COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
COPY build-tools/patches/pgcopydbv017.patch /pgcopydbv017.patch

RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
set -e && \
@@ -188,6 +188,12 @@ RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install node
ENV NODE_VERSION=24
RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
&& apt install -y nodejs \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Install docker
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
@@ -311,14 +317,14 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
. "$HOME/.cargo/env" && \
cargo --version && rustup --version && \
rustup component add llvm-tools rustfmt clippy && \
cargo install rustfilt --version ${RUSTFILT_VERSION} --locked && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} --locked && \
cargo install cargo-deny --version ${CARGO_DENY_VERSION} --locked && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} --locked && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} --locked && \
cargo install cargo-chef --version ${CARGO_CHEF_VERSION} --locked && \
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} --locked \
--features postgres-bundled --no-default-features && \
cargo install rustfilt --locked --version ${RUSTFILT_VERSION} && \
cargo install cargo-hakari --locked --version ${CARGO_HAKARI_VERSION} && \
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
cargo install cargo-hack --locked --version ${CARGO_HACK_VERSION} && \
cargo install cargo-nextest --locked --version ${CARGO_NEXTEST_VERSION} && \
cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
cargo install diesel_cli --locked --version ${CARGO_DIESEL_CLI_VERSION} \
--features postgres-bundled --no-default-features && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git
build-tools/package-lock.json (generated, new file): 3189 lines. File diff suppressed because it is too large.

build-tools/package.json (new file): 8 lines

@@ -0,0 +1,8 @@
{
  "name": "build-tools",
  "private": true,
  "devDependencies": {
    "@redocly/cli": "1.34.4",
    "@sourcemeta/jsonschema": "10.0.0"
  }
}
@@ -1,9 +1,12 @@
disallowed-methods = [
"tokio::task::block_in_place",

# Allow this for now, to deny it later once we stop using Handle::block_on completely
# "tokio::runtime::Handle::block_on",
# use tokio_epoll_uring_ext instead
"tokio_epoll_uring::thread_local_system",

# tokio-epoll-uring:
# - allow-invalid because the method doesn't exist on macOS
{ path = "tokio_epoll_uring::thread_local_system", replacement = "tokio_epoll_uring_ext module inside pageserver crate", allow-invalid = true }
]

disallowed-macros = [
@@ -50,9 +50,9 @@ jsonnetfmt-format:
jsonnetfmt --in-place $(jsonnet_files)

.PHONY: manifest-schema-validation
manifest-schema-validation: node_modules
node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
manifest-schema-validation: ../build-tools/node_modules
npx --prefix=../build-tools/ jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml

node_modules: package.json
npm install
touch node_modules
../build-tools/node_modules: ../build-tools/package.json
cd ../build-tools && $(if $(CI),npm ci,npm install)
touch ../build-tools/node_modules
@@ -9,7 +9,7 @@
|
||||
#
|
||||
# build-tools: This contains Rust compiler toolchain and other tools needed at compile
|
||||
# time. This is also used for the storage builds. This image is defined in
|
||||
# build-tools.Dockerfile.
|
||||
# build-tools/Dockerfile.
|
||||
#
|
||||
# build-deps: Contains C compiler, other build tools, and compile-time dependencies
|
||||
# needed to compile PostgreSQL and most extensions. (Some extensions need
|
||||
@@ -115,7 +115,7 @@ ARG EXTENSIONS=all
|
||||
FROM $BASE_IMAGE_SHA AS build-deps
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Keep in sync with build-tools.Dockerfile
|
||||
# Keep in sync with build-tools/Dockerfile
|
||||
ENV PROTOC_VERSION=25.1
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
@@ -170,7 +170,29 @@ RUN case $DEBIAN_VERSION in \
|
||||
FROM build-deps AS pg-build
|
||||
ARG PG_VERSION
|
||||
COPY vendor/postgres-${PG_VERSION:?} postgres
|
||||
COPY compute/patches/postgres_fdw.patch .
|
||||
COPY compute/patches/pg_stat_statements_pg14-16.patch .
|
||||
COPY compute/patches/pg_stat_statements_pg17.patch .
|
||||
RUN cd postgres && \
|
||||
# Apply patches to some contrib extensions
|
||||
# For example, we need to grant EXECUTE on pg_stat_statements_reset() to {privileged_role_name}.
|
||||
# In vanilla Postgres this function is limited to Postgres role superuser.
|
||||
# In Neon we have {privileged_role_name} role that is not a superuser but replaces superuser in some cases.
|
||||
# We could add the additional grant statements to the Postgres repository but it would be hard to maintain,
|
||||
# whenever we need to pick up a new Postgres version and we want to limit the changes in our Postgres fork,
|
||||
# so we do it here.
|
||||
case "${PG_VERSION}" in \
|
||||
"v14" | "v15" | "v16") \
|
||||
patch -p1 < /pg_stat_statements_pg14-16.patch; \
|
||||
;; \
|
||||
"v17") \
|
||||
patch -p1 < /pg_stat_statements_pg17.patch; \
|
||||
;; \
|
||||
*) \
|
||||
# To do not forget to migrate patches to the next major version
|
||||
echo "No contrib patches for this PostgreSQL version" && exit 1;; \
|
||||
esac && \
|
||||
patch -p1 < /postgres_fdw.patch && \
|
||||
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
|
||||
--with-icu --with-libxml --with-libxslt --with-lz4" && \
|
||||
if [ "${PG_VERSION:?}" != "v14" ]; then \
|
||||
@@ -184,8 +206,6 @@ RUN cd postgres && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \
|
||||
file=/usr/local/pgsql/share/extension/postgres_fdw--1.0.sql && [ -e $file ] && \
|
||||
echo 'GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO neon_superuser;' >> $file && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
|
||||
@@ -195,34 +215,7 @@ RUN cd postgres && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control && \
|
||||
# We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
|
||||
# In vanilla postgres this function is limited to Postgres role superuser.
|
||||
# In neon we have neon_superuser role that is not a superuser but replaces superuser in some cases.
|
||||
# We could add the additional grant statements to the postgres repository but it would be hard to maintain,
|
||||
# whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
|
||||
# so we do it here.
|
||||
for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
|
||||
filename=$(basename "$file"); \
|
||||
# Note that there are no downgrade scripts for pg_stat_statements, so we \
|
||||
# don't have to modify any downgrade paths or (much) older versions: we only \
|
||||
# have to make sure every creation of the pg_stat_statements_reset function \
|
||||
# also adds execute permissions to the neon_superuser.
|
||||
case $filename in \
|
||||
pg_stat_statements--1.4.sql) \
|
||||
# pg_stat_statements_reset is first created with 1.4
|
||||
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
|
||||
;; \
|
||||
pg_stat_statements--1.6--1.7.sql) \
|
||||
# Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
|
||||
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
|
||||
;; \
|
||||
pg_stat_statements--1.10--1.11.sql) \
|
||||
# Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
|
||||
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
|
||||
;; \
|
||||
esac; \
|
||||
done;
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
|
||||
|
||||
# Set PATH for all the subsequent build steps
|
||||
ENV PATH="/usr/local/pgsql/bin:$PATH"
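Context note, not part of this diff: because these control files are marked `trusted = true`, a role that merely has CREATE on a database (such as the privileged role) can install the extensions without being a superuser. A minimal sketch, assuming placeholder connection details and the default role name:

```rust
// Sketch only -- connection string, database, and role are placeholders.
use tokio_postgres::NoTls;

#[tokio::main]
async fn main() -> Result<(), tokio_postgres::Error> {
    let (client, connection) =
        tokio_postgres::connect("host=localhost user=neon_superuser dbname=neondb", NoTls).await?;
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("connection error: {e}");
        }
    });

    // Succeeds for a non-superuser only because the control file is trusted;
    // USAGE on the wrapper itself comes from the postgres_fdw patch shown later in this diff.
    client
        .batch_execute("CREATE EXTENSION IF NOT EXISTS postgres_fdw;")
        .await?;
    Ok(())
}
```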
|
||||
@@ -1524,7 +1517,7 @@ WORKDIR /ext-src
|
||||
COPY compute/patches/pg_duckdb_v031.patch .
|
||||
COPY compute/patches/duckdb_v120.patch .
|
||||
# pg_duckdb build requires source dir to be a git repo to get submodules
|
||||
# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
|
||||
# allow {privileged_role_name} to execute some functions that in pg_duckdb are available to superuser only:
|
||||
# - extension management function duckdb.install_extension()
|
||||
# - access to duckdb.extensions table and its sequence
|
||||
RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
|
||||
@@ -1790,7 +1783,7 @@ RUN set -e \
|
||||
#########################################################################################
|
||||
FROM build-deps AS exporters
|
||||
ARG TARGETARCH
|
||||
# Keep sql_exporter version same as in build-tools.Dockerfile and
|
||||
# Keep sql_exporter version same as in build-tools/Dockerfile and
|
||||
# test_runner/regress/test_compute_metrics.py
|
||||
# See comment at the top of the file regarding `echo`, `-e` and `\n`
|
||||
RUN if [ "$TARGETARCH" = "amd64" ]; then\
|
||||
@@ -1915,10 +1908,10 @@ RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /e
|
||||
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN echo /usr/local/pgsql/lib > /etc/ld.so.conf.d/00-neon.conf && /sbin/ldconfig
|
||||
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
|
||||
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq parallel \
|
||||
&& apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGHOST=compute1
|
||||
ENV PGPORT=55433
|
||||
ENV PGUSER=cloud_admin
|
||||
ENV PGDATABASE=postgres
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"name": "neon-compute",
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@sourcemeta/jsonschema": "9.3.4"
|
||||
}
|
||||
}
|
||||
@@ -1,22 +1,26 @@
|
||||
diff --git a/sql/anon.sql b/sql/anon.sql
|
||||
index 0cdc769..b450327 100644
|
||||
index 0cdc769..5eab1d6 100644
|
||||
--- a/sql/anon.sql
|
||||
+++ b/sql/anon.sql
|
||||
@@ -1141,3 +1141,15 @@ $$
|
||||
@@ -1141,3 +1141,19 @@ $$
|
||||
-- TODO : https://en.wikipedia.org/wiki/L-diversity
|
||||
|
||||
-- TODO : https://en.wikipedia.org/wiki/T-closeness
|
||||
+
|
||||
+-- NEON Patches
|
||||
+
|
||||
+GRANT ALL ON SCHEMA anon to neon_superuser;
|
||||
+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
|
||||
+
|
||||
+DO $$
|
||||
+DECLARE
|
||||
+ privileged_role_name text;
|
||||
+BEGIN
|
||||
+ IF current_setting('server_version_num')::int >= 150000 THEN
|
||||
+ GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO neon_superuser;
|
||||
+ END IF;
|
||||
+ privileged_role_name := current_setting('neon.privileged_role_name');
|
||||
+
|
||||
+ EXECUTE format('GRANT ALL ON SCHEMA anon to %I', privileged_role_name);
|
||||
+ EXECUTE format('GRANT ALL ON ALL TABLES IN SCHEMA anon TO %I', privileged_role_name);
|
||||
+
|
||||
+ IF current_setting('server_version_num')::int >= 150000 THEN
|
||||
+ EXECUTE format('GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO %I', privileged_role_name);
|
||||
+ END IF;
|
||||
+END $$;
|
||||
diff --git a/sql/init.sql b/sql/init.sql
|
||||
index 7da6553..9b6164b 100644
|
||||
|
||||
@@ -21,13 +21,21 @@ index 3235cc8..6b892bc 100644
|
||||
include Makefile.global
|
||||
|
||||
diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
|
||||
index d777d76..af60106 100644
|
||||
index d777d76..3b54396 100644
|
||||
--- a/sql/pg_duckdb--0.2.0--0.3.0.sql
|
||||
+++ b/sql/pg_duckdb--0.2.0--0.3.0.sql
|
||||
@@ -1056,3 +1056,6 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
|
||||
@@ -1056,3 +1056,14 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
|
||||
GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;
|
||||
GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;
|
||||
GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;
|
||||
+GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO neon_superuser;
|
||||
+GRANT ALL ON TABLE duckdb.extensions TO neon_superuser;
|
||||
+GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO neon_superuser;
|
||||
+
|
||||
+DO $$
|
||||
+DECLARE
|
||||
+ privileged_role_name text;
|
||||
+BEGIN
|
||||
+ privileged_role_name := current_setting('neon.privileged_role_name');
|
||||
+
|
||||
+ EXECUTE format('GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO %I', privileged_role_name);
|
||||
+ EXECUTE format('GRANT ALL ON TABLE duckdb.extensions TO %I', privileged_role_name);
|
||||
+ EXECUTE format('GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO %I', privileged_role_name);
|
||||
+END $$;
|
||||
|
||||
34
compute/patches/pg_stat_statements_pg14-16.patch
Normal file
@@ -0,0 +1,34 @@
|
||||
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
|
||||
index 58cdf600fce..8be57a996f6 100644
|
||||
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
|
||||
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
|
||||
@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
|
||||
|
||||
-- Don't want this to be available to non-superusers.
|
||||
REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
|
||||
+
|
||||
+DO $$
|
||||
+DECLARE
|
||||
+ privileged_role_name text;
|
||||
+BEGIN
|
||||
+ privileged_role_name := current_setting('neon.privileged_role_name');
|
||||
+
|
||||
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
|
||||
+END $$;
|
||||
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
|
||||
index 6fc3fed4c93..256345a8f79 100644
|
||||
--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
|
||||
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
|
||||
@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
|
||||
|
||||
-- Don't want this to be available to non-superusers.
|
||||
REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
|
||||
+
|
||||
+DO $$
|
||||
+DECLARE
|
||||
+ privileged_role_name text;
|
||||
+BEGIN
|
||||
+ privileged_role_name := current_setting('neon.privileged_role_name');
|
||||
+
|
||||
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
|
||||
+END $$;
|
||||
52
compute/patches/pg_stat_statements_pg17.patch
Normal file
@@ -0,0 +1,52 @@
|
||||
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
|
||||
index 0bb2c397711..32764db1d8b 100644
|
||||
--- a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
|
||||
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
|
||||
@@ -80,3 +80,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
|
||||
|
||||
-- Don't want this to be available to non-superusers.
|
||||
REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) FROM PUBLIC;
|
||||
+
|
||||
+DO $$
|
||||
+DECLARE
|
||||
+ privileged_role_name text;
|
||||
+BEGIN
|
||||
+ privileged_role_name := current_setting('neon.privileged_role_name');
|
||||
+
|
||||
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO %I', privileged_role_name);
|
||||
+END $$;
|
||||
\ No newline at end of file
|
||||
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
|
||||
index 58cdf600fce..8be57a996f6 100644
|
||||
--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
|
||||
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
|
||||
@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
|
||||
|
||||
-- Don't want this to be available to non-superusers.
|
||||
REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
|
||||
+
|
||||
+DO $$
|
||||
+DECLARE
|
||||
+ privileged_role_name text;
|
||||
+BEGIN
|
||||
+ privileged_role_name := current_setting('neon.privileged_role_name');
|
||||
+
|
||||
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
|
||||
+END $$;
|
||||
diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
|
||||
index 6fc3fed4c93..256345a8f79 100644
|
||||
--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
|
||||
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
|
||||
@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
|
||||
|
||||
-- Don't want this to be available to non-superusers.
|
||||
REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
|
||||
+
|
||||
+DO $$
|
||||
+DECLARE
|
||||
+ privileged_role_name text;
|
||||
+BEGIN
|
||||
+ privileged_role_name := current_setting('neon.privileged_role_name');
|
||||
+
|
||||
+ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
|
||||
+END $$;
|
||||
17
compute/patches/postgres_fdw.patch
Normal file
@@ -0,0 +1,17 @@
|
||||
diff --git a/contrib/postgres_fdw/postgres_fdw--1.0.sql b/contrib/postgres_fdw/postgres_fdw--1.0.sql
|
||||
index a0f0fc1bf45..ee077f2eea6 100644
|
||||
--- a/contrib/postgres_fdw/postgres_fdw--1.0.sql
|
||||
+++ b/contrib/postgres_fdw/postgres_fdw--1.0.sql
|
||||
@@ -16,3 +16,12 @@ LANGUAGE C STRICT;
|
||||
CREATE FOREIGN DATA WRAPPER postgres_fdw
|
||||
HANDLER postgres_fdw_handler
|
||||
VALIDATOR postgres_fdw_validator;
|
||||
+
|
||||
+DO $$
|
||||
+DECLARE
|
||||
+ privileged_role_name text;
|
||||
+BEGIN
|
||||
+ privileged_role_name := current_setting('neon.privileged_role_name');
|
||||
+
|
||||
+ EXECUTE format('GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO %I', privileged_role_name);
|
||||
+END $$;
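Not part of the diff: a hedged sketch of how the effect of these patches could be verified from a running compute. The connection string is a placeholder, `neon_superuser` is simply the default of the new `--privileged-role-name` flag, and both extensions are assumed to be installed in the target database.

```rust
// Sketch only -- connection details are placeholders.
use tokio_postgres::NoTls;

#[tokio::main]
async fn main() -> Result<(), tokio_postgres::Error> {
    let (client, connection) =
        tokio_postgres::connect("host=localhost user=cloud_admin dbname=postgres", NoTls).await?;
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("connection error: {e}");
        }
    });

    // EXECUTE on pg_stat_statements_reset() is granted by the pg_stat_statements
    // patch (this is the v14-v16 signature; v17 uses the four-argument form).
    let row = client
        .query_one(
            "SELECT has_function_privilege('neon_superuser', 'pg_stat_statements_reset()', 'EXECUTE')",
            &[],
        )
        .await?;
    println!("pg_stat_statements_reset() executable: {}", row.get::<_, bool>(0));

    // USAGE on the foreign data wrapper is granted by the postgres_fdw patch.
    let row = client
        .query_one(
            "SELECT has_foreign_data_wrapper_privilege('neon_superuser', 'postgres_fdw', 'USAGE')",
            &[],
        )
        .await?;
    println!("postgres_fdw usage: {}", row.get::<_, bool>(0));
    Ok(())
}
```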
|
||||
@@ -66,7 +66,7 @@ url.workspace = true
|
||||
uuid.workspace = true
|
||||
walkdir.workspace = true
|
||||
x509-cert.workspace = true
|
||||
|
||||
postgres-types.workspace = true
|
||||
postgres_versioninfo.workspace = true
|
||||
postgres_initdb.workspace = true
|
||||
compute_api.workspace = true
|
||||
|
||||
@@ -46,11 +46,14 @@ stateDiagram-v2
|
||||
Configuration --> Failed : Failed to configure the compute
|
||||
Configuration --> Running : Compute has been configured
|
||||
Empty --> Init : Compute spec is immediately available
|
||||
Empty --> TerminationPending : Requested termination
|
||||
Empty --> TerminationPendingFast : Requested termination
|
||||
Empty --> TerminationPendingImmediate : Requested termination
|
||||
Init --> Failed : Failed to start Postgres
|
||||
Init --> Running : Started Postgres
|
||||
Running --> TerminationPending : Requested termination
|
||||
TerminationPending --> Terminated : Terminated compute
|
||||
Running --> TerminationPendingFast : Requested termination
|
||||
Running --> TerminationPendingImmediate : Requested termination
|
||||
TerminationPendingFast --> Terminated : Terminated compute with 30s delay for cplane to inspect status
|
||||
TerminationPendingImmediate --> Terminated : Terminated compute immediately
|
||||
Failed --> [*] : Compute exited
|
||||
Terminated --> [*] : Compute exited
|
||||
```
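Illustration only (not part of the diagram or the diff): how a client might drive the two new termination paths over compute_ctl's HTTP API. Host, port, and the absence of auth are assumptions.

```rust
// Sketch only -- endpoint address is a placeholder and any required auth is omitted.
use reqwest::StatusCode;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let res = reqwest::Client::new()
        .post("http://127.0.0.1:3080/terminate?mode=immediate")
        .send()
        .await?;

    // Per the OpenAPI change later in this diff: 200 means terminated now,
    // 201 means the compute was already terminated. mode=fast instead keeps
    // compute_ctl around ~30s so the control plane can inspect the status.
    assert!(matches!(res.status(), StatusCode::OK | StatusCode::CREATED));
    Ok(())
}
```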
|
||||
|
||||
@@ -87,6 +87,14 @@ struct Cli {
|
||||
#[arg(short = 'C', long, value_name = "DATABASE_URL")]
|
||||
pub connstr: String,
|
||||
|
||||
#[arg(
|
||||
long,
|
||||
default_value = "neon_superuser",
|
||||
value_name = "PRIVILEGED_ROLE_NAME",
|
||||
value_parser = Self::parse_privileged_role_name
|
||||
)]
|
||||
pub privileged_role_name: String,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
#[arg(long, default_value = "neon-postgres")]
|
||||
pub cgroup: String,
|
||||
@@ -149,6 +157,21 @@ impl Cli {
|
||||
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
/// For simplicity, we do not escape `privileged_role_name` anywhere in the code.
|
||||
/// Since it's a system role, which we fully control, that's fine. Still, let's
|
||||
/// validate it to avoid any surprises.
|
||||
fn parse_privileged_role_name(value: &str) -> Result<String> {
|
||||
use regex::Regex;
|
||||
|
||||
let pattern = Regex::new(r"^[a-z_]+$").unwrap();
|
||||
|
||||
if !pattern.is_match(value) {
|
||||
bail!("--privileged-role-name can only contain lowercase letters and underscores")
|
||||
}
|
||||
|
||||
Ok(value.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
@@ -178,6 +201,7 @@ fn main() -> Result<()> {
|
||||
ComputeNodeParams {
|
||||
compute_id: cli.compute_id,
|
||||
connstr,
|
||||
privileged_role_name: cli.privileged_role_name.clone(),
|
||||
pgdata: cli.pgdata.clone(),
|
||||
pgbin: cli.pgbin.clone(),
|
||||
pgversion: get_pg_version_string(&cli.pgbin),
|
||||
@@ -327,4 +351,49 @@ mod test {
|
||||
])
|
||||
.expect_err("URL parameters are not allowed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_privileged_role_name() {
|
||||
// Valid name
|
||||
let cli = Cli::parse_from([
|
||||
"compute_ctl",
|
||||
"--pgdata=test",
|
||||
"--connstr=test",
|
||||
"--compute-id=test",
|
||||
"--privileged-role-name",
|
||||
"my_superuser",
|
||||
]);
|
||||
assert_eq!(cli.privileged_role_name, "my_superuser");
|
||||
|
||||
// Invalid names
|
||||
Cli::try_parse_from([
|
||||
"compute_ctl",
|
||||
"--pgdata=test",
|
||||
"--connstr=test",
|
||||
"--compute-id=test",
|
||||
"--privileged-role-name",
|
||||
"NeonSuperuser",
|
||||
])
|
||||
.expect_err("uppercase letters are not allowed");
|
||||
|
||||
Cli::try_parse_from([
|
||||
"compute_ctl",
|
||||
"--pgdata=test",
|
||||
"--connstr=test",
|
||||
"--compute-id=test",
|
||||
"--privileged-role-name",
|
||||
"$'neon_superuser",
|
||||
])
|
||||
.expect_err("special characters are not allowed");
|
||||
|
||||
Cli::try_parse_from([
|
||||
"compute_ctl",
|
||||
"--pgdata=test",
|
||||
"--connstr=test",
|
||||
"--compute-id=test",
|
||||
"--privileged-role-name",
|
||||
"",
|
||||
])
|
||||
.expect_err("empty name is not allowed");
|
||||
}
|
||||
}
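Not part of the diff: a hypothetical sketch of how code that receives `ComputeNodeParams` might use the validated role name. The helper name and the specific GRANT are illustrative, not the repository's actual migration code.

```rust
// Hypothetical helper: the role name has already passed parse_privileged_role_name
// (lowercase letters and underscores only), so plain interpolation into SQL is safe.
async fn grant_monitor_to_privileged_role(
    client: &tokio_postgres::Client,
    privileged_role_name: &str,
) -> Result<(), tokio_postgres::Error> {
    let sql = format!("GRANT pg_monitor TO {privileged_role_name}");
    client.simple_query(&sql).await?;
    Ok(())
}
```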
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::{DateTime, Utc};
|
||||
use compute_api::privilege::Privilege;
|
||||
use compute_api::responses::{
|
||||
ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus, LfcOffloadState,
|
||||
LfcPrewarmState, TlsConfig,
|
||||
LfcPrewarmState, PromoteState, TlsConfig,
|
||||
};
|
||||
use compute_api::spec::{
|
||||
ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverConnectionInfo,
|
||||
PageserverShardConnectionInfo, PgIdent,
|
||||
ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent,
|
||||
};
|
||||
use futures::StreamExt;
|
||||
use futures::future::join_all;
|
||||
@@ -30,8 +29,7 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Condvar, Mutex, RwLock};
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{env, fs};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::{spawn, time};
|
||||
use tokio::{spawn, sync::watch, task::JoinHandle, time};
|
||||
use tracing::{Instrument, debug, error, info, instrument, warn};
|
||||
use url::Url;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
@@ -76,12 +74,20 @@ const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;
|
||||
|
||||
/// Static configuration params that don't change after startup. These mostly
|
||||
/// come from the CLI args, or are derived from them.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ComputeNodeParams {
|
||||
/// The ID of the compute
|
||||
pub compute_id: String,
|
||||
// Url type maintains proper escaping
|
||||
|
||||
/// Url type maintains proper escaping
|
||||
pub connstr: url::Url,
|
||||
|
||||
/// The name of the 'weak' superuser role, which we give to the users.
|
||||
/// It follows the allow list approach, i.e., we take a standard role
|
||||
/// and grant it extra permissions with explicit GRANTs here and there,
|
||||
/// and core patches.
|
||||
pub privileged_role_name: String,
|
||||
|
||||
pub resize_swap_on_bind: bool,
|
||||
pub set_disk_quota_for_fs: Option<String>,
|
||||
|
||||
@@ -176,6 +182,7 @@ pub struct ComputeState {
|
||||
/// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if
|
||||
/// mode == ComputeMode::Primary. None otherwise
|
||||
pub terminate_flush_lsn: Option<Lsn>,
|
||||
pub promote_state: Option<watch::Receiver<PromoteState>>,
|
||||
|
||||
pub metrics: ComputeMetrics,
|
||||
}
|
||||
@@ -193,6 +200,7 @@ impl ComputeState {
|
||||
lfc_prewarm_state: LfcPrewarmState::default(),
|
||||
lfc_offload_state: LfcOffloadState::default(),
|
||||
terminate_flush_lsn: None,
|
||||
promote_state: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -225,7 +233,7 @@ pub struct ParsedSpec {
|
||||
pub spec: ComputeSpec,
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub pageserver_conninfo: PageserverConnectionInfo,
|
||||
pub pageserver_connstr: String,
|
||||
pub safekeeper_connstrings: Vec<String>,
|
||||
pub storage_auth_token: Option<String>,
|
||||
/// k8s dns name and port
|
||||
@@ -272,27 +280,6 @@ impl ParsedSpec {
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_pageserver_conninfo_from_guc(
|
||||
pageserver_connstring_guc: &str,
|
||||
) -> PageserverConnectionInfo {
|
||||
PageserverConnectionInfo {
|
||||
shards: pageserver_connstring_guc
|
||||
.split(',')
|
||||
.enumerate()
|
||||
.map(|(i, connstr)| {
|
||||
(
|
||||
i as u32,
|
||||
PageserverShardConnectionInfo {
|
||||
libpq_url: Some(connstr.to_string()),
|
||||
grpc_url: None,
|
||||
},
|
||||
)
|
||||
})
|
||||
.collect(),
|
||||
prefer_grpc: false,
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<ComputeSpec> for ParsedSpec {
|
||||
type Error = String;
|
||||
fn try_from(spec: ComputeSpec) -> Result<Self, String> {
|
||||
@@ -302,17 +289,11 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
|
||||
// For backwards-compatibility, the top-level fields in the spec file
|
||||
// may be empty. In that case, we need to dig them from the GUCs in the
|
||||
// cluster.settings field.
|
||||
let pageserver_conninfo = match &spec.pageserver_connection_info {
|
||||
Some(x) => x.clone(),
|
||||
None => {
|
||||
if let Some(guc) = spec.cluster.settings.find("neon.pageserver_connstring") {
|
||||
extract_pageserver_conninfo_from_guc(&guc)
|
||||
} else {
|
||||
return Err("pageserver connstr should be provided".to_string());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let pageserver_connstr = spec
|
||||
.pageserver_connstring
|
||||
.clone()
|
||||
.or_else(|| spec.cluster.settings.find("neon.pageserver_connstring"))
|
||||
.ok_or("pageserver connstr should be provided")?;
|
||||
let safekeeper_connstrings = if spec.safekeeper_connstrings.is_empty() {
|
||||
if matches!(spec.mode, ComputeMode::Primary) {
|
||||
spec.cluster
|
||||
@@ -362,7 +343,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
|
||||
|
||||
let res = ParsedSpec {
|
||||
spec,
|
||||
pageserver_conninfo,
|
||||
pageserver_connstr,
|
||||
safekeeper_connstrings,
|
||||
storage_auth_token,
|
||||
tenant_id,
|
||||
@@ -452,7 +433,7 @@ impl ComputeNode {
|
||||
|
||||
let mut new_state = ComputeState::new();
|
||||
if let Some(spec) = config.spec {
|
||||
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow!(msg))?;
|
||||
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
|
||||
new_state.pspec = Some(pspec);
|
||||
}
|
||||
|
||||
@@ -983,14 +964,20 @@ impl ComputeNode {
|
||||
None
|
||||
};
|
||||
|
||||
let mut delay_exit = false;
|
||||
let mut state = self.state.lock().unwrap();
|
||||
state.terminate_flush_lsn = lsn;
|
||||
if let ComputeStatus::TerminationPending { mode } = state.status {
|
||||
|
||||
let delay_exit = state.status == ComputeStatus::TerminationPendingFast;
|
||||
if state.status == ComputeStatus::TerminationPendingFast
|
||||
|| state.status == ComputeStatus::TerminationPendingImmediate
|
||||
{
|
||||
info!(
|
||||
"Changing compute status from {} to {}",
|
||||
state.status,
|
||||
ComputeStatus::Terminated
|
||||
);
|
||||
state.status = ComputeStatus::Terminated;
|
||||
self.state_changed.notify_all();
|
||||
// we were asked to terminate gracefully, don't exit to avoid restart
|
||||
delay_exit = mode == compute_api::responses::TerminateMode::Fast
|
||||
}
|
||||
drop(state);
|
||||
|
||||
@@ -1053,13 +1040,16 @@ impl ComputeNode {
|
||||
fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
|
||||
let spec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||
|
||||
let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
|
||||
let started = Instant::now();
|
||||
let (connected, size) = if spec.pageserver_conninfo.prefer_grpc {
|
||||
self.try_get_basebackup_grpc(spec, lsn)?
|
||||
} else {
|
||||
self.try_get_basebackup_libpq(spec, lsn)?
|
||||
|
||||
let (connected, size) = match PageserverProtocol::from_connstring(shard0_connstr)? {
|
||||
PageserverProtocol::Libpq => self.try_get_basebackup_libpq(spec, lsn)?,
|
||||
PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
|
||||
};
|
||||
|
||||
self.fix_zenith_signal_neon_signal()?;
|
||||
|
||||
let mut state = self.state.lock().unwrap();
|
||||
state.metrics.pageserver_connect_micros =
|
||||
connected.duration_since(started).as_micros() as u64;
|
||||
@@ -1069,24 +1059,44 @@ impl ComputeNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Move the Zenith signal file to Neon signal file location.
|
||||
/// This makes Compute compatible with older PageServers that don't yet
|
||||
/// know about the Zenith->Neon rename.
|
||||
fn fix_zenith_signal_neon_signal(&self) -> Result<()> {
|
||||
let datadir = Path::new(&self.params.pgdata);
|
||||
|
||||
let neonsig = datadir.join("neon.signal");
|
||||
|
||||
if neonsig.is_file() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let zenithsig = datadir.join("zenith.signal");
|
||||
|
||||
if zenithsig.is_file() {
|
||||
fs::copy(zenithsig, neonsig)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
|
||||
/// the connection was established, and the (compressed) size of the basebackup.
|
||||
fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
|
||||
let shard0 = spec
|
||||
.pageserver_conninfo
|
||||
.shards
|
||||
.get(&0)
|
||||
.expect("shard 0 connection info missing");
|
||||
let shard0_url = shard0.grpc_url.clone().expect("no grpc_url for shard 0");
|
||||
|
||||
let shard_index = match spec.pageserver_conninfo.shards.len() as u8 {
|
||||
let shard0_connstr = spec
|
||||
.pageserver_connstr
|
||||
.split(',')
|
||||
.next()
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let shard_index = match spec.pageserver_connstr.split(',').count() as u8 {
|
||||
0 | 1 => ShardIndex::unsharded(),
|
||||
count => ShardIndex::new(ShardNumber(0), ShardCount(count)),
|
||||
};
|
||||
|
||||
let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {
|
||||
let mut client = page_api::Client::connect(
|
||||
shard0_url,
|
||||
shard0_connstr,
|
||||
spec.tenant_id,
|
||||
spec.timeline_id,
|
||||
shard_index,
|
||||
@@ -1121,13 +1131,8 @@ impl ComputeNode {
|
||||
/// Fetches a basebackup via libpq. The connstring must use postgresql://. Returns the timestamp
|
||||
/// when the connection was established, and the (compressed) size of the basebackup.
|
||||
fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
|
||||
let shard0 = spec
|
||||
.pageserver_conninfo
|
||||
.shards
|
||||
.get(&0)
|
||||
.expect("shard 0 connection info missing");
|
||||
let shard0_connstr = shard0.libpq_url.clone().expect("no libpq_url for shard 0");
|
||||
let mut config = postgres::Config::from_str(&shard0_connstr)?;
|
||||
let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
|
||||
let mut config = postgres::Config::from_str(shard0_connstr)?;
|
||||
|
||||
// Use the storage auth token from the config file, if given.
|
||||
// Note: this overrides any password set in the connection string.
|
||||
@@ -1213,7 +1218,10 @@ impl ComputeNode {
|
||||
return result;
|
||||
}
|
||||
Err(ref e) if attempts < max_attempts => {
|
||||
warn!("Failed to get basebackup: {e:?} (attempt {attempts}/{max_attempts})");
|
||||
warn!(
|
||||
"Failed to get basebackup: {} (attempt {}/{})",
|
||||
e, attempts, max_attempts
|
||||
);
|
||||
std::thread::sleep(std::time::Duration::from_millis(retry_period_ms as u64));
|
||||
retry_period_ms *= 1.5;
|
||||
}
|
||||
@@ -1286,9 +1294,7 @@ impl ComputeNode {
|
||||
|
||||
// In case of error, log and fail the check, but don't crash.
|
||||
// We're playing it safe because these errors could be transient
|
||||
// and we don't yet retry. Also being careful here allows us to
|
||||
// be backwards compatible with safekeepers that don't have the
|
||||
// TIMELINE_STATUS API yet.
|
||||
// and we don't yet retry.
|
||||
if responses.len() < quorum {
|
||||
error!(
|
||||
"failed sync safekeepers check {:?} {:?} {:?}",
|
||||
@@ -1391,6 +1397,7 @@ impl ComputeNode {
|
||||
self.create_pgdata()?;
|
||||
config::write_postgres_conf(
|
||||
pgdata_path,
|
||||
&self.params,
|
||||
&pspec.spec,
|
||||
self.params.internal_http_port,
|
||||
tls_config,
|
||||
@@ -1422,8 +1429,16 @@ impl ComputeNode {
|
||||
}
|
||||
};
|
||||
|
||||
self.get_basebackup(compute_state, lsn)
|
||||
.with_context(|| format!("failed to get basebackup@{lsn}"))?;
|
||||
info!(
|
||||
"getting basebackup@{} from pageserver {}",
|
||||
lsn, &pspec.pageserver_connstr
|
||||
);
|
||||
self.get_basebackup(compute_state, lsn).with_context(|| {
|
||||
format!(
|
||||
"failed to get basebackup@{} from pageserver {}",
|
||||
lsn, &pspec.pageserver_connstr
|
||||
)
|
||||
})?;
|
||||
|
||||
// Update pg_hba.conf received with basebackup.
|
||||
update_pg_hba(pgdata_path)?;
|
||||
@@ -1731,6 +1746,7 @@ impl ComputeNode {
|
||||
}
|
||||
|
||||
// Run migrations separately to not hold up cold starts
|
||||
let params = self.params.clone();
|
||||
tokio::spawn(async move {
|
||||
let mut conf = conf.as_ref().clone();
|
||||
conf.application_name("compute_ctl:migrations");
|
||||
@@ -1742,7 +1758,7 @@ impl ComputeNode {
|
||||
eprintln!("connection error: {e}");
|
||||
}
|
||||
});
|
||||
if let Err(e) = handle_migrations(&mut client).await {
|
||||
if let Err(e) = handle_migrations(params, &mut client).await {
|
||||
error!("Failed to run migrations: {}", e);
|
||||
}
|
||||
}
|
||||
@@ -1821,11 +1837,14 @@ impl ComputeNode {
|
||||
let pgdata_path = Path::new(&self.params.pgdata);
|
||||
config::write_postgres_conf(
|
||||
pgdata_path,
|
||||
&self.params,
|
||||
&spec,
|
||||
self.params.internal_http_port,
|
||||
tls_config,
|
||||
)?;
|
||||
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
if !spec.skip_pg_catalog_updates {
|
||||
let max_concurrent_connections = spec.reconfigure_concurrency;
|
||||
// Temporarily reset max_cluster_size in config
|
||||
@@ -1845,10 +1864,9 @@ impl ComputeNode {
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
self.pg_reload_conf()?;
|
||||
}
|
||||
|
||||
self.pg_reload_conf()?;
|
||||
|
||||
let unknown_op = "unknown".to_string();
|
||||
let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
|
||||
info!(
|
||||
@@ -1921,7 +1939,8 @@ impl ComputeNode {
|
||||
|
||||
// exit loop
|
||||
ComputeStatus::Failed
|
||||
| ComputeStatus::TerminationPending { .. }
|
||||
| ComputeStatus::TerminationPendingFast
|
||||
| ComputeStatus::TerminationPendingImmediate
|
||||
| ComputeStatus::Terminated => break 'cert_update,
|
||||
|
||||
// wait
|
||||
@@ -2087,7 +2106,7 @@ LIMIT 100",
|
||||
self.params
|
||||
.remote_ext_base_url
|
||||
.as_ref()
|
||||
.ok_or(DownloadError::BadInput(anyhow!(
|
||||
.ok_or(DownloadError::BadInput(anyhow::anyhow!(
|
||||
"Remote extensions storage is not configured",
|
||||
)))?;
|
||||
|
||||
@@ -2283,7 +2302,7 @@ LIMIT 100",
|
||||
let remote_extensions = spec
|
||||
.remote_extensions
|
||||
.as_ref()
|
||||
.ok_or(anyhow!("Remote extensions are not configured"))?;
|
||||
.ok_or(anyhow::anyhow!("Remote extensions are not configured"))?;
|
||||
|
||||
info!("parse shared_preload_libraries from spec.cluster.settings");
|
||||
let mut libs_vec = Vec::new();
|
||||
@@ -2362,22 +2381,22 @@ LIMIT 100",
|
||||
/// The operation will time out after a specified duration.
|
||||
pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {
|
||||
let state = self.state.lock().unwrap();
|
||||
let old_pageserver_conninfo = state
|
||||
let old_pageserver_connstr = state
|
||||
.pspec
|
||||
.as_ref()
|
||||
.expect("spec must be set")
|
||||
.pageserver_conninfo
|
||||
.pageserver_connstr
|
||||
.clone();
|
||||
let mut unchanged = true;
|
||||
let _ = self
|
||||
.state_changed
|
||||
.wait_timeout_while(state, duration, |s| {
|
||||
let pageserver_conninfo = &s
|
||||
let pageserver_connstr = &s
|
||||
.pspec
|
||||
.as_ref()
|
||||
.expect("spec must be set")
|
||||
.pageserver_conninfo;
|
||||
unchanged = pageserver_conninfo == &old_pageserver_conninfo;
|
||||
.pageserver_connstr;
|
||||
unchanged = pageserver_connstr == &old_pageserver_connstr;
|
||||
unchanged
|
||||
})
|
||||
.unwrap();
|
||||
@@ -2431,14 +2450,31 @@ LIMIT 100",
|
||||
pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {
|
||||
self.terminate_lfc_offload_task();
|
||||
let secs = interval.as_secs();
|
||||
info!("spawning lfc offload worker with {secs}s interval");
|
||||
let this = self.clone();
|
||||
|
||||
info!("spawning LFC offload worker with {secs}s interval");
|
||||
let handle = spawn(async move {
|
||||
let mut interval = time::interval(interval);
|
||||
interval.tick().await; // returns immediately
|
||||
loop {
|
||||
interval.tick().await;
|
||||
this.offload_lfc_async().await;
|
||||
|
||||
let prewarm_state = this.state.lock().unwrap().lfc_prewarm_state.clone();
|
||||
// Do not offload LFC state if we are currently prewarming or any issue occurred.
|
||||
// If we'd do that, we might override the LFC state in endpoint storage with some
|
||||
// incomplete state. Imagine a situation:
|
||||
// 1. Endpoint started with `autoprewarm: true`
|
||||
// 2. While prewarming is not completed, we upload the new incomplete state
|
||||
// 3. Compute gets interrupted and restarts
|
||||
// 4. We start again and try to prewarm with the state from 2. instead of the previous complete state
|
||||
if matches!(
|
||||
prewarm_state,
|
||||
LfcPrewarmState::Completed
|
||||
| LfcPrewarmState::NotPrewarmed
|
||||
| LfcPrewarmState::Skipped
|
||||
) {
|
||||
this.offload_lfc_async().await;
|
||||
}
|
||||
}
|
||||
});
|
||||
*self.lfc_offload_task.lock().unwrap() = Some(handle);
|
||||
@@ -2455,19 +2491,11 @@ LIMIT 100",
|
||||
// If the value is -1, we never suspend so set the value to default collection.
|
||||
// If the value is 0, it means default, we will just continue to use the default.
|
||||
if spec.suspend_timeout_seconds == -1 || spec.suspend_timeout_seconds == 0 {
|
||||
info!(
|
||||
"[NEON_EXT_INT_UPD] Spec Timeout: {}, New Timeout: {}",
|
||||
spec.suspend_timeout_seconds, DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL
|
||||
);
|
||||
self.params.installed_extensions_collection_interval.store(
|
||||
DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL,
|
||||
std::sync::atomic::Ordering::SeqCst,
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
"[NEON_EXT_INT_UPD] Spec Timeout: {}",
|
||||
spec.suspend_timeout_seconds
|
||||
);
|
||||
self.params.installed_extensions_collection_interval.store(
|
||||
spec.suspend_timeout_seconds as u64,
|
||||
std::sync::atomic::Ordering::SeqCst,
|
||||
@@ -2485,7 +2513,7 @@ pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
|
||||
serde_json::to_string(&extensions).expect("failed to serialize extensions list")
|
||||
);
|
||||
}
|
||||
Err(err) => error!("could not get installed extensions: {err:?}"),
|
||||
Err(err) => error!("could not get installed extensions: {err}"),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ impl ComputeNode {
|
||||
}
|
||||
};
|
||||
let row = match client
|
||||
.query_one("select * from get_prewarm_info()", &[])
|
||||
.query_one("select * from neon.get_prewarm_info()", &[])
|
||||
.await
|
||||
{
|
||||
Ok(row) => row,
|
||||
@@ -89,7 +89,7 @@ impl ComputeNode {
|
||||
self.state.lock().unwrap().lfc_offload_state.clone()
|
||||
}
|
||||
|
||||
/// If there is a prewarm request ongoing, return false, true otherwise
|
||||
/// If there is a prewarm request ongoing, return `false`, `true` otherwise.
|
||||
pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
|
||||
{
|
||||
let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
|
||||
@@ -101,14 +101,25 @@ impl ComputeNode {
|
||||
|
||||
let cloned = self.clone();
|
||||
spawn(async move {
|
||||
let Err(err) = cloned.prewarm_impl(from_endpoint).await else {
|
||||
cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Completed;
|
||||
return;
|
||||
};
|
||||
error!(%err);
|
||||
cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Failed {
|
||||
error: err.to_string(),
|
||||
let state = match cloned.prewarm_impl(from_endpoint).await {
|
||||
Ok(true) => LfcPrewarmState::Completed,
|
||||
Ok(false) => {
|
||||
info!(
|
||||
"skipping LFC prewarm because LFC state is not found in endpoint storage"
|
||||
);
|
||||
LfcPrewarmState::Skipped
|
||||
}
|
||||
Err(err) => {
|
||||
crate::metrics::LFC_PREWARM_ERRORS.inc();
|
||||
error!(%err, "could not prewarm LFC");
|
||||
|
||||
LfcPrewarmState::Failed {
|
||||
error: err.to_string(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
cloned.state.lock().unwrap().lfc_prewarm_state = state;
|
||||
});
|
||||
true
|
||||
}
|
||||
@@ -119,15 +130,21 @@ impl ComputeNode {
|
||||
EndpointStoragePair::from_spec_and_endpoint(state.pspec.as_ref().unwrap(), from_endpoint)
|
||||
}
|
||||
|
||||
async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<()> {
|
||||
/// Request LFC state from endpoint storage and load corresponding pages into Postgres.
|
||||
/// Returns a result with `false` if the LFC state is not found in endpoint storage.
|
||||
async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<bool> {
|
||||
let EndpointStoragePair { url, token } = self.endpoint_storage_pair(from_endpoint)?;
|
||||
info!(%url, "requesting LFC state from endpoint storage");
|
||||
|
||||
info!(%url, "requesting LFC state from endpoint storage");
|
||||
let request = Client::new().get(&url).bearer_auth(token);
|
||||
let res = request.send().await.context("querying endpoint storage")?;
|
||||
let status = res.status();
|
||||
if status != StatusCode::OK {
|
||||
bail!("{status} querying endpoint storage")
|
||||
match status {
|
||||
StatusCode::OK => (),
|
||||
StatusCode::NOT_FOUND => {
|
||||
return Ok(false);
|
||||
}
|
||||
_ => bail!("{status} querying endpoint storage"),
|
||||
}
|
||||
|
||||
let mut uncompressed = Vec::new();
|
||||
@@ -140,15 +157,18 @@ impl ComputeNode {
|
||||
.await
|
||||
.context("decoding LFC state")?;
|
||||
let uncompressed_len = uncompressed.len();
|
||||
info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into postgres");
|
||||
|
||||
info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into Postgres");
|
||||
|
||||
ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
|
||||
.await
|
||||
.context("connecting to postgres")?
|
||||
.query_one("select prewarm_local_cache($1)", &[&uncompressed])
|
||||
.query_one("select neon.prewarm_local_cache($1)", &[&uncompressed])
|
||||
.await
|
||||
.context("loading LFC state into postgres")
|
||||
.map(|_| ())
|
||||
.map(|_| ())?;
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// If offload request is ongoing, return false, true otherwise
|
||||
@@ -176,11 +196,14 @@ impl ComputeNode {
|
||||
|
||||
async fn offload_lfc_with_state_update(&self) {
|
||||
crate::metrics::LFC_OFFLOADS.inc();
|
||||
|
||||
let Err(err) = self.offload_lfc_impl().await else {
|
||||
self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
|
||||
return;
|
||||
};
|
||||
error!(%err);
|
||||
|
||||
crate::metrics::LFC_OFFLOAD_ERRORS.inc();
|
||||
error!(%err, "could not offload LFC state to endpoint storage");
|
||||
self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
|
||||
error: err.to_string(),
|
||||
};
|
||||
@@ -188,13 +211,13 @@ impl ComputeNode {
|
||||
|
||||
async fn offload_lfc_impl(&self) -> Result<()> {
|
||||
let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
|
||||
info!(%url, "requesting LFC state from postgres");
|
||||
info!(%url, "requesting LFC state from Postgres");
|
||||
|
||||
let mut compressed = Vec::new();
|
||||
ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
|
||||
.await
|
||||
.context("connecting to postgres")?
|
||||
.query_one("select get_local_cache_state()", &[])
|
||||
.query_one("select neon.get_local_cache_state()", &[])
|
||||
.await
|
||||
.context("querying LFC state")?
|
||||
.try_get::<usize, &[u8]>(0)
|
||||
@@ -203,13 +226,17 @@ impl ComputeNode {
|
||||
.read_to_end(&mut compressed)
|
||||
.await
|
||||
.context("compressing LFC state")?;
|
||||
|
||||
let compressed_len = compressed.len();
|
||||
info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");
|
||||
|
||||
let request = Client::new().put(url).bearer_auth(token).body(compressed);
|
||||
match request.send().await {
|
||||
Ok(res) if res.status() == StatusCode::OK => Ok(()),
|
||||
Ok(res) => bail!("Error writing to endpoint storage: {}", res.status()),
|
||||
Ok(res) => bail!(
|
||||
"Request to endpoint storage failed with status: {}",
|
||||
res.status()
|
||||
),
|
||||
Err(err) => Err(err).context("writing to endpoint storage"),
|
||||
}
|
||||
}
|
||||
|
||||
132
compute_tools/src/compute_promote.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
use crate::compute::ComputeNode;
|
||||
use anyhow::{Context, Result, bail};
|
||||
use compute_api::{
|
||||
responses::{LfcPrewarmState, PromoteState, SafekeepersLsn},
|
||||
spec::ComputeMode,
|
||||
};
|
||||
use std::{sync::Arc, time::Duration};
|
||||
use tokio::time::sleep;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
impl ComputeNode {
|
||||
/// Returns only when promote fails or succeeds. If a network error occurs
|
||||
/// and http client disconnects, this does not stop promotion, and subsequent
|
||||
/// calls block until promote finishes.
|
||||
/// Called by control plane on secondary after primary endpoint is terminated
|
||||
pub async fn promote(self: &Arc<Self>, safekeepers_lsn: SafekeepersLsn) -> PromoteState {
|
||||
let cloned = self.clone();
|
||||
let start_promotion = || {
|
||||
let (tx, rx) = tokio::sync::watch::channel(PromoteState::NotPromoted);
|
||||
tokio::spawn(async move {
|
||||
tx.send(match cloned.promote_impl(safekeepers_lsn).await {
|
||||
Ok(_) => PromoteState::Completed,
|
||||
Err(err) => {
|
||||
tracing::error!(%err, "promoting");
|
||||
PromoteState::Failed {
|
||||
error: err.to_string(),
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
rx
|
||||
};
|
||||
|
||||
let mut task;
|
||||
// self.state is unlocked once this block ends, so promote_impl can lock it
|
||||
// while task.changed() below is awaited
|
||||
{
|
||||
task = self
|
||||
.state
|
||||
.lock()
|
||||
.unwrap()
|
||||
.promote_state
|
||||
.get_or_insert_with(start_promotion)
|
||||
.clone()
|
||||
}
|
||||
task.changed().await.expect("promote sender dropped");
|
||||
task.borrow().clone()
|
||||
}
|
||||
|
||||
// Why do we have to supply safekeepers?
|
||||
// For a secondary we use primary_connection_conninfo, so the safekeepers field is empty
|
||||
async fn promote_impl(&self, safekeepers_lsn: SafekeepersLsn) -> Result<()> {
|
||||
{
|
||||
let state = self.state.lock().unwrap();
|
||||
let mode = &state.pspec.as_ref().unwrap().spec.mode;
|
||||
if *mode != ComputeMode::Replica {
|
||||
bail!("{} is not replica", mode.to_type_str());
|
||||
}
|
||||
|
||||
// We don't need to query Postgres here, so we read state.lfc_prewarm_state directly instead of calling self.lfc_prewarm_state()
|
||||
match &state.lfc_prewarm_state {
|
||||
LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming => {
|
||||
bail!("prewarm not requested or pending")
|
||||
}
|
||||
LfcPrewarmState::Failed { error } => {
|
||||
tracing::warn!(%error, "replica prewarm failed")
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
|
||||
.await
|
||||
.context("connecting to postgres")?;
|
||||
|
||||
let primary_lsn = safekeepers_lsn.wal_flush_lsn;
|
||||
let mut last_wal_replay_lsn: Lsn = Lsn::INVALID;
|
||||
const RETRIES: i32 = 20;
|
||||
for i in 0..=RETRIES {
|
||||
let row = client
|
||||
.query_one("SELECT pg_last_wal_replay_lsn()", &[])
|
||||
.await
|
||||
.context("getting last replay lsn")?;
|
||||
let lsn: u64 = row.get::<usize, postgres_types::PgLsn>(0).into();
|
||||
last_wal_replay_lsn = lsn.into();
|
||||
if last_wal_replay_lsn >= primary_lsn {
|
||||
break;
|
||||
}
|
||||
tracing::info!("Try {i}, replica lsn {last_wal_replay_lsn}, primary lsn {primary_lsn}");
|
||||
sleep(Duration::from_secs(1)).await;
|
||||
}
|
||||
if last_wal_replay_lsn < primary_lsn {
|
||||
bail!("didn't catch up with primary in {RETRIES} retries");
|
||||
}
|
||||
|
||||
// using $1 doesn't work with ALTER SYSTEM SET
|
||||
let safekeepers_sql = format!(
|
||||
"ALTER SYSTEM SET neon.safekeepers='{}'",
|
||||
safekeepers_lsn.safekeepers
|
||||
);
|
||||
client
|
||||
.query(&safekeepers_sql, &[])
|
||||
.await
|
||||
.context("setting safekeepers")?;
|
||||
client
|
||||
.query("SELECT pg_reload_conf()", &[])
|
||||
.await
|
||||
.context("reloading postgres config")?;
|
||||
let row = client
|
||||
.query_one("SELECT * FROM pg_promote()", &[])
|
||||
.await
|
||||
.context("pg_promote")?;
|
||||
if !row.get::<usize, bool>(0) {
|
||||
bail!("pg_promote() returned false");
|
||||
}
|
||||
|
||||
let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
|
||||
.await
|
||||
.context("connecting to postgres")?;
|
||||
let row = client
|
||||
.query_one("SHOW transaction_read_only", &[])
|
||||
.await
|
||||
.context("getting transaction_read_only")?;
|
||||
if row.get::<usize, &str>(0) == "on" {
|
||||
bail!("replica in read only mode after promotion");
|
||||
}
|
||||
|
||||
let mut state = self.state.lock().unwrap();
|
||||
state.pspec.as_mut().unwrap().spec.mode = ComputeMode::Primary;
|
||||
Ok(())
|
||||
}
|
||||
}
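A condensed sketch (names illustrative, not from this diff) of the start-once/await-many pattern that promote() above relies on, for readers unfamiliar with tokio watch channels:

```rust
use std::sync::Mutex;
use tokio::sync::watch;

// The first caller installs the channel and spawns the work; every caller,
// including ones that reconnect after a dropped HTTP connection, awaits the
// same receiver, so the promotion itself is never cancelled or restarted.
async fn await_single_run(slot: &Mutex<Option<watch::Receiver<Option<bool>>>>) -> Option<bool> {
    let mut rx = {
        let mut guard = slot.lock().unwrap();
        guard
            .get_or_insert_with(|| {
                let (tx, rx) = watch::channel(None);
                tokio::spawn(async move {
                    // ...long-running work would happen here...
                    let _ = tx.send(Some(true));
                });
                rx
            })
            .clone()
    }; // the lock is released before awaiting, mirroring the block in promote()
    rx.changed().await.expect("sender dropped");
    *rx.borrow()
}
```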
|
||||
@@ -9,6 +9,7 @@ use std::path::Path;
|
||||
use compute_api::responses::TlsConfig;
|
||||
use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
|
||||
|
||||
use crate::compute::ComputeNodeParams;
|
||||
use crate::pg_helpers::{
|
||||
GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
|
||||
};
|
||||
@@ -41,6 +42,7 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
|
||||
/// Create or completely rewrite configuration file specified by `path`
|
||||
pub fn write_postgres_conf(
|
||||
pgdata_path: &Path,
|
||||
params: &ComputeNodeParams,
|
||||
spec: &ComputeSpec,
|
||||
extension_server_port: u16,
|
||||
tls_config: &Option<TlsConfig>,
|
||||
@@ -54,56 +56,15 @@ pub fn write_postgres_conf(
|
||||
writeln!(file, "{conf}")?;
|
||||
}
|
||||
|
||||
// Add options for connecting to storage
|
||||
writeln!(file, "# Neon storage settings")?;
|
||||
|
||||
if let Some(conninfo) = &spec.pageserver_connection_info {
|
||||
let mut libpq_urls: Option<Vec<String>> = Some(Vec::new());
|
||||
let mut grpc_urls: Option<Vec<String>> = Some(Vec::new());
|
||||
|
||||
for shardno in 0..conninfo.shards.len() {
|
||||
let info = conninfo.shards.get(&(shardno as u32)).ok_or_else(|| {
|
||||
anyhow::anyhow!("shard {shardno} missing from pageserver_connection_info shard map")
|
||||
})?;
|
||||
|
||||
if let Some(url) = &info.libpq_url {
|
||||
if let Some(ref mut urls) = libpq_urls {
|
||||
urls.push(url.clone());
|
||||
}
|
||||
} else {
|
||||
libpq_urls = None
|
||||
}
|
||||
if let Some(url) = &info.grpc_url {
|
||||
if let Some(ref mut urls) = grpc_urls {
|
||||
urls.push(url.clone());
|
||||
}
|
||||
} else {
|
||||
grpc_urls = None
|
||||
}
|
||||
}
|
||||
if let Some(libpq_urls) = libpq_urls {
|
||||
writeln!(
|
||||
file,
|
||||
"neon.pageserver_connstring={}",
|
||||
escape_conf_value(&libpq_urls.join(","))
|
||||
)?;
|
||||
} else {
|
||||
writeln!(file, "# no neon.pageserver_connstring")?;
|
||||
}
|
||||
if let Some(grpc_urls) = grpc_urls {
|
||||
writeln!(
|
||||
file,
|
||||
"neon.pageserver_grpc_urls={}",
|
||||
escape_conf_value(&grpc_urls.join(","))
|
||||
)?;
|
||||
} else {
|
||||
writeln!(file, "# no neon.pageserver_grpc_urls")?;
|
||||
}
|
||||
}
|
||||
|
||||
// Stripe size GUC should be defined prior to connection string
|
||||
if let Some(stripe_size) = spec.shard_stripe_size {
|
||||
writeln!(file, "neon.stripe_size={stripe_size}")?;
|
||||
}
|
||||
// Add options for connecting to storage
|
||||
writeln!(file, "# Neon storage settings")?;
|
||||
if let Some(s) = &spec.pageserver_connstring {
|
||||
writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
|
||||
}
|
||||
if !spec.safekeeper_connstrings.is_empty() {
|
||||
let mut neon_safekeepers_value = String::new();
|
||||
tracing::info!(
|
||||
@@ -203,6 +164,12 @@ pub fn write_postgres_conf(
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(
|
||||
file,
|
||||
"neon.privileged_role_name={}",
|
||||
escape_conf_value(params.privileged_role_name.as_str())
|
||||
)?;
|
||||
|
||||
// If there are any extra options in the 'settings' field, append those
|
||||
if spec.cluster.settings.is_some() {
|
||||
writeln!(file, "# Managed by compute_ctl: begin")?;
|
||||
|
||||
@@ -83,6 +83,87 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/DbsAndRoles"
|
||||
|
||||
/promote:
|
||||
post:
|
||||
tags:
|
||||
- Promotion
|
||||
summary: Promote secondary replica to primary
|
||||
description: ""
|
||||
operationId: promoteReplica
|
||||
requestBody:
|
||||
description: Promote requests data
|
||||
required: true
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/SafekeepersLsn"
|
||||
responses:
|
||||
200:
|
||||
description: Promote succeeded or wasn't started
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/PromoteState"
|
||||
500:
|
||||
description: Promote failed
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/PromoteState"
|
||||
|
||||
/lfc/prewarm:
|
||||
post:
|
||||
summary: Request LFC Prewarm
|
||||
parameters:
|
||||
- name: from_endpoint
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
description: ""
|
||||
operationId: lfcPrewarm
|
||||
responses:
|
||||
202:
|
||||
description: LFC prewarm started
|
||||
429:
|
||||
description: LFC prewarm ongoing
|
||||
get:
|
||||
tags:
|
||||
- Prewarm
|
||||
summary: Get LFC prewarm state
|
||||
description: ""
|
||||
operationId: getLfcPrewarmState
|
||||
responses:
|
||||
200:
|
||||
description: Prewarm state
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/LfcPrewarmState"
|
||||
|
||||
/lfc/offload:
|
||||
post:
|
||||
summary: Request LFC offload
|
||||
description: ""
|
||||
operationId: lfcOffload
|
||||
responses:
|
||||
202:
|
||||
description: LFC offload started
|
||||
429:
|
||||
description: LFC offload ongoing
|
||||
get:
|
||||
tags:
|
||||
- Prewarm
|
||||
summary: Get LFC offloading state
|
||||
description: ""
|
||||
operationId: getLfcOffloadState
|
||||
responses:
|
||||
200:
|
||||
description: Offload state
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/LfcOffloadState"
|
||||
|
||||
/database_schema:
|
||||
get:
|
||||
tags:
|
||||
@@ -290,9 +371,28 @@ paths:
|
||||
summary: Terminate Postgres and wait for it to exit
|
||||
description: ""
|
||||
operationId: terminate
|
||||
parameters:
|
||||
- name: mode
|
||||
in: query
|
||||
description: "Terminate mode: fast (wait 30s before returning) and immediate"
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
enum: ["fast", "immediate"]
|
||||
default: fast
|
||||
responses:
|
||||
200:
|
||||
description: Result
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TerminateResponse"
|
||||
201:
|
||||
description: Result if compute is already terminated
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/TerminateResponse"
|
||||
412:
|
||||
description: "wrong state"
|
||||
content:
|
||||
@@ -335,15 +435,6 @@ components:
|
||||
total_startup_ms:
|
||||
type: integer
|
||||
|
||||
Info:
|
||||
type: object
|
||||
description: Information about VM/Pod.
|
||||
required:
|
||||
- num_cpus
|
||||
properties:
|
||||
num_cpus:
|
||||
type: integer
|
||||
|
||||
DbsAndRoles:
|
||||
type: object
|
||||
description: Databases and Roles
|
||||
@@ -458,11 +549,14 @@ components:
|
||||
type: string
|
||||
enum:
|
||||
- empty
|
||||
- init
|
||||
- failed
|
||||
- running
|
||||
- configuration_pending
|
||||
- init
|
||||
- running
|
||||
- configuration
|
||||
- failed
|
||||
- termination_pending_fast
|
||||
- termination_pending_immediate
|
||||
- terminated
|
||||
example: running
|
||||
|
||||
ExtensionInstallRequest:
|
||||
@@ -497,25 +591,69 @@ components:
|
||||
type: string
|
||||
example: "1.0.0"
|
||||
|
||||
InstalledExtensions:
|
||||
SafekeepersLsn:
|
||||
type: object
|
||||
required:
|
||||
- safekeepers
|
||||
- wal_flush_lsn
|
||||
properties:
|
||||
extensions:
|
||||
description: Contains list of installed extensions.
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
extname:
|
||||
type: string
|
||||
version:
|
||||
type: string
|
||||
items:
|
||||
type: string
|
||||
n_databases:
|
||||
type: integer
|
||||
owned_by_superuser:
|
||||
type: integer
|
||||
safekeepers:
|
||||
description: Primary replica safekeepers
|
||||
type: string
|
||||
wal_flush_lsn:
|
||||
description: Primary last WAL flush LSN
|
||||
type: string
|
||||
|
||||
LfcPrewarmState:
|
||||
type: object
|
||||
required:
|
||||
- status
|
||||
- total
|
||||
- prewarmed
|
||||
- skipped
|
||||
properties:
|
||||
status:
|
||||
description: LFC prewarm status
|
||||
enum: [not_prewarmed, prewarming, completed, failed, skipped]
|
||||
type: string
|
||||
error:
|
||||
description: LFC prewarm error, if any
|
||||
type: string
|
||||
total:
|
||||
description: Total pages processed
|
||||
type: integer
|
||||
prewarmed:
|
||||
description: Total pages prewarmed
|
||||
type: integer
|
||||
skipped:
|
||||
description: Pages processed but not prewarmed
|
||||
type: integer
|
||||
|
||||
LfcOffloadState:
|
||||
type: object
|
||||
required:
|
||||
- status
|
||||
properties:
|
||||
status:
|
||||
description: LFC offload status
|
||||
enum: [not_offloaded, offloading, completed, failed]
|
||||
type: string
|
||||
error:
|
||||
description: LFC offload error, if any
|
||||
type: string
|
||||
|
||||
PromoteState:
|
||||
type: object
|
||||
required:
|
||||
- status
|
||||
properties:
|
||||
status:
|
||||
description: Promote result
|
||||
enum: [not_promoted, completed, failed]
|
||||
type: string
|
||||
error:
|
||||
description: Promote error, if any
|
||||
type: string
|
||||
|
||||
SetRoleGrantsRequest:
|
||||
type: object
|
||||
@@ -544,6 +682,17 @@ components:
|
||||
description: Role name.
|
||||
example: "neon"
|
||||
|
||||
TerminateResponse:
|
||||
type: object
|
||||
required:
|
||||
- lsn
|
||||
properties:
|
||||
lsn:
|
||||
type: string
|
||||
nullable: true
|
||||
description: "last WAL flush LSN"
|
||||
example: "0/028F10D8"
|
||||
|
||||
SetRoleGrantsResponse:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -14,6 +14,7 @@ pub(in crate::http) mod insights;
|
||||
pub(in crate::http) mod lfc;
|
||||
pub(in crate::http) mod metrics;
|
||||
pub(in crate::http) mod metrics_json;
|
||||
pub(in crate::http) mod promote;
|
||||
pub(in crate::http) mod status;
|
||||
pub(in crate::http) mod terminate;
|
||||
|
||||
|
||||
14
compute_tools/src/http/routes/promote.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
use crate::http::JsonResponse;
|
||||
use axum::Form;
|
||||
use http::StatusCode;
|
||||
|
||||
pub(in crate::http) async fn promote(
|
||||
compute: axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>,
|
||||
Form(safekeepers_lsn): Form<compute_api::responses::SafekeepersLsn>,
|
||||
) -> axum::response::Response {
|
||||
let state = compute.promote(safekeepers_lsn).await;
|
||||
if let compute_api::responses::PromoteState::Failed { error } = state {
|
||||
return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, error);
|
||||
}
|
||||
JsonResponse::success(StatusCode::OK, state)
|
||||
}
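Illustration only: invoking the handler above. It extracts a form-encoded SafekeepersLsn, so the request body is sent as form fields; host, port, token, and the field values are placeholders.

```rust
// Sketch only -- address, token, and payload values are placeholders.
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let res = reqwest::Client::new()
        .post("http://127.0.0.1:3080/promote")
        .bearer_auth("placeholder-jwt")
        .form(&[
            ("safekeepers", "sk-1:5454,sk-2:5454,sk-3:5454"),
            ("wal_flush_lsn", "0/028F10D8"),
        ])
        .send()
        .await?;

    // 200 carries a completed PromoteState; 500 carries a failed one (see the
    // OpenAPI /promote entry earlier in this diff).
    println!("promote -> {}: {}", res.status(), res.text().await?);
    Ok(())
}
```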
|
||||
@@ -3,7 +3,7 @@ use crate::http::JsonResponse;
use axum::extract::State;
use axum::response::Response;
use axum_extra::extract::OptionalQuery;
use compute_api::responses::{ComputeStatus, TerminateResponse};
use compute_api::responses::{ComputeStatus, TerminateMode, TerminateResponse};
use http::StatusCode;
use serde::Deserialize;
use std::sync::Arc;
@@ -12,7 +12,7 @@ use tracing::info;

#[derive(Deserialize, Default)]
pub struct TerminateQuery {
mode: compute_api::responses::TerminateMode,
mode: TerminateMode,
}

/// Terminate the compute.
@@ -24,16 +24,16 @@ pub(in crate::http) async fn terminate(
{
let mut state = compute.state.lock().unwrap();
if state.status == ComputeStatus::Terminated {
return JsonResponse::success(StatusCode::CREATED, state.terminate_flush_lsn);
let response = TerminateResponse {
lsn: state.terminate_flush_lsn,
};
return JsonResponse::success(StatusCode::CREATED, response);
}

if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
return JsonResponse::invalid_status(state.status);
}
state.set_status(
ComputeStatus::TerminationPending { mode },
&compute.state_changed,
);
state.set_status(mode.into(), &compute.state_changed);
}

forward_termination_signal(false);

@@ -23,7 +23,7 @@ use super::{
middleware::authorize::Authorize,
routes::{
check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
grants, insights, lfc, metrics, metrics_json, status, terminate,
grants, insights, lfc, metrics, metrics_json, promote, status, terminate,
},
};
use crate::compute::ComputeNode;
@@ -87,6 +87,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
let authenticated_router = Router::<Arc<ComputeNode>>::new()
.route("/lfc/prewarm", get(lfc::prewarm_state).post(lfc::prewarm))
.route("/lfc/offload", get(lfc::offload_state).post(lfc::offload))
.route("/promote", post(promote::promote))
.route("/check_writability", post(check_writability::is_writable))
.route("/configure", post(configure::configure))
.route("/database_schema", get(database_schema::get_schema_dump))

@@ -2,6 +2,7 @@ use std::collections::HashMap;

use anyhow::Result;
use compute_api::responses::{InstalledExtension, InstalledExtensions};
use tokio_postgres::error::Error as PostgresError;
use tokio_postgres::{Client, Config, NoTls};

use crate::metrics::INSTALLED_EXTENSIONS;
@@ -10,7 +11,7 @@ use crate::metrics::INSTALLED_EXTENSIONS;
/// and to make database listing query here more explicit.
///
/// Limit the number of databases to 500 to avoid excessive load.
async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
async fn list_dbs(client: &mut Client) -> Result<Vec<String>, PostgresError> {
// `pg_database.datconnlimit = -2` means that the database is in the
// invalid state
let databases = client
@@ -37,7 +38,9 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
/// Same extension can be installed in multiple databases with different versions,
/// so we report a separate metric (number of databases where it is installed)
/// for each extension version.
pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExtensions> {
pub async fn get_installed_extensions(
mut conf: Config,
) -> Result<InstalledExtensions, PostgresError> {
conf.application_name("compute_ctl:get_installed_extensions");
let databases: Vec<String> = {
let (mut client, connection) = conf.connect(NoTls).await?;

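This hunk narrows the helpers from anyhow::Result to the concrete tokio_postgres error, so callers see the failure type in the signature instead of downcasting. A generic sketch of the same idea, with an illustrative query and connection string:

```rust
use tokio_postgres::error::Error as PostgresError;
use tokio_postgres::{Client, NoTls};

// Returning the concrete driver error instead of anyhow::Error keeps the
// failure type visible in the signature...
async fn count_databases(client: &Client) -> Result<i64, PostgresError> {
    let row = client
        .query_one("SELECT count(*) FROM pg_database WHERE datconnlimit <> -2", &[])
        .await?;
    Ok(row.get(0))
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Connection string is illustrative.
    let (client, connection) =
        tokio_postgres::connect("host=localhost user=postgres", NoTls).await?;
    tokio::spawn(connection);
    match count_databases(&client).await {
        Ok(n) => println!("{n} databases"),
        // ...and callers can react to driver errors specifically, e.g. inspect the SQLSTATE.
        Err(e) => eprintln!("postgres error: {e} (code: {:?})", e.code()),
    }
    Ok(())
}
```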
@@ -12,6 +12,7 @@ pub mod logger;
pub mod catalog;
pub mod compute;
pub mod compute_prewarm;
pub mod compute_promote;
pub mod disk_quota;
pub mod extension_server;
pub mod installed_extensions;

@@ -4,7 +4,8 @@ use std::thread;
use std::time::{Duration, SystemTime};

use anyhow::{Result, bail};
use compute_api::spec::{ComputeMode, PageserverConnectionInfo};
use compute_api::spec::{ComputeMode, PageserverProtocol};
use itertools::Itertools as _;
use pageserver_page_api as page_api;
use postgres::{NoTls, SimpleQueryMessage};
use tracing::{info, warn};
@@ -77,16 +78,17 @@ fn acquire_lsn_lease_with_retry(

loop {
// Note: List of pageservers is dynamic, need to re-read configs before each attempt.
let (conninfo, auth) = {
let (connstrings, auth) = {
let state = compute.state.lock().unwrap();
let spec = state.pspec.as_ref().expect("spec must be set");
(
spec.pageserver_conninfo.clone(),
spec.pageserver_connstr.clone(),
spec.storage_auth_token.clone(),
)
};

let result = try_acquire_lsn_lease(conninfo, auth.as_deref(), tenant_id, timeline_id, lsn);
let result =
try_acquire_lsn_lease(&connstrings, auth.as_deref(), tenant_id, timeline_id, lsn);
match result {
Ok(Some(res)) => {
return Ok(res);
@@ -110,16 +112,17 @@ fn acquire_lsn_lease_with_retry(

/// Tries to acquire LSN leases on all Pageserver shards.
fn try_acquire_lsn_lease(
conninfo: PageserverConnectionInfo,
connstrings: &str,
auth: Option<&str>,
tenant_id: TenantId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<Option<SystemTime>> {
let shard_count = conninfo.shards.len();
let connstrings = connstrings.split(',').collect_vec();
let shard_count = connstrings.len();
let mut leases = Vec::new();

for (shard_number, shard) in conninfo.shards.into_iter() {
for (shard_number, &connstring) in connstrings.iter().enumerate() {
let tenant_shard_id = match shard_count {
0 | 1 => TenantShardId::unsharded(tenant_id),
shard_count => TenantShardId {
@@ -129,22 +132,13 @@ fn try_acquire_lsn_lease(
},
};

let lease = if conninfo.prefer_grpc {
acquire_lsn_lease_grpc(
&shard.grpc_url.unwrap(),
auth,
tenant_shard_id,
timeline_id,
lsn,
)?
} else {
acquire_lsn_lease_libpq(
&shard.libpq_url.unwrap(),
auth,
tenant_shard_id,
timeline_id,
lsn,
)?
let lease = match PageserverProtocol::from_connstring(connstring)? {
PageserverProtocol::Libpq => {
acquire_lsn_lease_libpq(connstring, auth, tenant_shard_id, timeline_id, lsn)?
}
PageserverProtocol::Grpc => {
acquire_lsn_lease_grpc(connstring, auth, tenant_shard_id, timeline_id, lsn)?
}
};
leases.push(lease);
}

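The reworked lease helper takes a comma-separated connstring list again and picks the protocol per entry from the URL scheme. The sketch below imitates that dispatch; the Protocol enum and the hosts and ports are made up, standing in for PageserverProtocol::from_connstring.

```rust
#[derive(Debug, PartialEq)]
enum Protocol {
    Libpq,
    Grpc,
}

// Stand-in for PageserverProtocol::from_connstring: infer the protocol from the scheme.
fn protocol_of(connstring: &str) -> Result<Protocol, String> {
    if connstring.starts_with("grpc://") {
        Ok(Protocol::Grpc)
    } else if connstring.starts_with("postgres://") || connstring.starts_with("postgresql://") {
        Ok(Protocol::Libpq)
    } else {
        Err(format!("unsupported scheme in {connstring}"))
    }
}

fn main() {
    // One entry per shard, matching how the code above splits the spec's connstring.
    let connstrings = "postgres://no_user@ps1:6400,grpc://no_user@ps2:51051";
    for (shard_number, connstring) in connstrings.split(',').enumerate() {
        let proto = protocol_of(connstring).expect("bad connstring");
        println!("shard {shard_number}: {proto:?} via {connstring}");
    }
}
```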
@@ -105,6 +105,14 @@ pub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
.expect("failed to define a metric")
});

pub(crate) static LFC_PREWARM_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"compute_ctl_lfc_prewarm_errors_total",
"Total number of LFC prewarm errors",
)
.expect("failed to define a metric")
});

pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"compute_ctl_lfc_offloads_total",
@@ -113,6 +121,14 @@ pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
.expect("failed to define a metric")
});

pub(crate) static LFC_OFFLOAD_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"compute_ctl_lfc_offload_errors_total",
"Total number of LFC offload errors",
)
.expect("failed to define a metric")
});

pub fn collect() -> Vec<MetricFamily> {
let mut metrics = COMPUTE_CTL_UP.collect();
metrics.extend(INSTALLED_EXTENSIONS.collect());
@@ -123,6 +139,8 @@ pub fn collect() -> Vec<MetricFamily> {
metrics.extend(PG_CURR_DOWNTIME_MS.collect());
metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
metrics.extend(LFC_PREWARMS.collect());
metrics.extend(LFC_PREWARM_ERRORS.collect());
metrics.extend(LFC_OFFLOADS.collect());
metrics.extend(LFC_OFFLOAD_ERRORS.collect());
metrics
}

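The new error counters reuse the existing Lazy<IntCounter> plus register_int_counter! pattern shown above. A sketch of that pattern end to end, with an illustrative metric name, including rendering the default registry as text:

```rust
use once_cell::sync::Lazy;
use prometheus::{register_int_counter, Encoder, IntCounter, TextEncoder};

// Same shape as the compute_ctl counters: registered once, bumped on each error.
static EXAMPLE_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!("example_errors_total", "Total number of example errors")
        .expect("failed to define a metric")
});

fn main() {
    EXAMPLE_ERRORS.inc();

    // The default registry already knows about the counter; render it as text.
    let mut buf = Vec::new();
    TextEncoder::new()
        .encode(&prometheus::gather(), &mut buf)
        .unwrap();
    println!("{}", String::from_utf8(buf).unwrap());
}
```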
@@ -0,0 +1 @@
ALTER ROLE {privileged_role_name} BYPASSRLS;
@@ -1 +0,0 @@
ALTER ROLE neon_superuser BYPASSRLS;
@@ -1,8 +1,21 @@
-- On December 8th, 2023, an engineering escalation (INC-110) was opened after
-- it was found that BYPASSRLS was being applied to all roles.
--
-- PR that introduced the issue: https://github.com/neondatabase/neon/pull/5657
-- Subsequent commit on main: https://github.com/neondatabase/neon/commit/ad99fa5f0393e2679e5323df653c508ffa0ac072
--
-- NOBYPASSRLS and INHERIT are the defaults for a Postgres role, but because it
-- isn't easy to know if a Postgres cluster is affected by the issue, we need to
-- keep the migration around for a long time, if not indefinitely, so any
-- cluster can be fixed.
--
-- Branching is the gift that keeps on giving...

DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, '{privileged_role_name}', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
@@ -10,7 +23,7 @@ BEGIN

FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
NOT pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';

@@ -1,6 +1,6 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
EXECUTE 'GRANT pg_create_subscription TO {privileged_role_name}';
END IF;
END $$;
@@ -1 +0,0 @@
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
@@ -0,0 +1 @@
GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;
@@ -1,4 +1,4 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
-- interacted with by {privileged_role_name} without permission issues.

ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name};
@@ -1,4 +1,4 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
-- interacted with by {privileged_role_name} without permission issues.

ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name};
@@ -1,3 +1,3 @@
-- SKIP: Moved inline to the handle_grants() functions.

ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name} WITH GRANT OPTION;
@@ -1,3 +1,3 @@
-- SKIP: Moved inline to the handle_grants() functions.

ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name} WITH GRANT OPTION;
@@ -1,7 +1,7 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO {privileged_role_name}';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO {privileged_role_name}';
END IF;
END $$;
@@ -1 +0,0 @@
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;
@@ -0,0 +1 @@
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO {privileged_role_name};
@@ -0,0 +1 @@
GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;
@@ -7,13 +7,17 @@ BEGIN
INTO monitor
FROM pg_auth_members
WHERE roleid = 'pg_monitor'::regrole
AND member = 'pg_monitor'::regrole;
AND member = 'neon_superuser'::regrole;

IF NOT monitor.member THEN
IF monitor IS NULL THEN
RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_monitor';
END IF;

IF monitor.admin IS NULL OR NOT monitor.member THEN
RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor';
END IF;

IF NOT monitor.admin THEN
IF monitor.admin IS NULL OR NOT monitor.admin THEN
RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor';
END IF;
END $$;
@@ -0,0 +1,23 @@
DO $$
DECLARE
signal_backend record;
BEGIN
SELECT pg_has_role('neon_superuser', 'pg_signal_backend', 'member') AS member,
admin_option AS admin
INTO signal_backend
FROM pg_auth_members
WHERE roleid = 'pg_signal_backend'::regrole
AND member = 'neon_superuser'::regrole;

IF signal_backend IS NULL THEN
RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_signal_backend';
END IF;

IF signal_backend.member IS NULL OR NOT signal_backend.member THEN
RAISE EXCEPTION 'neon_superuser is not a member of pg_signal_backend';
END IF;

IF signal_backend.admin IS NULL OR NOT signal_backend.admin THEN
RAISE EXCEPTION 'neon_superuser cannot grant pg_signal_backend';
END IF;
END $$;

@@ -84,7 +84,8 @@ impl ComputeMonitor {
if matches!(
compute_status,
ComputeStatus::Terminated
| ComputeStatus::TerminationPending { .. }
| ComputeStatus::TerminationPendingFast
| ComputeStatus::TerminationPendingImmediate
| ComputeStatus::Failed
) {
info!(

@@ -9,6 +9,7 @@ use reqwest::StatusCode;
use tokio_postgres::Client;
use tracing::{error, info, instrument};

use crate::compute::ComputeNodeParams;
use crate::config;
use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
use crate::migration::MigrationRunner;
@@ -169,7 +170,7 @@ pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
}

#[instrument(skip_all)]
pub async fn handle_migrations(client: &mut Client) -> Result<()> {
pub async fn handle_migrations(params: ComputeNodeParams, client: &mut Client) -> Result<()> {
info!("handle migrations");

// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -178,24 +179,58 @@ pub async fn handle_migrations(client: &mut Client) -> Result<()> {

// Add new migrations in numerical order.
let migrations = [
include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
include_str!("./migrations/0002-alter_roles.sql"),
include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
include_str!(
"./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
&format!(
include_str!("./migrations/0001-add_bypass_rls_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
include_str!(
"./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
&format!(
include_str!("./migrations/0002-alter_roles.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0003-grant_pg_create_subscription_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0004-grant_pg_monitor_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0005-grant_all_on_tables_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0006-grant_all_on_sequences_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!(
"./migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql"
),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!(
"./migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql"
),
privileged_role_name = params.privileged_role_name
),
include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
include_str!(
"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
&format!(
include_str!(
"./migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql"
),
privileged_role_name = params.privileged_role_name
),
include_str!(
"./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
&format!(
include_str!(
"./migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql"
),
privileged_role_name = params.privileged_role_name
),
&format!(
include_str!("./migrations/0012-grant_pg_signal_backend_to_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
];

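The migration list above leans on the fact that format! accepts any macro that expands to a string literal, so include_str!-ed SQL with a {privileged_role_name} placeholder can be filled in at run time. A minimal illustration, with an inline literal standing in for one of the included .sql files:

```rust
fn main() {
    // Stand-in for include_str!("./migrations/0001-add_bypass_rls_to_privileged_role.sql"):
    // the named {privileged_role_name} placeholder in the file is substituted when
    // the migration array is built.
    let sql = format!(
        "ALTER ROLE {privileged_role_name} BYPASSRLS;",
        privileged_role_name = "neon_superuser"
    );
    assert_eq!(sql, "ALTER ROLE neon_superuser BYPASSRLS;");
    println!("{sql}");
}
```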
@@ -13,14 +13,14 @@ use tokio_postgres::Client;
use tokio_postgres::error::SqlState;
use tracing::{Instrument, debug, error, info, info_span, instrument, warn};

use crate::compute::{ComputeNode, ComputeState};
use crate::compute::{ComputeNode, ComputeNodeParams, ComputeState};
use crate::pg_helpers::{
DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,
get_existing_roles_async,
};
use crate::spec_apply::ApplySpecPhase::{
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser,
CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon,
CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreatePgauditExtension,
CreatePgauditlogtofileExtension, CreatePrivilegedRole, CreateSchemaNeon,
DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
RunInEachDatabase,
@@ -49,6 +49,7 @@ impl ComputeNode {
// Proceed with post-startup configuration. Note, that order of operations is important.
let client = Self::get_maintenance_client(&conf).await?;
let spec = spec.clone();
let params = Arc::new(self.params.clone());

let databases = get_existing_dbs_async(&client).await?;
let roles = get_existing_roles_async(&client)
@@ -157,6 +158,7 @@ impl ComputeNode {

let conf = Arc::new(conf);
let fut = Self::apply_spec_sql_db(
params.clone(),
spec.clone(),
conf,
ctx.clone(),
@@ -185,7 +187,7 @@ impl ComputeNode {
}

for phase in [
CreateNeonSuperuser,
CreatePrivilegedRole,
DropInvalidDatabases,
RenameRoles,
CreateAndAlterRoles,
@@ -195,6 +197,7 @@ impl ComputeNode {
] {
info!("Applying phase {:?}", &phase);
apply_operations(
params.clone(),
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
@@ -243,6 +246,7 @@ impl ComputeNode {
}

let fut = Self::apply_spec_sql_db(
params.clone(),
spec.clone(),
conf,
ctx.clone(),
@@ -293,6 +297,7 @@ impl ComputeNode {
for phase in phases {
debug!("Applying phase {:?}", &phase);
apply_operations(
params.clone(),
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
@@ -313,7 +318,9 @@ impl ComputeNode {
/// May opt to not connect to databases that don't have any scheduled
/// operations. The function is concurrency-controlled with the provided
/// semaphore. The caller has to make sure the semaphore isn't exhausted.
#[allow(clippy::too_many_arguments)] // TODO: needs bigger refactoring
async fn apply_spec_sql_db(
params: Arc<ComputeNodeParams>,
spec: Arc<ComputeSpec>,
conf: Arc<tokio_postgres::Config>,
ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
@@ -328,6 +335,7 @@ impl ComputeNode {

for subphase in subphases {
apply_operations(
params.clone(),
spec.clone(),
ctx.clone(),
jwks_roles.clone(),
@@ -467,7 +475,7 @@ pub enum PerDatabasePhase {

#[derive(Clone, Debug)]
pub enum ApplySpecPhase {
CreateNeonSuperuser,
CreatePrivilegedRole,
DropInvalidDatabases,
RenameRoles,
CreateAndAlterRoles,
@@ -510,6 +518,7 @@ pub struct MutableApplyContext {
/// - No timeouts have (yet) been implemented.
/// - The caller is responsible for limiting and/or applying concurrency.
pub async fn apply_operations<'a, Fut, F>(
params: Arc<ComputeNodeParams>,
spec: Arc<ComputeSpec>,
ctx: Arc<RwLock<MutableApplyContext>>,
jwks_roles: Arc<HashSet<String>>,
@@ -527,7 +536,7 @@ where
debug!("Processing phase {:?}", &apply_spec_phase);
let ctx = ctx;

let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase)
let mut ops = get_operations(&params, &spec, &ctx, &jwks_roles, &apply_spec_phase)
.await?
.peekable();

@@ -588,14 +597,18 @@ where
/// sort/merge/batch execution, but for now this is a nice way to improve
/// batching behavior of the commands.
async fn get_operations<'a>(
params: &'a ComputeNodeParams,
spec: &'a ComputeSpec,
ctx: &'a RwLock<MutableApplyContext>,
jwks_roles: &'a HashSet<String>,
apply_spec_phase: &'a ApplySpecPhase,
) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
match apply_spec_phase {
ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation {
query: include_str!("sql/create_neon_superuser.sql").to_string(),
ApplySpecPhase::CreatePrivilegedRole => Ok(Box::new(once(Operation {
query: format!(
include_str!("sql/create_privileged_role.sql"),
privileged_role_name = params.privileged_role_name
),
comment: None,
}))),
ApplySpecPhase::DropInvalidDatabases => {
@@ -697,8 +710,9 @@ async fn get_operations<'a>(
None => {
let query = if !jwks_roles.contains(role.name.as_str()) {
format!(
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}",
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE {} {}",
role.name.pg_quote(),
params.privileged_role_name,
role.to_pg_options(),
)
} else {
@@ -849,8 +863,9 @@ async fn get_operations<'a>(
// ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database
// (see https://www.postgresql.org/docs/current/ddl-priv.html)
query: format!(
"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
db.name.pg_quote()
"GRANT ALL PRIVILEGES ON DATABASE {} TO {}",
db.name.pg_quote(),
params.privileged_role_name
),
comment: None,
},

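The grant above interpolates a quoted database name and the configurable privileged role into one statement. A sketch of the same string building, with a simplified pg_quote standing in for the helper in crate::pg_helpers:

```rust
// Simplified stand-in for pg_helpers::Escaping::pg_quote: double any embedded
// quotes and wrap the identifier in double quotes.
fn pg_quote(ident: &str) -> String {
    format!("\"{}\"", ident.replace('"', "\"\""))
}

fn main() {
    let db_name = "orders"; // illustrative database name
    let privileged_role_name = "neon_superuser"; // now configurable per endpoint
    let query = format!(
        "GRANT ALL PRIVILEGES ON DATABASE {} TO {}",
        pg_quote(db_name),
        privileged_role_name
    );
    assert_eq!(query, r#"GRANT ALL PRIVILEGES ON DATABASE "orders" TO neon_superuser"#);
    println!("{query}");
}
```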
@@ -1,8 +0,0 @@
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
THEN
CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
END IF;
END
$$;
compute_tools/src/sql/create_privileged_role.sql (new file, 8 lines)
@@ -0,0 +1,8 @@
DO $$
BEGIN
IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{privileged_role_name}')
THEN
CREATE ROLE {privileged_role_name} CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
END IF;
END
$$;

@@ -8,10 +8,10 @@ code changes locally, but not suitable for running production systems.

## Example: Start with Postgres 16

To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 3 of the start-up commands.
To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 2 of the start-up commands.

```shell
cargo neon init --pg-version 16
cargo neon init
cargo neon start
cargo neon tenant create --set-default --pg-version 16
cargo neon endpoint create main --pg-version 16

@@ -16,7 +16,7 @@ use std::time::Duration;
use anyhow::{Context, Result, anyhow, bail};
use clap::Parser;
use compute_api::requests::ComputeClaimsScope;
use compute_api::spec::{ComputeMode, PageserverConnectionInfo, PageserverShardConnectionInfo};
use compute_api::spec::{ComputeMode, PageserverProtocol};
use control_plane::broker::StorageBroker;
use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode};
use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
@@ -631,6 +631,10 @@ struct EndpointCreateCmdArgs {
help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
)]
allow_multiple: bool,

/// Only allow changing it on creation
#[clap(long, help = "Name of the privileged role for the endpoint")]
privileged_role_name: Option<String>,
}

#[derive(clap::Args)]
@@ -1480,6 +1484,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
args.grpc,
!args.update_catalog,
false,
args.privileged_role_name.clone(),
)?;
}
EndpointCmd::Start(args) => {
@@ -1516,35 +1521,29 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
)?;
}

let (shards, stripe_size) = if let Some(ps_id) = pageserver_id {
let conf = env.get_pageserver_conf(ps_id).unwrap();
let libpq_url = Some({
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
let port = port.unwrap_or(5432);
format!("postgres://no_user@{host}:{port}")
});
let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {
let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
let conf = env.get_pageserver_conf(pageserver_id).unwrap();
// Use gRPC if requested.
let pageserver = if endpoint.grpc {
let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
let (host, port) = parse_host_port(grpc_addr)?;
let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
Some(format!("grpc://no_user@{host}:{port}"))
(PageserverProtocol::Grpc, host, port)
} else {
None
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
let port = port.unwrap_or(5432);
(PageserverProtocol::Libpq, host, port)
};
let pageserver = PageserverShardConnectionInfo {
libpq_url,
grpc_url,
};

// If caller is telling us what pageserver to use, this is not a tenant which is
// fully managed by storage controller, therefore not sharded.
(vec![(0, pageserver)], DEFAULT_STRIPE_SIZE)
(vec![pageserver], DEFAULT_STRIPE_SIZE)
} else {
// Look up the currently attached location of the tenant, and its striping metadata,
// to pass these on to postgres.
let storage_controller = StorageController::from_env(env);
let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
let shards = futures::future::try_join_all(locate_result.shards.into_iter().map(
|shard| async move {
let pageservers = futures::future::try_join_all(
locate_result.shards.into_iter().map(|shard| async move {
if let ComputeMode::Static(lsn) = endpoint.mode {
// Initialize LSN leases for static computes.
let conf = env.get_pageserver_conf(shard.node_id).unwrap();
@@ -1556,34 +1555,28 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
.await?;
}

let libpq_host = Host::parse(&shard.listen_pg_addr)?;
let libpq_port = shard.listen_pg_port;
let libpq_url =
Some(format!("postgres://no_user@{libpq_host}:{libpq_port}"));

let grpc_url = if let Some(grpc_host) = shard.listen_grpc_addr {
let grpc_port = shard.listen_grpc_port.expect("no gRPC port");
Some(format!("grpc://no_user@{grpc_host}:{grpc_port}"))
let pageserver = if endpoint.grpc {
(
PageserverProtocol::Grpc,
Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))?,
shard.listen_grpc_port.expect("no gRPC port"),
)
} else {
None
(
PageserverProtocol::Libpq,
Host::parse(&shard.listen_pg_addr)?,
shard.listen_pg_port,
)
};
let pageserver = PageserverShardConnectionInfo {
libpq_url,
grpc_url,
};
anyhow::Ok((shard.shard_id.shard_number.0 as u32, pageserver))
},
))
anyhow::Ok(pageserver)
}),
)
.await?;
let stripe_size = locate_result.shard_params.stripe_size;

(shards, stripe_size)
};
assert!(!shards.is_empty());
let pageserver_conninfo = PageserverConnectionInfo {
shards: shards.into_iter().collect(),
prefer_grpc: endpoint.grpc,
(pageservers, stripe_size)
};
assert!(!pageservers.is_empty());

let ps_conf = env.get_pageserver_conf(DEFAULT_PAGESERVER_ID)?;
let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {
@@ -1613,7 +1606,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
endpoint_storage_addr,
safekeepers_generation,
safekeepers,
pageserver_conninfo,
pageservers,
remote_ext_base_url: remote_ext_base_url.clone(),
shard_stripe_size: stripe_size.0 as usize,
create_test_user: args.create_test_user,
@@ -1632,27 +1625,20 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
.endpoints
.get(endpoint_id.as_str())
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
let shards = if let Some(ps_id) = args.endpoint_pageserver_id {
let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
let conf = env.get_pageserver_conf(ps_id)?;
let libpq_url = Some({
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
let port = port.unwrap_or(5432);
format!("postgres://no_user@{host}:{port}")
});
let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {
// Use gRPC if requested.
let pageserver = if endpoint.grpc {
let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
let (host, port) = parse_host_port(grpc_addr)?;
let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
Some(format!("grpc://no_user@{host}:{port}"))
(PageserverProtocol::Grpc, host, port)
} else {
None
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
let port = port.unwrap_or(5432);
(PageserverProtocol::Libpq, host, port)
};
let pageserver = PageserverShardConnectionInfo {
libpq_url,
grpc_url,
};
// If caller is telling us what pageserver to use, this is not a tenant which is
// fully managed by storage controller, therefore not sharded.
vec![(0, pageserver)]
vec![pageserver]
} else {
let storage_controller = StorageController::from_env(env);
storage_controller
@@ -1662,36 +1648,28 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
.into_iter()
.map(|shard| {
// Use gRPC if requested.
let libpq_host = Host::parse(&shard.listen_pg_addr).expect("bad hostname");
let libpq_port = shard.listen_pg_port;
let libpq_url =
Some(format!("postgres://no_user@{libpq_host}:{libpq_port}"));

let grpc_url = if let Some(grpc_host) = shard.listen_grpc_addr {
let grpc_port = shard.listen_grpc_port.expect("no gRPC port");
Some(format!("grpc://no_user@{grpc_host}:{grpc_port}"))
if endpoint.grpc {
(
PageserverProtocol::Grpc,
Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))
.expect("bad hostname"),
shard.listen_grpc_port.expect("no gRPC port"),
)
} else {
None
};
(
shard.shard_id.shard_number.0 as u32,
PageserverShardConnectionInfo {
libpq_url,
grpc_url,
},
)
(
PageserverProtocol::Libpq,
Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
shard.listen_pg_port,
)
}
})
.collect::<Vec<_>>()
};
let pageserver_conninfo = PageserverConnectionInfo {
shards: shards.into_iter().collect(),
prefer_grpc: endpoint.grpc,
};
// If --safekeepers argument is given, use only the listed
// safekeeper nodes; otherwise all from the env.
let safekeepers = parse_safekeepers(&args.safekeepers)?;
endpoint
.reconfigure(Some(pageserver_conninfo), None, safekeepers, None)
.reconfigure(Some(pageservers), None, safekeepers, None)
.await?;
}
EndpointCmd::Stop(args) => {

@@ -36,7 +36,7 @@ impl StorageBroker {
pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
let broker = &self.env.broker;

print!("Starting neon broker at {}", broker.client_url());
println!("Starting neon broker at {}", broker.client_url());

let mut args = Vec::new();

@@ -32,7 +32,8 @@
//! config.json - passed to `compute_ctl`
//! pgdata/
//! postgresql.conf - copy of postgresql.conf created by `compute_ctl`
//! zenith.signal
//! neon.signal
//! zenith.signal - copy of neon.signal, for backward compatibility
//! <other PostgreSQL files>
//! ```
//!
@@ -56,13 +57,9 @@ use compute_api::responses::{
TlsConfig,
};
use compute_api::spec::{
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
RemoteExtSpec, Role,
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,
PgIdent, RemoteExtSpec, Role,
};

// re-export these, because they're used in the reconfigure() function
pub use compute_api::spec::{PageserverConnectionInfo, PageserverShardConnectionInfo};

use jsonwebtoken::jwk::{
AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,
@@ -78,6 +75,7 @@ use sha2::{Digest, Sha256};
use spki::der::Decode;
use spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};
use tracing::debug;
use url::Host;
use utils::id::{NodeId, TenantId, TimelineId};

use crate::local_env::LocalEnv;
@@ -101,6 +99,7 @@ pub struct EndpointConf {
features: Vec<ComputeFeature>,
cluster: Option<Cluster>,
compute_ctl_config: ComputeCtlConfig,
privileged_role_name: Option<String>,
}

//
@@ -201,6 +200,7 @@ impl ComputeControlPlane {
grpc: bool,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
privileged_role_name: Option<String>,
) -> Result<Arc<Endpoint>> {
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -238,6 +238,7 @@ impl ComputeControlPlane {
features: vec![],
cluster: None,
compute_ctl_config: compute_ctl_config.clone(),
privileged_role_name: privileged_role_name.clone(),
});

ep.create_endpoint_dir()?;
@@ -259,6 +260,7 @@ impl ComputeControlPlane {
features: vec![],
cluster: None,
compute_ctl_config,
privileged_role_name,
})?,
)?;
std::fs::write(
@@ -334,6 +336,9 @@ pub struct Endpoint {

/// The compute_ctl config for the endpoint's compute.
compute_ctl_config: ComputeCtlConfig,

/// The name of the privileged role for the endpoint.
privileged_role_name: Option<String>,
}

#[derive(PartialEq, Eq)]
@@ -382,7 +387,7 @@ pub struct EndpointStartArgs {
pub endpoint_storage_addr: String,
pub safekeepers_generation: Option<SafekeeperGeneration>,
pub safekeepers: Vec<NodeId>,
pub pageserver_conninfo: PageserverConnectionInfo,
pub pageservers: Vec<(PageserverProtocol, Host, u16)>,
pub remote_ext_base_url: Option<String>,
pub shard_stripe_size: usize,
pub create_test_user: bool,
@@ -434,6 +439,7 @@ impl Endpoint {
features: conf.features,
cluster: conf.cluster,
compute_ctl_config: conf.compute_ctl_config,
privileged_role_name: conf.privileged_role_name,
})
}

@@ -466,7 +472,7 @@ impl Endpoint {
conf.append("max_connections", "100");
conf.append("wal_level", "logical");
// wal_sender_timeout is the maximum time to wait for WAL replication.
// It also defines how often the walreciever will send a feedback message to the wal sender.
// It also defines how often the walreceiver will send a feedback message to the wal sender.
conf.append("wal_sender_timeout", "5s");
conf.append("listen_addresses", &self.pg_address.ip().to_string());
conf.append("port", &self.pg_address.port().to_string());
@@ -656,6 +662,14 @@ impl Endpoint {
}
}

fn build_pageserver_connstr(pageservers: &[(PageserverProtocol, Host, u16)]) -> String {
pageservers
.iter()
.map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
.collect::<Vec<_>>()
.join(",")
}

/// Map safekeepers ids to the actual connection strings.
fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {
let mut safekeeper_connstrings = Vec::new();

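build_pageserver_connstr, added in the hunk above, folds one URL per pageserver into the single comma-separated string stored in pageserver_connstring. A standalone sketch of that formatting; the Protocol enum and its Display impl stand in for PageserverProtocol, and the hosts and ports are invented:

```rust
use std::fmt;

// Stand-in for PageserverProtocol: Display renders the URL scheme.
enum Protocol {
    Libpq,
    Grpc,
}

impl fmt::Display for Protocol {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Protocol::Libpq => write!(f, "postgres"),
            Protocol::Grpc => write!(f, "grpc"),
        }
    }
}

fn build_pageserver_connstr(pageservers: &[(Protocol, &str, u16)]) -> String {
    pageservers
        .iter()
        .map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
        .collect::<Vec<_>>()
        .join(",")
}

fn main() {
    let pageservers = [(Protocol::Libpq, "ps1", 6400), (Protocol::Grpc, "ps2", 51051)];
    assert_eq!(
        build_pageserver_connstr(&pageservers),
        "postgres://no_user@ps1:6400,grpc://no_user@ps2:51051"
    );
}
```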
@@ -701,6 +715,9 @@ impl Endpoint {
std::fs::remove_dir_all(self.pgdata())?;
}

let pageserver_connstring = Self::build_pageserver_connstr(&args.pageservers);
assert!(!pageserver_connstring.is_empty());

let safekeeper_connstrings = self.build_safekeepers_connstrs(args.safekeepers)?;

// check for file remote_extensions_spec.json
@@ -759,7 +776,7 @@ impl Endpoint {
branch_id: None,
endpoint_id: Some(self.endpoint_id.clone()),
mode: self.mode,
pageserver_connection_info: Some(args.pageserver_conninfo),
pageserver_connstring: Some(pageserver_connstring),
safekeepers_generation: args.safekeepers_generation.map(|g| g.into_inner()),
safekeeper_connstrings,
storage_auth_token: args.auth_token.clone(),
@@ -861,6 +878,10 @@ impl Endpoint {
cmd.arg("--dev");
}

if let Some(privileged_role_name) = self.privileged_role_name.clone() {
cmd.args(["--privileged-role-name", &privileged_role_name]);
}

let child = cmd.spawn()?;
// set up a scopeguard to kill & wait for the child in case we panic or bail below
let child = scopeguard::guard(child, |mut child| {
@@ -914,7 +935,8 @@ impl Endpoint {
ComputeStatus::Empty
| ComputeStatus::ConfigurationPending
| ComputeStatus::Configuration
| ComputeStatus::TerminationPending { .. }
| ComputeStatus::TerminationPendingFast
| ComputeStatus::TerminationPendingImmediate
| ComputeStatus::Terminated => {
bail!("unexpected compute status: {:?}", state.status)
}
@@ -972,7 +994,7 @@ impl Endpoint {

pub async fn reconfigure(
&self,
pageserver_conninfo: Option<PageserverConnectionInfo>,
pageservers: Option<Vec<(PageserverProtocol, Host, u16)>>,
stripe_size: Option<ShardStripeSize>,
safekeepers: Option<Vec<NodeId>>,
safekeeper_generation: Option<SafekeeperGeneration>,
@@ -988,17 +1010,15 @@ impl Endpoint {
let postgresql_conf = self.read_postgresql_conf()?;
spec.cluster.postgresql_conf = Some(postgresql_conf);

if let Some(pageserver_conninfo) = pageserver_conninfo {
// If pageservers are provided, we need to ensure that they are not empty.
// This is a requirement for the compute_ctl configuration.
anyhow::ensure!(
!pageserver_conninfo.shards.is_empty(),
"no pageservers provided"
);
spec.pageserver_connection_info = Some(pageserver_conninfo);
}
if stripe_size.is_some() {
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
// If pageservers are not specified, don't change them.
if let Some(pageservers) = pageservers {
anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");

let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
spec.pageserver_connstring = Some(pageserver_connstr);
if stripe_size.is_some() {
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
}
}

// If safekeepers are not specified, don't change them.
@@ -1047,7 +1067,7 @@ impl Endpoint {

pub async fn reconfigure_pageservers(
&self,
pageservers: PageserverConnectionInfo,
pageservers: Vec<(PageserverProtocol, Host, u16)>,
stripe_size: Option<ShardStripeSize>,
) -> Result<()> {
self.reconfigure(Some(pageservers), stripe_size, None, None)

@@ -217,6 +217,9 @@ pub struct NeonStorageControllerConf {
pub posthog_config: Option<PostHogConfig>,

pub kick_secondary_downloads: Option<bool>,

#[serde(with = "humantime_serde")]
pub shard_split_request_timeout: Option<Duration>,
}

impl NeonStorageControllerConf {
@@ -250,6 +253,7 @@ impl Default for NeonStorageControllerConf {
timeline_safekeeper_count: None,
posthog_config: None,
kick_secondary_downloads: None,
shard_split_request_timeout: None,
}
}
}

@@ -303,7 +303,7 @@ impl PageServerNode {
async fn start_node(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
// TODO: using a thread here because start_process() is not async but we need to call check_status()
let datadir = self.repo_path();
print!(
println!(
"Starting pageserver node {} at '{}' in {:?}, retrying for {:?}",
self.conf.id,
self.pg_connection_config.raw_address(),
@@ -452,6 +452,12 @@ impl PageServerNode {
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'image_creation_threshold' as non zero integer")?,
// HADRON
image_layer_force_creation_period: settings
.remove("image_layer_force_creation_period")
.map(humantime::parse_duration)
.transpose()
.context("Failed to parse 'image_layer_force_creation_period' as duration")?,
image_layer_creation_check_threshold: settings
.remove("image_layer_creation_check_threshold")
.map(|x| x.parse::<u8>())

@@ -127,7 +127,7 @@ impl SafekeeperNode {
extra_opts: &[String],
retry_timeout: &Duration,
) -> anyhow::Result<()> {
print!(
println!(
"Starting safekeeper at '{}' in '{}', retrying for {:?}",
self.pg_connection_config.raw_address(),
self.datadir_path().display(),

@@ -648,6 +648,13 @@ impl StorageController {
args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
}

if let Some(duration) = self.config.shard_split_request_timeout {
args.push(format!(
"--shard-split-request-timeout={}",
humantime::Duration::from(duration)
));
}

let mut envs = vec![
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
@@ -660,7 +667,7 @@ impl StorageController {
));
}

println!("Starting storage controller");
println!("Starting storage controller at {scheme}://{host}:{listen_port}");

background_process::start_process(
COMMAND,

@@ -14,6 +14,7 @@ humantime.workspace = true
pageserver_api.workspace = true
pageserver_client.workspace = true
reqwest.workspace = true
safekeeper_api.workspace=true
serde_json = { workspace = true, features = ["raw_value"] }
storage_controller_client.workspace = true
tokio.workspace = true

@@ -11,7 +11,7 @@ use pageserver_api::controller_api::{
PlacementPolicy, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest,
ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
TenantShardMigrateRequest, TenantShardMigrateResponse,
TenantShardMigrateRequest, TenantShardMigrateResponse, TimelineSafekeeperMigrateRequest,
};
use pageserver_api::models::{
EvictionPolicy, EvictionPolicyLayerAccessThreshold, ShardParameters, TenantConfig,
@@ -21,6 +21,7 @@ use pageserver_api::models::{
use pageserver_api::shard::{ShardStripeSize, TenantShardId};
use pageserver_client::mgmt_api::{self};
use reqwest::{Certificate, Method, StatusCode, Url};
use safekeeper_api::models::TimelineLocateResponse;
use storage_controller_client::control_api::Client;
use utils::id::{NodeId, TenantId, TimelineId};

@@ -75,6 +76,12 @@ enum Command {
NodeStartDelete {
#[arg(long)]
node_id: NodeId,
/// When `force` is true, skip waiting for shards to prewarm during migration.
/// This can significantly speed up node deletion since prewarming all shards
/// can take considerable time, but may result in slower initial access to
/// migrated shards until they warm up naturally.
#[arg(long)]
force: bool,
},
/// Cancel deletion of the specified pageserver and wait for `timeout`
/// for the operation to be canceled. May be retried.
@@ -279,6 +286,23 @@ enum Command {
#[arg(long)]
concurrency: Option<usize>,
},
/// Locate safekeepers for a timeline from the storcon DB.
TimelineLocate {
#[arg(long)]
tenant_id: TenantId,
#[arg(long)]
timeline_id: TimelineId,
},
/// Migrate a timeline to a new set of safekeepers
TimelineSafekeeperMigrate {
#[arg(long)]
tenant_id: TenantId,
#[arg(long)]
timeline_id: TimelineId,
/// Example: --new-sk-set 1,2,3
#[arg(long, required = true, value_delimiter = ',')]
new_sk_set: Vec<NodeId>,
},
}

#[derive(Parser)]
@@ -458,6 +482,7 @@ async fn main() -> anyhow::Result<()> {
listen_http_port,
listen_https_port,
availability_zone_id: AvailabilityZone(availability_zone_id),
node_ip_addr: None,
}),
)
.await?;
@@ -933,13 +958,14 @@ async fn main() -> anyhow::Result<()> {
.dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
.await?;
}
Command::NodeStartDelete { node_id } => {
Command::NodeStartDelete { node_id, force } => {
let query = if force {
format!("control/v1/node/{node_id}/delete?force=true")
} else {
format!("control/v1/node/{node_id}/delete")
};
storcon_client
.dispatch::<(), ()>(
Method::PUT,
format!("control/v1/node/{node_id}/delete"),
None,
)
.dispatch::<(), ()>(Method::PUT, query, None)
.await?;
println!("Delete started for {node_id}");
}
@@ -1324,7 +1350,7 @@ async fn main() -> anyhow::Result<()> {
concurrency,
} => {
let mut path = format!(
"/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
"v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
);

if let Some(c) = concurrency {
@@ -1335,6 +1361,41 @@ async fn main() -> anyhow::Result<()> {
.dispatch::<(), ()>(Method::POST, path, None)
.await?;
}
Command::TimelineLocate {
tenant_id,
timeline_id,
} => {
let path = format!("debug/v1/tenant/{tenant_id}/timeline/{timeline_id}/locate");

let resp = storcon_client
.dispatch::<(), TimelineLocateResponse>(Method::GET, path, None)
.await?;

let sk_set = resp.sk_set.iter().map(|id| id.0 as i64).collect::<Vec<_>>();
let new_sk_set = resp
.new_sk_set
.as_ref()
.map(|ids| ids.iter().map(|id| id.0 as i64).collect::<Vec<_>>());

println!("generation = {}", resp.generation);
println!("sk_set = {sk_set:?}");
println!("new_sk_set = {new_sk_set:?}");
}
Command::TimelineSafekeeperMigrate {
tenant_id,
timeline_id,
new_sk_set,
} => {
let path = format!("v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate");

storcon_client
.dispatch::<_, ()>(
Method::POST,
path,
Some(TimelineSafekeeperMigrateRequest { new_sk_set }),
)
.await?;
}
}

Ok(())

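The new --force flag and the --new-sk-set list use standard clap attributes; value_delimiter = ',' is what turns 1,2,3 into a Vec. A self-contained sketch with a plain u64 standing in for NodeId:

```rust
use clap::Parser;

/// Illustrative subset of the storcon_cli arguments added above.
#[derive(Parser, Debug)]
struct Args {
    /// Skip waiting for shards to prewarm during migration.
    #[arg(long)]
    force: bool,

    /// Example: --new-sk-set 1,2,3
    #[arg(long, required = true, value_delimiter = ',')]
    new_sk_set: Vec<u64>,
}

fn main() {
    // Simulate: prog --force --new-sk-set 1,2,3
    let args = Args::parse_from(["prog", "--force", "--new-sk-set", "1,2,3"]);
    assert!(args.force);
    assert_eq!(args.new_sk_set, vec![1, 2, 3]);
    println!("{args:?}");
}
```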
@@ -54,14 +54,16 @@ else
printf '%s\n' "${result}" | jq .
fi

echo "Check if a timeline present"
PARAMS=(
-X GET
-H "Content-Type: application/json"
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
)
timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
if [[ -z "${timeline_id}" || "${timeline_id}" = null ]]; then
if [[ "${RUN_PARALLEL:-false}" != "true" ]]; then
echo "Check if a timeline present"
PARAMS=(
-X GET
-H "Content-Type: application/json"
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
)
timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
fi
if [[ -z "${timeline_id:-}" || "${timeline_id:-}" = null ]]; then
generate_id timeline_id
PARAMS=(
-sbf

@@ -142,7 +142,7 @@ services:
- "storage_broker"
- "--listen-addr=0.0.0.0:50051"

compute:
compute1:
restart: always
build:
context: ./compute_wrapper/
@@ -152,6 +152,7 @@ services:
- TAG=${COMPUTE_TAG:-${TAG:-latest}}
- http_proxy=${http_proxy:-}
- https_proxy=${https_proxy:-}
image: built-compute
environment:
- PG_VERSION=${PG_VERSION:-16}
- TENANT_ID=${TENANT_ID:-}
@@ -166,6 +167,11 @@ services:
- 3080:3080 # http endpoints
entrypoint:
- "/shell/compute.sh"
# Ad an alias for compute1 for compatibility
networks:
default:
aliases:
- compute
depends_on:
- safekeeper1
- safekeeper2
@@ -174,15 +180,20 @@ services:

compute_is_ready:
image: postgres:latest
environment:
- PARALLEL_COMPUTES=1
entrypoint:
- "/bin/bash"
- "/bin/sh"
- "-c"
command:
- "until pg_isready -h compute -p 55433 -U cloud_admin ; do
echo 'Waiting to start compute...' && sleep 1;
done"
- "for i in $(seq 1 $${PARALLEL_COMPUTES}); do
until pg_isready -h compute$$i -p 55433 -U cloud_admin ; do
sleep 1;
done;
done;
echo All computes are started"
depends_on:
- compute
- compute1

neon-test-extensions:
profiles: ["test-extensions"]
@@ -196,4 +207,4 @@ services:
command:
- sleep 3600
depends_on:
- compute
- compute1

@@ -1,4 +1,4 @@
#!/bin/bash
#!/usr/bin/env bash

# A basic test to ensure Docker images are built correctly.
# Build a wrapper around the compute, start all services and runs a simple SQL query.
@@ -13,9 +13,36 @@
#
set -eux -o pipefail

cd "$(dirname "${0}")"
export COMPOSE_FILE='docker-compose.yml'
export COMPOSE_PROFILES=test-extensions
cd "$(dirname "${0}")"
export PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
READY_MESSAGE="All computes are started"
COMPUTES=()
for i in $(seq 1 "${PARALLEL_COMPUTES}"); do
COMPUTES+=("compute${i}")
done
CURRENT_TMPDIR=$(mktemp -d)
trap 'rm -rf ${CURRENT_TMPDIR} docker-compose-parallel.yml' EXIT
if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
export COMPOSE_FILE=docker-compose-parallel.yml
cp docker-compose.yml docker-compose-parallel.yml
# Replace the environment variable PARALLEL_COMPUTES with the actual value
yq eval -i ".services.compute_is_ready.environment |= map(select(. | test(\"^PARALLEL_COMPUTES=\") | not)) + [\"PARALLEL_COMPUTES=${PARALLEL_COMPUTES}\"]" ${COMPOSE_FILE}
for i in $(seq 2 "${PARALLEL_COMPUTES}"); do
# Duplicate compute1 as compute${i} for parallel execution
yq eval -i ".services.compute${i} = .services.compute1" ${COMPOSE_FILE}
# We don't need these sections, so delete them
yq eval -i "(del .services.compute${i}.build) | (del .services.compute${i}.ports) | (del .services.compute${i}.networks)" ${COMPOSE_FILE}
# Let the compute 1 be the only dependence
yq eval -i ".services.compute${i}.depends_on = [\"compute1\"]" ${COMPOSE_FILE}
# Set RUN_PARALLEL=true for compute2. They will generate tenant_id and timeline_id to avoid using the same as other computes
yq eval -i ".services.compute${i}.environment += [\"RUN_PARALLEL=true\"]" ${COMPOSE_FILE}
# Remove TENANT_ID and TIMELINE_ID from the environment variables of the generated computes
# They will create new TENANT_ID and TIMELINE_ID anyway.
yq eval -i ".services.compute${i}.environment |= map(select(. | (test(\"^TENANT_ID=\") or test(\"^TIMELINE_ID=\")) | not))" ${COMPOSE_FILE}
done
fi
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"

function cleanup() {
@@ -27,11 +54,11 @@ function cleanup() {

for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
pg_version=${pg_version/v/}
echo "clean up containers if exists"
echo "clean up containers if exist"
cleanup
PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull --build -d

PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose build compute1
PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull -d
echo "wait until the compute is ready. timeout after 60s. "
cnt=0
while sleep 3; do
@@ -41,45 +68,50 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
echo "timeout before the compute is ready."
exit 1
fi
if docker compose logs "compute_is_ready" | grep -q "accepting connections"; then
if docker compose logs compute_is_ready | grep -q "${READY_MESSAGE}"; then
echo "OK. The compute is ready to connect."
echo "execute simple queries."
docker compose exec compute /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
for compute in "${COMPUTES[@]}"; do
docker compose exec "${compute}" /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
done
break
fi
done

if [[ ${pg_version} -ge 16 ]]; then
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
echo Adding dummy config
docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
# Prepare for the PostGIS test
docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
TMPDIR=$(mktemp -d)
docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
rm -rf "${TMPDIR}"
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
TMPDIR=$(mktemp -d)
docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
docker compose cp "${TMPDIR}/data" compute:/ext-src/pg_hint_plan-src/
rm -rf "${TMPDIR}"
# The following block does the same for the contrib/file_fdw test
TMPDIR=$(mktemp -d)
docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${TMPDIR}/data"
docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
rm -rf "${TMPDIR}"
mkdir "${CURRENT_TMPDIR}"/{pg_hint_plan-src,file_fdw,postgis-src}
docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${CURRENT_TMPDIR}/postgis-src/test"
docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${CURRENT_TMPDIR}/postgis-src/00-regress-install"
docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${CURRENT_TMPDIR}/pg_hint_plan-src/data"
docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${CURRENT_TMPDIR}/file_fdw/data"

for compute in "${COMPUTES[@]}"; do
# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
echo Adding dummy config on "${compute}"
docker compose exec "${compute}" touch /var/db/postgres/compute/compute_ctl_temp_override.conf
# Prepare for the PostGIS test
|
||||
docker compose exec "${compute}" mkdir -p /tmp/pgis_reg/pgis_reg_tmp /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
|
||||
docker compose cp "${CURRENT_TMPDIR}/postgis-src/test" "${compute}":/ext-src/postgis-src/raster/test
|
||||
docker compose cp "${CURRENT_TMPDIR}/postgis-src/00-regress-install" "${compute}":/ext-src/postgis-src/regress
|
||||
# The following block copies the files for the pg_hint_plan test to the compute node for the extension test in an isolated docker-compose environment
|
||||
docker compose cp "${CURRENT_TMPDIR}/pg_hint_plan-src/data" "${compute}":/ext-src/pg_hint_plan-src/
|
||||
# The following block does the same for the contrib/file_fdw test
|
||||
docker compose cp "${CURRENT_TMPDIR}/file_fdw/data" "${compute}":/postgres/contrib/file_fdw/data
|
||||
done
|
||||
# Apply patches
|
||||
docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
|
||||
# We are running tests now
|
||||
rm -f testout.txt testout_contrib.txt
|
||||
# We want to run the longest tests first to better utilize parallelization and reduce overall test time.
|
||||
# Tests listed in the RUN_FIRST variable will be run before others.
|
||||
# If parallelization is not used, this environment variable will be ignored.
|
||||
|
||||
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
|
||||
-e RUN_FIRST=hll-src,postgis-src,pgtap-src -e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
|
||||
neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
|
||||
docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
|
||||
-e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
|
||||
neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
|
||||
if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then
|
||||
CONTRIB_FAILED=
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -x
|
||||
|
||||
if [[ -v BENCHMARK_CONNSTR ]]; then
|
||||
@@ -26,8 +26,9 @@ if [[ -v BENCHMARK_CONNSTR ]]; then
|
||||
fi
|
||||
fi
|
||||
REGULAR_USER=false
|
||||
while getopts r arg; do
|
||||
case $arg in
|
||||
PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
|
||||
while getopts pr arg; do
|
||||
case ${arg} in
|
||||
r)
|
||||
REGULAR_USER=true
|
||||
shift $((OPTIND-1))
|
||||
@@ -41,26 +42,49 @@ extdir=${1}
|
||||
|
||||
cd "${extdir}" || exit 2
|
||||
FAILED=
|
||||
LIST=$( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
|
||||
for d in ${LIST}; do
|
||||
[ -d "${d}" ] || continue
|
||||
if ! psql -w -c "select 1" >/dev/null; then
|
||||
FAILED="${d} ${FAILED}"
|
||||
break
|
||||
fi
|
||||
if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
|
||||
"${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
|
||||
continue
|
||||
fi
|
||||
export FAILED_FILE=/tmp/failed
|
||||
rm -f ${FAILED_FILE}
|
||||
mapfile -t LIST < <( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
|
||||
if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
|
||||
# Avoid errors if RUN_FIRST is not defined
|
||||
RUN_FIRST=${RUN_FIRST:-}
|
||||
# Move entries listed in the RUN_FIRST variable to the beginning
|
||||
ORDERED_LIST=$(printf "%s\n" "${LIST[@]}" | grep -x -Ff <(echo -e "${RUN_FIRST//,/$'\n'}"); printf "%s\n" "${LIST[@]}" | grep -vx -Ff <(echo -e "${RUN_FIRST//,/$'\n'}"))
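# Example of the reordering (values illustrative): with RUN_FIRST="postgis-src,hll-src"
# and LIST=(hll-src pg_semver-src postgis-src), ORDERED_LIST contains the RUN_FIRST
# matches first (kept in LIST order), then the rest, newline-separated:
#   hll-src postgis-src pg_semver-src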
|
||||
parallel -j"${PARALLEL_COMPUTES}" "[[ -d {} ]] || exit 0
|
||||
export PGHOST=compute{%}
|
||||
if ! psql -c 'select 1'>/dev/null; then
|
||||
exit 1
|
||||
fi
|
||||
echo Running on \${PGHOST}
|
||||
if [[ -f ${extdir}/{}/neon-test.sh ]]; then
|
||||
echo Running from script
|
||||
${extdir}/{}/neon-test.sh || echo {} >> ${FAILED_FILE};
|
||||
else
|
||||
echo Running using make;
|
||||
USE_PGXS=1 make -C {} installcheck || echo {} >> ${FAILED_FILE};
|
||||
fi" ::: ${ORDERED_LIST}
|
||||
[[ ! -f ${FAILED_FILE} ]] && exit 0
|
||||
else
|
||||
for d in "${LIST[@]}"; do
|
||||
[ -d "${d}" ] || continue
|
||||
if ! psql -w -c "select 1" >/dev/null; then
|
||||
FAILED="${d} ${FAILED}"
|
||||
break
|
||||
fi
|
||||
if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
|
||||
"${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
|
||||
continue
|
||||
fi
|
||||
|
||||
if [ -f "${d}/neon-test.sh" ]; then
|
||||
"${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
|
||||
else
|
||||
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
|
||||
fi
|
||||
done
|
||||
[ -z "${FAILED}" ] && exit 0
|
||||
for d in ${FAILED}; do
|
||||
if [ -f "${d}/neon-test.sh" ]; then
|
||||
"${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
|
||||
else
|
||||
USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
|
||||
fi
|
||||
done
|
||||
[[ -z ${FAILED} ]] && exit 0
|
||||
fi
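# Note on the parallel branch above (illustrative): GNU parallel substitutes {%}
# with the job-slot number (1..PARALLEL_COMPUTES) and {} with the list item, so
# each slot pins its tests to a dedicated compute host, e.g. with -j2:
#   slot 1 -> PGHOST=compute1, slot 2 -> PGHOST=compute2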
|
||||
for d in ${FAILED} $([[ ! -f ${FAILED_FILE} ]] || cat ${FAILED_FILE}); do
|
||||
cat "$(find $d -name regression.diffs)"
|
||||
done
|
||||
for postgis_diff in /tmp/pgis_reg/*_diff; do
|
||||
@@ -68,4 +92,5 @@ for postgis_diff in /tmp/pgis_reg/*_diff; do
|
||||
cat "${postgis_diff}"
|
||||
done
|
||||
echo "${FAILED}"
|
||||
cat ${FAILED_FILE}
|
||||
exit 1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
set -eux -o pipefail
|
||||
cd "$(dirname "${0}")"
|
||||
# Takes a variable name as argument. The result is stored in that variable.
|
||||
@@ -60,8 +60,8 @@ function check_timeline() {
|
||||
# Restarts the compute node with the required compute tag and timeline.
|
||||
# Accepts the tag for the compute node and the timeline as parameters.
|
||||
function restart_compute() {
|
||||
docker compose down compute compute_is_ready
|
||||
COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
|
||||
docker compose down compute1 compute_is_ready
|
||||
COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute1 compute_is_ready
|
||||
wait_for_ready
|
||||
check_timeline ${2}
|
||||
}
|
||||
|
||||
@@ -129,9 +129,10 @@ segment to bootstrap the WAL writing, but it doesn't contain the checkpoint reco
|
||||
changes in xlog.c, to allow starting the compute node without reading the last checkpoint record
|
||||
from WAL.
|
||||
|
||||
This includes code to read the `zenith.signal` file, which tells the startup code the LSN to start
|
||||
at. When the `zenith.signal` file is present, the startup uses that LSN instead of the last
|
||||
checkpoint's LSN. The system is known to be consistent at that LSN, without any WAL redo.
|
||||
This includes code to read the `neon.signal` (also `zenith.signal`) file, which tells the startup
|
||||
code the LSN to start at. When the `neon.signal` file is present, the startup uses that LSN
|
||||
instead of the last checkpoint's LSN. The system is known to be consistent at that LSN, without
|
||||
any WAL redo.
|
||||
|
||||
|
||||
### How to get rid of the patch
|
||||
|
||||
@@ -75,7 +75,7 @@ CLI examples:
|
||||
* AWS S3 : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}"`
|
||||
|
||||
For Amazon AWS S3, a key id and secret access key can be found in `~/.aws/credentials` (if awscli was ever configured to work with the desired bucket) or on the AWS settings page for a given user. Also note that bucket names do not contain any protocol prefix when used on AWS.
|
||||
For local S3 installations, refer to the their documentation for name format and credentials.
|
||||
For local S3 installations, refer to their documentation for name format and credentials.
|
||||
|
||||
Similar to other pageserver settings, toml config file can be used to configure either of the storages as backup targets.
|
||||
Required sections are:
|
||||
|
||||
@@ -20,7 +20,7 @@ In our case consensus leader is compute (walproposer), and we don't want to wake
|
||||
up all computes for the change. Nor do we want to fully reimplement the leader
|
||||
logic a second time outside compute. Because of that, the proposed algorithm relies
|
||||
for issuing configurations on the external fault tolerant (distributed) strongly
|
||||
consisent storage with simple API: CAS (compare-and-swap) on the single key.
|
||||
consistent storage with simple API: CAS (compare-and-swap) on the single key.
|
||||
Properly configured postgres suits this.
|
||||
|
||||
In the system consensus is implemented at the timeline level, so algorithm below
|
||||
@@ -34,7 +34,7 @@ A configuration is
|
||||
|
||||
```
|
||||
struct Configuration {
|
||||
generation: Generation, // a number uniquely identifying configuration
|
||||
generation: SafekeeperGeneration, // a number uniquely identifying configuration
|
||||
sk_set: Vec<NodeId>, // current safekeeper set
|
||||
new_sk_set: Optional<Vec<NodeId>>,
|
||||
}
|
||||
@@ -81,11 +81,11 @@ configuration generation in them is less than its current one. Namely, it
|
||||
refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
|
||||
response it sends its current configuration generation to let walproposer know.
|
||||
|
||||
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
|
||||
accepting `Configuration`. Safekeeper switches to the given conf it is higher than its
|
||||
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/membership`
|
||||
accepting `Configuration`. Safekeeper switches to the given conf if it is higher than its
|
||||
current one and ignores it otherwise. In any case it replies with
|
||||
```
|
||||
struct ConfigurationSwitchResponse {
|
||||
struct TimelineMembershipSwitchResponse {
|
||||
conf: Configuration,
|
||||
term: Term,
|
||||
last_log_term: Term,
|
||||
@@ -108,7 +108,7 @@ establishes this configuration as its own and moves to voting.
|
||||
It should stop talking to safekeepers not listed in the configuration at this
|
||||
point, though it is not unsafe to continue doing so.
|
||||
|
||||
To be elected it must receive votes from both majorites if `new_sk_set` is present.
|
||||
To be elected it must receive votes from both majorities if `new_sk_set` is present.
|
||||
Similarly, to commit WAL it must receive flush acknowledge from both majorities.
|
||||
|
||||
If walproposer hears from safekeeper configuration higher than his own (i.e.
|
||||
@@ -130,7 +130,7 @@ storage are reachable.
|
||||
1) Fetch current timeline configuration from the configuration storage.
|
||||
2) If it is already joint one and `new_set` is different from `desired_set`
|
||||
refuse to change. However, assign join conf to (in memory) var
|
||||
`join_conf` and proceed to step 4 to finish the ongoing change.
|
||||
`joint_conf` and proceed to step 4 to finish the ongoing change.
|
||||
3) Else, create joint `joint_conf: Configuration`: increment current conf number
|
||||
`n` and put `desired_set` to `new_sk_set`. Persist it in the configuration
|
||||
storage by doing CAS on the current generation: change happens only if
|
||||
@@ -161,11 +161,11 @@ storage are reachable.
|
||||
because `pull_timeline` already includes it and plus additionally would be
|
||||
broadcast by compute. More importantly, we may proceed to the next step
|
||||
only when `<last_log_term, flush_lsn>` on the majority of the new set reached
|
||||
`sync_position`. Similarly, on the happy path no waiting is not needed because
|
||||
`sync_position`. Similarly, on the happy path no waiting is needed because
|
||||
`pull_timeline` already includes it. However, we should double
|
||||
check to be safe. For example, timeline could have been created earlier e.g.
|
||||
manually or after try-to-migrate, abort, try-to-migrate-again sequence.
|
||||
7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new
|
||||
7) Create `new_conf: Configuration` incrementing `joint_conf` generation and having new
|
||||
safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
|
||||
storage under one more CAS.
|
||||
8) Call `PUT` `configuration` on safekeepers from the new set,
|
||||
@@ -178,12 +178,12 @@ spec of it.
|
||||
|
||||
The description above focuses on safety. To make the flow practical and live, here are a few more
|
||||
considerations.
|
||||
1) It makes sense to ping new set to ensure it we are migrating to live node(s) before
|
||||
1) It makes sense to ping new set to ensure we are migrating to live node(s) before
|
||||
step 3.
|
||||
2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
|
||||
it is safe to rollback to the old conf with one more CAS.
|
||||
3) On step 4 timeline might be already created on members of the new set for various reasons;
|
||||
the simplest is the procedure restart. There are more complicated scenarious like mentioned
|
||||
the simplest is the procedure restart. There are more complicated scenarios like mentioned
|
||||
in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
|
||||
generations, so seems simpler to treat existing timeline as success. However, this also
|
||||
has a disadvantage: you might imagine a surpassingly unlikely schedule where the condition in
|
||||
@@ -192,7 +192,7 @@ considerations.
|
||||
4) In the end timeline should be locally deleted on the safekeeper(s) which are
|
||||
in the old set but not in the new one, unless they are unreachable. To be
|
||||
safe this also should be done under generation number (deletion proceeds only if
|
||||
current configuration is <= than one in request and safekeeper is not memeber of it).
|
||||
current configuration is <= than one in request and safekeeper is not member of it).
|
||||
5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
|
||||
jump to step 7, using it as `new_conf`.
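To make the compare-and-swap dependency above concrete, here is a minimal sketch (illustrative types and names, not the actual storage_controller code) of how step 3 publishes the joint configuration with a single-key CAS:

```
#[derive(Clone, PartialEq, Debug)]
struct Configuration {
    generation: u32,               // stands in for SafekeeperGeneration
    sk_set: Vec<u64>,              // current safekeeper set (NodeId-like ids)
    new_sk_set: Option<Vec<u64>>,  // Some(..) only while a joint conf is in flight
}

struct CasConflict; // someone else advanced the configuration first

trait ConfigStore {
    /// Atomically replace `expected` with `next` under this timeline's key.
    fn cas(&self, timeline_key: &str, expected: &Configuration, next: &Configuration)
        -> Result<(), CasConflict>;
}

/// Step 3: build the joint configuration and publish it with one CAS; a conflict
/// means another actor changed the configuration and the caller must re-read it.
fn create_joint_conf(
    store: &dyn ConfigStore,
    timeline_key: &str,
    current: &Configuration,
    desired_set: Vec<u64>,
) -> Result<Configuration, CasConflict> {
    let joint = Configuration {
        generation: current.generation + 1,
        sk_set: current.sk_set.clone(),
        new_sk_set: Some(desired_set),
    };
    store.cas(timeline_key, current, &joint)?;
    Ok(joint)
}
```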
|
||||
|
||||
@@ -261,14 +261,14 @@ Timeline (branch) creation in cplane should call storage_controller POST
|
||||
Response should be augmented with `safekeepers_generation` and `safekeepers`
|
||||
fields like described in `/notify-safekeepers` above. Initially (currently)
|
||||
these fields may be absent; in this case cplane chooses safekeepers on its own
|
||||
like it currently does. The call should be retried until succeeds.
|
||||
like it currently does. The call should be retried until it succeeds.
|
||||
|
||||
Timeline deletion and tenant deletion in cplane should call appropriate
|
||||
storage_controller endpoints like it currently does for sharded tenants. The
|
||||
calls should be retried until they succeed.
|
||||
|
||||
When compute receives safekeepers list from control plane it needs to know the
|
||||
generation to checked whether it should be updated (note that compute may get
|
||||
When compute receives safekeeper list from control plane it needs to know the
|
||||
generation to check whether it should be updated (note that compute may get
|
||||
safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
|
||||
GUC is just a comma-separated list of `host:port`. Let's prefix it with
|
||||
`g#<generation>:` to this end, so it will look like
|
||||
@@ -305,8 +305,8 @@ enum MigrationRequest {
|
||||
```
|
||||
|
||||
`FinishPending` requests to run the procedure to ensure state is clean: current
|
||||
configuration is not joint and majority of safekeepers are aware of it, but do
|
||||
not attempt to migrate anywhere. If current configuration fetched on step 1 is
|
||||
configuration is not joint and the majority of safekeepers are aware of it, but do
|
||||
not attempt to migrate anywhere. If the current configuration fetched on step 1 is
|
||||
not joint it jumps to step 7. It should be run at startup for all timelines (but
|
||||
similarly, in the first version it is ok to trigger it manually).
|
||||
|
||||
@@ -315,7 +315,7 @@ similarly, in the first version it is ok to trigger it manually).
|
||||
`safekeepers` table mirroring current `nodes` should be added, except that for
|
||||
`scheduling_policy`: it is enough to have at least in the beginning only 3
|
||||
fields: 1) `active` 2) `paused` (initially this only means: do not assign new timelines there)
|
||||
3) `decomissioned` (node is removed).
|
||||
3) `decommissioned` (node is removed).
|
||||
|
||||
`timelines` table:
|
||||
```
|
||||
@@ -326,9 +326,10 @@ table! {
|
||||
tenant_id -> Varchar,
|
||||
start_lsn -> pg_lsn,
|
||||
generation -> Int4,
|
||||
sk_set -> Array<Int4>, // list of safekeeper ids
|
||||
sk_set -> Array<Int8>, // list of safekeeper ids
|
||||
new_sk_set -> Nullable<Array<Int8>>, // list of safekeeper ids, null if not joint conf
|
||||
cplane_notified_generation -> Int4,
|
||||
sk_set_notified_generation -> Int4, // the generation a quorum of sk_set knows about
|
||||
deleted_at -> Nullable<Timestamptz>,
|
||||
}
|
||||
}
|
||||
@@ -338,13 +339,23 @@ table! {
|
||||
might also want to add ancestor_timeline_id to preserve the hierarchy, but for
|
||||
this RFC it is not needed.
|
||||
|
||||
`cplane_notified_generation` and `sk_set_notified_generation` fields are used to
|
||||
track the last stage of the algorithm, when we need to notify safekeeper set and cplane
|
||||
with the final configuration after it's already committed to DB.
|
||||
|
||||
The timeline is up-to-date (no migration in progress) if `new_sk_set` is null and
|
||||
`*_notified_generation` fields are up to date with `generation`.
|
||||
|
||||
It's possible to replace `*_notified_generation` with one boolean field `migration_completed`,
|
||||
but for better observability it's nice to have them separately.
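A minimal sketch of that predicate, using the column names from the `timelines` table above (types simplified, not the actual storage_controller code):

```
fn migration_fully_completed(
    new_sk_set: Option<&[i64]>,
    generation: i32,
    sk_set_notified_generation: i32,
    cplane_notified_generation: i32,
) -> bool {
    new_sk_set.is_none()
        && sk_set_notified_generation == generation
        && cplane_notified_generation == generation
}
```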
|
||||
|
||||
#### API
|
||||
|
||||
Node management is similar to pageserver:
|
||||
1) POST `/control/v1/safekeepers` inserts safekeeper.
|
||||
2) GET `/control/v1/safekeepers` lists safekeepers.
|
||||
3) GET `/control/v1/safekeepers/:node_id` gets safekeeper.
|
||||
4) PUT `/control/v1/safekepers/:node_id/status` changes status to e.g.
|
||||
1) POST `/control/v1/safekeeper` inserts safekeeper.
|
||||
2) GET `/control/v1/safekeeper` lists safekeepers.
|
||||
3) GET `/control/v1/safekeeper/:node_id` gets safekeeper.
|
||||
4) PUT `/control/v1/safekeeper/:node_id/scheduling_policy` changes status to e.g.
|
||||
`offline` or `decommissioned`. Initially it is simpler not to schedule any
|
||||
migrations here.
|
||||
|
||||
@@ -368,8 +379,8 @@ Migration API: the first version is the simplest and the most imperative:
|
||||
all timelines from one safekeeper to another. It accepts json
|
||||
```
|
||||
{
|
||||
"src_sk": u32,
|
||||
"dst_sk": u32,
|
||||
"src_sk": NodeId,
|
||||
"dst_sk": NodeId,
|
||||
"limit": Optional<u32>,
|
||||
}
|
||||
```
|
||||
@@ -379,12 +390,15 @@ Returns list of scheduled requests.
|
||||
2) PUT `/control/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate` schedules `MigrationRequest`
|
||||
to move single timeline to given set of safekeepers:
|
||||
```
|
||||
{
|
||||
"desired_set": Vec<u32>,
|
||||
struct TimelineSafekeeperMigrateRequest {
|
||||
"new_sk_set": Vec<NodeId>,
|
||||
}
|
||||
```
|
||||
|
||||
Returns scheduled request.
|
||||
In the first version the handler migrates the timeline to `new_sk_set` synchronously.
|
||||
Should be retried until success.
|
||||
|
||||
In the future we might change it to asynchronous API and return scheduled request.
|
||||
|
||||
Similar call should be added for the tenant.
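For illustration, a call to the per-timeline endpoint described above could look like this (host, port and ids are placeholders; the body mirrors `TimelineSafekeeperMigrateRequest`):

```
curl -X PUT \
  "http://storage-controller.local:1234/control/v1/tenant/$TENANT_ID/timeline/$TIMELINE_ID/safekeeper_migrate" \
  -H 'Content-Type: application/json' \
  -d '{"new_sk_set": [11, 12, 13]}'
```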
|
||||
|
||||
@@ -434,6 +448,9 @@ table! {
|
||||
}
|
||||
```
|
||||
|
||||
We load all pending ops from the table into memory on startup.
|
||||
The table is needed only to preserve the state between restarts.
|
||||
|
||||
`op_type` can be `include` (seed from peers and ensure generation is up to
|
||||
date), `exclude` (remove locally) and `delete`. Field is actually not strictly
|
||||
needed as it can be computed from current configuration, but gives more explicit
|
||||
@@ -474,7 +491,7 @@ actions must be idempotent. Now, a tricky point here is timeline start LSN. For
|
||||
the initial (tenant creation) call cplane doesn't know it. However, setting
|
||||
start_lsn on safekeepers during creation is a good thing -- it provides a
|
||||
guarantee that walproposer can always find a common point in WAL histories of
|
||||
safekeeper and its own, and so absense of it would be a clear sign of
|
||||
safekeeper and its own, and so absence of it would be a clear sign of
|
||||
corruption. The following sequence works:
|
||||
1) Create timeline (or observe that it exists) on pageserver,
|
||||
figuring out last_record_lsn in response.
|
||||
@@ -497,11 +514,9 @@ corruption. The following sequence works:
|
||||
retries the call until 200 response.
|
||||
|
||||
There is a small question how request handler (timeline creation in this
|
||||
case) would interact with per sk reconciler. As always I prefer to do the
|
||||
simplest possible thing and here it seems to be just waking it up so it
|
||||
re-reads the db for work to do. Passing work in memory is faster, but
|
||||
that shouldn't matter, and path to scan db for work will exist anyway,
|
||||
simpler to reuse it.
|
||||
case) would interact with per sk reconciler. In the current implementation
|
||||
we first persist the request in the DB, and then send an in-memory request
|
||||
to each safekeeper reconciler to process it.
|
||||
|
||||
For pg version / wal segment size: while we may persist them in `timelines`
|
||||
table, it is not necessary as initial creation at step 3 can take them from
|
||||
@@ -509,30 +524,40 @@ pageserver or cplane creation call and later pull_timeline will carry them
|
||||
around.
|
||||
|
||||
Timeline migration.
|
||||
1) CAS to the db to create joint conf, and in the same transaction create
|
||||
`safekeeper_timeline_pending_ops` `include` entries to initialize new members
|
||||
as well as deliver this conf to current ones; poke per sk reconcilers to work
|
||||
on it. Also any conf change should also poke cplane notifier task(s).
|
||||
2) Once it becomes possible per alg description above, get out of joint conf
|
||||
with another CAS. Task should get wakeups from per sk reconcilers because
|
||||
conf switch is required for advancement; however retries should be sleep
|
||||
based as well as LSN advancement might be needed, though in happy path
|
||||
it isn't. To see whether further transition is possible on wakup migration
|
||||
executor polls safekeepers per the algorithm. CAS creating new conf with only
|
||||
new members should again insert entries to `safekeeper_timeline_pending_ops`
|
||||
to switch them there, as well as `exclude` rows to remove timeline from
|
||||
old members.
|
||||
1) CAS to the db to create joint conf. Since this moment the migration is considered to be
|
||||
"in progress". We can detect all "in-progress" migrations looking into the database.
|
||||
2) Do steps 4-6 from the algorithm, including `pull_timeline` onto `new_sk_set`, update membership
|
||||
configuration on all safekeepers, notify cplane, etc. All operations are idempotent,
|
||||
so we don't need to persist anything in the database at this stage. If any errors occur,
|
||||
it's safe to retry or abort the migration.
|
||||
3) Once it becomes possible per alg description above, get out of joint conf
|
||||
with another CAS. Also should insert `exclude` entries into `safekeeper_timeline_pending_ops`
|
||||
in the same DB transaction. Adding `exclude` entries atomically is necessary because after
|
||||
CAS we don't have the list of excluded safekeepers in the `timelines` table anymore, but we
|
||||
need to have them persisted somewhere in case the migration is interrupted right after the CAS.
|
||||
4) Finish the migration. The final membership configuration is committed to the DB at this stage.
|
||||
So, the migration can not be aborted anymore. But it can still be retried if the migration fails
|
||||
past stage 3. To finish the migration we need to send the new membership configuration to
|
||||
a new quorum of safekeepers, notify cplane with the new safekeeper list and schedule the `exclude`
|
||||
requests to the in-memory queue for the safekeeper reconciler. If the algorithm is retried, it's
|
||||
possible that we have already committed `exclude` requests to DB, but didn't send them to
|
||||
the in-memory queue. In this case we need to read them from `safekeeper_timeline_pending_ops`
|
||||
because it's the only place where they are persistent. The fields `sk_set_notified_generation`
|
||||
and `cplane_notified_generation` are updated after each step. The migration is considered
|
||||
fully completed when they match the `generation` field.
|
||||
|
||||
In practice, we can report "success" after stage 3 and do the "finish" step in per-timeline
|
||||
reconciler (if we implement it). But it's wise to at least try to finish them synchronously,
|
||||
so the timeline is always in a "good state" and doesn't require an old quorum to commit
|
||||
WAL after the migration reported "success".
|
||||
|
||||
Timeline deletion: just set `deleted_at` on the timeline row and insert
|
||||
`safekeeper_timeline_pending_ops` entries in the same xact, the rest is done by
|
||||
per sk reconcilers.
|
||||
|
||||
When node is removed (set to `decomissioned`), `safekeeper_timeline_pending_ops`
|
||||
When node is removed (set to `decommissioned`), `safekeeper_timeline_pending_ops`
|
||||
for it must be cleared in the same transaction.
|
||||
|
||||
One more task pool should infinitely retry notifying control plane about changed
|
||||
safekeeper sets (trying to make `cplane_notified_generation` equal to `generation`).
|
||||
|
||||
#### Dealing with multiple instances of storage_controller
|
||||
|
||||
Operations described above executed concurrently might create some errors but do
|
||||
@@ -541,7 +566,7 @@ of storage_controller it is fine to have it temporarily, e.g. during redeploy.
|
||||
|
||||
To harden against some controller instance creating some work in
|
||||
`safekeeper_timeline_pending_ops` and then disappearing without anyone picking up
|
||||
the job per sk reconcilers apart from explicit wakups should scan for work
|
||||
the job per sk reconcilers apart from explicit wakeups should scan for work
|
||||
periodically. It is possible to remove that though if all db updates are
|
||||
protected with leadership token/term -- then such scans are needed only after
|
||||
leadership is acquired.
|
||||
@@ -563,7 +588,7 @@ There should be following layers of tests:
|
||||
safekeeper communication and pull_timeline need to be mocked and main switch
|
||||
procedure wrapped as a node (thread) in simulation tests, using these
|
||||
mocks. Test would inject migrations like it currently injects
|
||||
safekeeper/walproposer restars. Main assert is the same -- committed WAL must
|
||||
safekeeper/walproposer restarts. Main assert is the same -- committed WAL must
|
||||
not be lost.
|
||||
|
||||
3) Since simulation testing injects at relatively high level points (not
|
||||
@@ -613,7 +638,7 @@ Let's have the following implementation bits for gradual rollout:
|
||||
`notify-safekeepers`.
|
||||
|
||||
Then the rollout for a region would be:
|
||||
- Current situation: safekeepers are choosen by control_plane.
|
||||
- Current situation: safekeepers are chosen by control_plane.
|
||||
- We manually migrate some timelines, test moving them around.
|
||||
- Then we enable `--set-safekeepers` so that all new timelines
|
||||
are on storage controller.
|
||||
|
||||
@@ -13,6 +13,8 @@ use utils::backoff::retry;
|
||||
pub fn app(state: Arc<Storage>) -> Router<()> {
|
||||
use axum::routing::{delete as _delete, get as _get};
|
||||
let delete_prefix = _delete(delete_prefix);
|
||||
// NB: On any changes do not forget to update the OpenAPI spec
|
||||
// in /endpoint_storage/src/openapi_spec.yml.
|
||||
Router::new()
|
||||
.route(
|
||||
"/{tenant_id}/{timeline_id}/{endpoint_id}/{*path}",
|
||||
|
||||
endpoint_storage/src/openapi_spec.yml (new file, 146 lines)
@@ -0,0 +1,146 @@
|
||||
openapi: "3.0.2"
|
||||
info:
|
||||
title: Endpoint Storage API
|
||||
description: Endpoint Storage API
|
||||
version: "1.0"
|
||||
license:
|
||||
name: "Apache"
|
||||
url: https://github.com/neondatabase/neon/blob/main/LICENSE
|
||||
servers:
|
||||
- url: ""
|
||||
paths:
|
||||
/status:
|
||||
description: Healthcheck endpoint
|
||||
get:
|
||||
description: Healthcheck
|
||||
security: []
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
|
||||
/{tenant_id}/{timeline_id}/{endpoint_id}/{key}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: endpoint_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: key
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
get:
|
||||
description: Get file from blob storage
|
||||
responses:
|
||||
"200":
|
||||
description: "File stream from blob storage"
|
||||
content:
|
||||
application/octet-stream:
|
||||
schema:
|
||||
type: string
|
||||
format: binary
|
||||
"400":
|
||||
description: File was not found
|
||||
"403":
|
||||
description: JWT does not authorize request to this route
|
||||
put:
|
||||
description: Insert file into blob storage. If file exists, override it
|
||||
requestBody:
|
||||
content:
|
||||
application/octet-stream:
|
||||
schema:
|
||||
type: string
|
||||
format: binary
|
||||
responses:
|
||||
"200":
|
||||
description: File was inserted successfully
|
||||
"403":
|
||||
description: JWT does not authorize request to this route
|
||||
delete:
|
||||
description: Delete file from blob storage
|
||||
responses:
|
||||
"200":
|
||||
description: File was successfully deleted or not found
|
||||
"403":
|
||||
description: JWT does not authorize request to this route
|
||||
|
||||
/{tenant_id}/{timeline_id}/{endpoint_id}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: endpoint_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
delete:
|
||||
description: Delete endpoint data from blob storage
|
||||
responses:
|
||||
"200":
|
||||
description: Endpoint data was deleted
|
||||
"403":
|
||||
description: JWT does not authorize request to this route
|
||||
|
||||
/{tenant_id}/{timeline_id}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- name: timeline_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
delete:
|
||||
description: Delete timeline data from blob storage
|
||||
responses:
|
||||
"200":
|
||||
description: Timeline data was deleted
|
||||
"403":
|
||||
description: JWT does not authorize request to this route
|
||||
|
||||
/{tenant_id}:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
delete:
|
||||
description: Delete tenant data from blob storage
|
||||
responses:
|
||||
"200":
|
||||
description: Tenant data was deleted
|
||||
"403":
|
||||
description: JWT does not authorize request to this route
|
||||
|
||||
components:
|
||||
securitySchemes:
|
||||
JWT:
|
||||
type: http
|
||||
scheme: bearer
|
||||
bearerFormat: JWT
|
||||
|
||||
security:
|
||||
- JWT: []
|
||||
@@ -46,16 +46,45 @@ pub struct ExtensionInstallResponse {
|
||||
pub version: ExtVersion,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Default, Debug, Clone)]
|
||||
/// Status of the LFC prewarm process. The same state machine is reused for
|
||||
/// both autoprewarm (prewarm after compute/Postgres start using the previously
|
||||
/// stored LFC state) and explicit prewarming via API.
|
||||
#[derive(Serialize, Default, Debug, Clone, PartialEq)]
|
||||
#[serde(tag = "status", rename_all = "snake_case")]
|
||||
pub enum LfcPrewarmState {
|
||||
/// Default value when compute boots up.
|
||||
#[default]
|
||||
NotPrewarmed,
|
||||
/// Prewarming thread is active and loading pages into LFC.
|
||||
Prewarming,
|
||||
/// We found requested LFC state in the endpoint storage and
|
||||
/// completed prewarming successfully.
|
||||
Completed,
|
||||
Failed {
|
||||
error: String,
|
||||
},
|
||||
/// Unexpected error happened during prewarming. Note, `Not Found 404`
|
||||
/// response from the endpoint storage is explicitly excluded here
|
||||
/// because it can normally happen on the first compute start,
|
||||
/// since LFC state is not available yet.
|
||||
Failed { error: String },
|
||||
/// We tried to fetch the corresponding LFC state from the endpoint storage,
|
||||
/// but received `Not Found 404`. This should normally happen only during the
|
||||
/// first endpoint start after creation with `autoprewarm: true`.
|
||||
///
|
||||
/// During the orchestrated prewarm via API, when a caller explicitly
|
||||
/// provides the LFC state key to prewarm from, it's the caller's responsibility
|
||||
/// to handle this status as an error state in this case.
|
||||
Skipped,
|
||||
}
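// Illustrative only (not part of the change itself): with the serde attributes
// above, variants serialize as internally tagged JSON objects, e.g.
// `{"status":"failed","error":"..."}` and `{"status":"skipped"}`.
#[cfg(test)]
mod lfc_prewarm_state_wire_format {
    use super::*;

    #[test]
    fn failed_variant_is_internally_tagged() {
        let json = serde_json::to_string(&LfcPrewarmState::Failed { error: "io".to_string() })
            .unwrap();
        assert_eq!(json, r#"{"status":"failed","error":"io"}"#);
    }
}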
|
||||
|
||||
impl Display for LfcPrewarmState {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
LfcPrewarmState::NotPrewarmed => f.write_str("NotPrewarmed"),
|
||||
LfcPrewarmState::Prewarming => f.write_str("Prewarming"),
|
||||
LfcPrewarmState::Completed => f.write_str("Completed"),
|
||||
LfcPrewarmState::Skipped => f.write_str("Skipped"),
|
||||
LfcPrewarmState::Failed { error } => write!(f, "Error({error})"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Default, Debug, Clone, PartialEq)]
|
||||
@@ -70,6 +99,23 @@ pub enum LfcOffloadState {
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||
#[serde(tag = "status", rename_all = "snake_case")]
|
||||
/// Response of /promote
|
||||
pub enum PromoteState {
|
||||
NotPromoted,
|
||||
Completed,
|
||||
Failed { error: String },
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize, Default, Debug, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
/// Result of /safekeepers_lsn
|
||||
pub struct SafekeepersLsn {
|
||||
pub safekeepers: String,
|
||||
pub wal_flush_lsn: utils::lsn::Lsn,
|
||||
}
|
||||
|
||||
/// Response of the /status API
|
||||
#[derive(Serialize, Debug, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
@@ -93,6 +139,15 @@ pub enum TerminateMode {
|
||||
Immediate,
|
||||
}
|
||||
|
||||
impl From<TerminateMode> for ComputeStatus {
|
||||
fn from(mode: TerminateMode) -> Self {
|
||||
match mode {
|
||||
TerminateMode::Fast => ComputeStatus::TerminationPendingFast,
|
||||
TerminateMode::Immediate => ComputeStatus::TerminationPendingImmediate,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ComputeStatus {
|
||||
@@ -113,7 +168,9 @@ pub enum ComputeStatus {
|
||||
// control-plane to terminate it.
|
||||
Failed,
|
||||
// Termination requested
|
||||
TerminationPending { mode: TerminateMode },
|
||||
TerminationPendingFast,
|
||||
// Termination requested, without waiting 30s before returning from /terminate
|
||||
TerminationPendingImmediate,
|
||||
// Terminated Postgres
|
||||
Terminated,
|
||||
}
|
||||
@@ -132,7 +189,10 @@ impl Display for ComputeStatus {
|
||||
ComputeStatus::Running => f.write_str("running"),
|
||||
ComputeStatus::Configuration => f.write_str("configuration"),
|
||||
ComputeStatus::Failed => f.write_str("failed"),
|
||||
ComputeStatus::TerminationPending { .. } => f.write_str("termination-pending"),
|
||||
ComputeStatus::TerminationPendingFast => f.write_str("termination-pending-fast"),
|
||||
ComputeStatus::TerminationPendingImmediate => {
|
||||
f.write_str("termination-pending-immediate")
|
||||
}
|
||||
ComputeStatus::Terminated => f.write_str("terminated"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,11 +105,7 @@ pub struct ComputeSpec {
|
||||
// updated to fill these fields, we can make these non optional.
|
||||
pub tenant_id: Option<TenantId>,
|
||||
pub timeline_id: Option<TimelineId>,
|
||||
|
||||
// Pageserver information can be passed in two different ways:
|
||||
// 1. Here
|
||||
// 2. in cluster.settings. This is legacy, we are switching to method 1.
|
||||
pub pageserver_connection_info: Option<PageserverConnectionInfo>,
|
||||
pub pageserver_connstring: Option<String>,
|
||||
|
||||
// More neon ids that we expose to the compute_ctl
|
||||
// and to postgres as neon extension GUCs.
|
||||
@@ -218,20 +214,6 @@ pub enum ComputeFeature {
|
||||
UnknownFeature,
|
||||
}
|
||||
|
||||
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
|
||||
pub struct PageserverConnectionInfo {
|
||||
pub shards: HashMap<u32, PageserverShardConnectionInfo>,
|
||||
|
||||
pub prefer_grpc: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
|
||||
pub struct PageserverShardConnectionInfo {
|
||||
pub libpq_url: Option<String>,
|
||||
pub grpc_url: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
|
||||
pub struct RemoteExtSpec {
|
||||
pub public_extensions: Option<Vec<String>>,
|
||||
@@ -349,12 +331,6 @@ impl ComputeMode {
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for ComputeMode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(self.to_type_str())
|
||||
}
|
||||
}
|
||||
|
||||
/// Log level for audit logging
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
|
||||
pub enum ComputeAudit {
|
||||
|
||||
@@ -20,6 +20,7 @@ use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_util::io::ReaderStream;
|
||||
use tracing::{Instrument, debug, info, info_span, warn};
|
||||
use utils::auth::{AuthError, Claims, SwappableJwtAuth};
|
||||
use utils::metrics_collector::{METRICS_COLLECTOR, METRICS_STALE_MILLIS};
|
||||
|
||||
use crate::error::{ApiError, api_error_handler, route_error_handler};
|
||||
use crate::request::{get_query_param, parse_query_param};
|
||||
@@ -250,9 +251,28 @@ impl std::io::Write for ChannelWriter {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
pub async fn prometheus_metrics_handler(
|
||||
req: Request<Body>,
|
||||
force_metric_collection_on_scrape: bool,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
SERVE_METRICS_COUNT.inc();
|
||||
|
||||
// HADRON
|
||||
let requested_use_latest = parse_query_param(&req, "use_latest")?;
|
||||
|
||||
let use_latest = match requested_use_latest {
|
||||
None => force_metric_collection_on_scrape,
|
||||
Some(true) => true,
|
||||
Some(false) => {
|
||||
if force_metric_collection_on_scrape {
|
||||
// We don't cache in this case
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
};
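// Net effect: use_latest == force_metric_collection_on_scrape || requested_use_latest == Some(true),
// i.e. `?use_latest=true` forces a fresh collection, but a caller cannot opt out
// when the server is configured to force collection on every scrape.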
|
||||
|
||||
let started_at = std::time::Instant::now();
|
||||
|
||||
let (tx, rx) = mpsc::channel(1);
|
||||
@@ -277,12 +297,18 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
|
||||
|
||||
let _span = span.entered();
|
||||
|
||||
let metrics = metrics::gather();
|
||||
// HADRON
|
||||
let collected = if use_latest {
|
||||
// Skip caching the results if we always force metric collection on scrape.
|
||||
METRICS_COLLECTOR.run_once(!force_metric_collection_on_scrape)
|
||||
} else {
|
||||
METRICS_COLLECTOR.last_collected()
|
||||
};
|
||||
|
||||
let gathered_at = std::time::Instant::now();
|
||||
|
||||
let res = encoder
|
||||
.encode(&metrics, &mut writer)
|
||||
.encode(&collected.metrics, &mut writer)
|
||||
.and_then(|_| writer.flush().map_err(|e| e.into()));
|
||||
|
||||
// this instant is not when we finally got the full response sent, sending is done by hyper
|
||||
@@ -295,6 +321,10 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
|
||||
let encoded_in = encoded_at - gathered_at - writer.wait_time();
|
||||
let total = encoded_at - started_at;
|
||||
|
||||
// HADRON
|
||||
let staleness_ms = (encoded_at - collected.collected_at).as_millis();
|
||||
METRICS_STALE_MILLIS.set(staleness_ms as i64);
|
||||
|
||||
match res {
|
||||
Ok(()) => {
|
||||
tracing::info!(
|
||||
@@ -303,6 +333,7 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
|
||||
spawning_ms = spawned_in.as_millis(),
|
||||
collection_ms = collected_in.as_millis(),
|
||||
encoding_ms = encoded_in.as_millis(),
|
||||
staleness_ms = staleness_ms,
|
||||
"responded /metrics"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -41,17 +41,35 @@ pub fn get_query_param<'a>(
|
||||
Some(q) => q,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let mut values = url::form_urlencoded::parse(query.as_bytes())
|
||||
let values = url::form_urlencoded::parse(query.as_bytes())
|
||||
.filter_map(|(k, v)| if k == param_name { Some(v) } else { None })
|
||||
// we call .next() twice below. If it's None the first time, .fuse() ensures it's None afterwards
|
||||
.fuse();
|
||||
|
||||
let value1 = values.next();
|
||||
if values.next().is_some() {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"param {param_name} specified more than once"
|
||||
)));
|
||||
}
|
||||
// Work around an issue with Alloy's pyroscope scrape where the "seconds"
|
||||
// parameter is added several times. https://github.com/grafana/alloy/issues/3026
|
||||
// TODO: revert after Alloy is fixed.
|
||||
let value1 = values
|
||||
.map(Ok)
|
||||
.reduce(|acc, i| {
|
||||
match acc {
|
||||
Err(_) => acc,
|
||||
|
||||
// It's okay to have duplicates as long as they have the same value.
|
||||
Ok(ref a) if a == &i.unwrap() => acc,
|
||||
|
||||
_ => Err(ApiError::BadRequest(anyhow!(
|
||||
"param {param_name} specified more than once"
|
||||
))),
|
||||
}
|
||||
})
|
||||
.transpose()?;
|
||||
// if values.next().is_some() {
|
||||
// return Err(ApiError::BadRequest(anyhow!(
|
||||
// "param {param_name} specified more than once"
|
||||
// )));
|
||||
// }
|
||||
|
||||
Ok(value1)
|
||||
}
|
||||
|
||||
@@ -92,3 +110,39 @@ pub async fn ensure_no_body(request: &mut Request<Body>) -> Result<(), ApiError>
|
||||
None => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_get_query_param_duplicate() {
|
||||
let req = Request::builder()
|
||||
.uri("http://localhost:12345/testuri?testparam=1")
|
||||
.body(hyper::Body::empty())
|
||||
.unwrap();
|
||||
let value = get_query_param(&req, "testparam").unwrap();
|
||||
assert_eq!(value.unwrap(), "1");
|
||||
|
||||
let req = Request::builder()
|
||||
.uri("http://localhost:12345/testuri?testparam=1&testparam=1")
|
||||
.body(hyper::Body::empty())
|
||||
.unwrap();
|
||||
let value = get_query_param(&req, "testparam").unwrap();
|
||||
assert_eq!(value.unwrap(), "1");
|
||||
|
||||
let req = Request::builder()
|
||||
.uri("http://localhost:12345/testuri")
|
||||
.body(hyper::Body::empty())
|
||||
.unwrap();
|
||||
let value = get_query_param(&req, "testparam").unwrap();
|
||||
assert!(value.is_none());
|
||||
|
||||
let req = Request::builder()
|
||||
.uri("http://localhost:12345/testuri?testparam=1&testparam=2&testparam=3")
|
||||
.body(hyper::Body::empty())
|
||||
.unwrap();
|
||||
let value = get_query_param(&req, "testparam");
|
||||
assert!(value.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,12 +4,14 @@
|
||||
//! a default registry.
|
||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||
|
||||
use std::sync::RwLock;
|
||||
|
||||
use measured::label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels};
|
||||
use measured::metric::counter::CounterState;
|
||||
use measured::metric::gauge::GaugeState;
|
||||
use measured::metric::group::Encoding;
|
||||
use measured::metric::name::{MetricName, MetricNameEncoder};
|
||||
use measured::metric::{MetricEncoding, MetricFamilyEncoding};
|
||||
use measured::metric::{MetricEncoding, MetricFamilyEncoding, MetricType};
|
||||
use measured::{FixedCardinalityLabel, LabelGroup, MetricGroup};
|
||||
use once_cell::sync::Lazy;
|
||||
use prometheus::Registry;
|
||||
@@ -116,12 +118,52 @@ pub fn pow2_buckets(start: usize, end: usize) -> Vec<f64> {
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub struct InfoMetric<L: LabelGroup, M: MetricType = GaugeState> {
|
||||
label: RwLock<L>,
|
||||
metric: M,
|
||||
}
|
||||
|
||||
impl<L: LabelGroup> InfoMetric<L> {
|
||||
pub fn new(label: L) -> Self {
|
||||
Self::with_metric(label, GaugeState::new(1))
|
||||
}
|
||||
}
|
||||
|
||||
impl<L: LabelGroup, M: MetricType<Metadata = ()>> InfoMetric<L, M> {
|
||||
pub fn with_metric(label: L, metric: M) -> Self {
|
||||
Self {
|
||||
label: RwLock::new(label),
|
||||
metric,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_label(&self, label: L) {
|
||||
*self.label.write().unwrap() = label;
|
||||
}
|
||||
}
|
||||
|
||||
impl<L, M, E> MetricFamilyEncoding<E> for InfoMetric<L, M>
|
||||
where
|
||||
L: LabelGroup,
|
||||
M: MetricEncoding<E, Metadata = ()>,
|
||||
E: Encoding,
|
||||
{
|
||||
fn collect_family_into(
|
||||
&self,
|
||||
name: impl measured::metric::name::MetricNameEncoder,
|
||||
enc: &mut E,
|
||||
) -> Result<(), E::Err> {
|
||||
M::write_type(&name, enc)?;
|
||||
self.metric
|
||||
.collect_into(&(), &*self.label.read().unwrap(), name, enc)
|
||||
}
|
||||
}
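// Intended usage (illustrative sketch, mirroring how `LibMetrics` wires it up
// below): an info-style metric exposes its labels as the payload and reports a
// constant gauge value of 1; `set_label` swaps the labels at runtime.
//
//     let info = InfoMetric::new(BuildInfo { revision: "abc123", build_tag: "dev" });
//     info.set_label(BuildInfo { revision: "abc123", build_tag: "release-7431" });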
|
||||
|
||||
pub struct BuildInfo {
|
||||
pub revision: &'static str,
|
||||
pub build_tag: &'static str,
|
||||
}
|
||||
|
||||
// todo: allow label group without the set
|
||||
impl LabelGroup for BuildInfo {
|
||||
fn visit_values(&self, v: &mut impl LabelGroupVisitor) {
|
||||
const REVISION: &LabelName = LabelName::from_str("revision");
|
||||
@@ -131,24 +173,6 @@ impl LabelGroup for BuildInfo {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Encoding> MetricFamilyEncoding<T> for BuildInfo
|
||||
where
|
||||
GaugeState: MetricEncoding<T>,
|
||||
{
|
||||
fn collect_family_into(
|
||||
&self,
|
||||
name: impl measured::metric::name::MetricNameEncoder,
|
||||
enc: &mut T,
|
||||
) -> Result<(), T::Err> {
|
||||
enc.write_help(&name, "Build/version information")?;
|
||||
GaugeState::write_type(&name, enc)?;
|
||||
GaugeState {
|
||||
count: std::sync::atomic::AtomicI64::new(1),
|
||||
}
|
||||
.collect_into(&(), self, name, enc)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(MetricGroup)]
|
||||
#[metric(new(build_info: BuildInfo))]
|
||||
pub struct NeonMetrics {
|
||||
@@ -165,8 +189,8 @@ pub struct NeonMetrics {
|
||||
#[derive(MetricGroup)]
|
||||
#[metric(new(build_info: BuildInfo))]
|
||||
pub struct LibMetrics {
|
||||
#[metric(init = build_info)]
|
||||
build_info: BuildInfo,
|
||||
#[metric(init = InfoMetric::new(build_info))]
|
||||
build_info: InfoMetric<BuildInfo>,
|
||||
|
||||
#[metric(flatten)]
|
||||
rusage: Rusage,
|
||||
|
||||
@@ -6,29 +6,15 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
thiserror.workspace = true
|
||||
nix.workspace = true
|
||||
nix.workspace=true
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
rustc-hash = { version = "2.1.1" }
|
||||
rand = "0.9.1"
|
||||
libc.workspace = true
|
||||
lock_api = "0.4.13"
|
||||
atomic = "0.6.1"
|
||||
bytemuck = { version = "1.23.1", features = ["derive"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { workspace = true, features = ["html_reports"] }
|
||||
rand_distr = "0.5.1"
|
||||
xxhash-rust = { version = "0.8.15", features = ["xxh3"] }
|
||||
ahash.workspace = true
|
||||
twox-hash = { version = "2.1.1" }
|
||||
seahash = "4.1.0"
|
||||
hashbrown = { git = "https://github.com/quantumish/hashbrown.git", rev = "6610e6d" }
|
||||
foldhash = "0.1.5"
|
||||
|
||||
lock_api.workspace = true
|
||||
rustc-hash.workspace = true
|
||||
|
||||
[target.'cfg(target_os = "macos")'.dependencies]
|
||||
tempfile = "3.14.0"
|
||||
|
||||
[[bench]]
|
||||
name = "hmap_resize"
|
||||
harness = false
|
||||
[dev-dependencies]
|
||||
rand = "0.9"
|
||||
rand_distr = "0.5.1"
|
||||
|
||||
@@ -1,330 +0,0 @@
|
||||
use criterion::{BatchSize, BenchmarkId, Criterion, criterion_group, criterion_main};
|
||||
use neon_shmem::hash::HashMapAccess;
|
||||
use neon_shmem::hash::HashMapInit;
|
||||
use neon_shmem::hash::entry::Entry;
|
||||
use rand::distr::{Distribution, StandardUniform};
|
||||
use rand::prelude::*;
|
||||
use std::default::Default;
|
||||
use std::hash::BuildHasher;
|
||||
|
||||
// Taken from bindings to C code
|
||||
|
||||
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
|
||||
#[repr(C)]
|
||||
pub struct FileCacheKey {
|
||||
pub _spc_id: u32,
|
||||
pub _db_id: u32,
|
||||
pub _rel_number: u32,
|
||||
pub _fork_num: u32,
|
||||
pub _block_num: u32,
|
||||
}
|
||||
|
||||
impl Distribution<FileCacheKey> for StandardUniform {
|
||||
// questionable, but doesn't need to be good randomness
|
||||
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> FileCacheKey {
|
||||
FileCacheKey {
|
||||
_spc_id: rng.random(),
|
||||
_db_id: rng.random(),
|
||||
_rel_number: rng.random(),
|
||||
_fork_num: rng.random(),
|
||||
_block_num: rng.random(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[repr(C)]
|
||||
pub struct FileCacheEntry {
|
||||
pub _offset: u32,
|
||||
pub _access_count: u32,
|
||||
pub _prev: *mut FileCacheEntry,
|
||||
pub _next: *mut FileCacheEntry,
|
||||
pub _state: [u32; 8],
|
||||
}
|
||||
|
||||
impl FileCacheEntry {
|
||||
fn dummy() -> Self {
|
||||
Self {
|
||||
_offset: 0,
|
||||
_access_count: 0,
|
||||
_prev: std::ptr::null_mut(),
|
||||
_next: std::ptr::null_mut(),
|
||||
_state: [0; 8],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Utilities for applying operations.
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct TestOp<K, V>(K, Option<V>);
|
||||
|
||||
fn apply_op<K: Clone + std::hash::Hash + Eq, V, S: std::hash::BuildHasher>(
|
||||
op: TestOp<K, V>,
|
||||
map: &mut HashMapAccess<K, V, S>,
|
||||
) {
|
||||
let entry = map.entry(op.0);
|
||||
|
||||
match op.1 {
|
||||
Some(new) => match entry {
|
||||
Entry::Occupied(mut e) => Some(e.insert(new)),
|
||||
Entry::Vacant(e) => {
|
||||
_ = e.insert(new).unwrap();
|
||||
None
|
||||
}
|
||||
},
|
||||
None => match entry {
|
||||
Entry::Occupied(e) => Some(e.remove()),
|
||||
Entry::Vacant(_) => None,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Hash utilities
|
||||
|
||||
struct SeaRandomState {
|
||||
k1: u64,
|
||||
k2: u64,
|
||||
k3: u64,
|
||||
k4: u64,
|
||||
}
|
||||
|
||||
impl std::hash::BuildHasher for SeaRandomState {
|
||||
type Hasher = seahash::SeaHasher;
|
||||
|
||||
fn build_hasher(&self) -> Self::Hasher {
|
||||
seahash::SeaHasher::with_seeds(self.k1, self.k2, self.k3, self.k4)
|
||||
}
|
||||
}
|
||||
|
||||
impl SeaRandomState {
|
||||
fn new() -> Self {
|
||||
let mut rng = rand::rng();
|
||||
Self {
|
||||
k1: rng.random(),
|
||||
k2: rng.random(),
|
||||
k3: rng.random(),
|
||||
k4: rng.random(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn small_benchs(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("Small maps");
|
||||
group.sample_size(10);
|
||||
|
||||
group.bench_function("small_rehash", |b| {
|
||||
let ideal_filled = 4_000_000;
|
||||
let size = 5_000_000;
|
||||
let mut writer = HashMapInit::new_resizeable(size, size * 2).attach_writer();
|
||||
let mut rng = rand::rng();
|
||||
while writer.get_num_buckets_in_use() < ideal_filled as usize {
|
||||
let key: FileCacheKey = rng.random();
|
||||
let val = FileCacheEntry::dummy();
|
||||
apply_op(TestOp(key, Some(val)), &mut writer);
|
||||
}
|
||||
b.iter(|| writer.shuffle());
|
||||
});

    group.bench_function("small_rehash_xxhash", |b| {
        let ideal_filled = 4_000_000;
        let size = 5_000_000;
        let mut writer = HashMapInit::new_resizeable(size, size * 2)
            .with_hasher(twox_hash::xxhash64::RandomState::default())
            .attach_writer();
        let mut rng = rand::rng();
        while writer.get_num_buckets_in_use() < ideal_filled as usize {
            let key: FileCacheKey = rng.random();
            let val = FileCacheEntry::dummy();
            apply_op(TestOp(key, Some(val)), &mut writer);
        }
        b.iter(|| writer.shuffle());
    });

    group.bench_function("small_rehash_ahash", |b| {
        let ideal_filled = 4_000_000;
        let size = 5_000_000;
        let mut writer = HashMapInit::new_resizeable(size, size * 2)
            .with_hasher(ahash::RandomState::default())
            .attach_writer();
        let mut rng = rand::rng();
        while writer.get_num_buckets_in_use() < ideal_filled as usize {
            let key: FileCacheKey = rng.random();
            let val = FileCacheEntry::dummy();
            apply_op(TestOp(key, Some(val)), &mut writer);
        }
        b.iter(|| writer.shuffle());
    });

    group.bench_function("small_rehash_seahash", |b| {
        let ideal_filled = 4_000_000;
        let size = 5_000_000;
        let mut writer = HashMapInit::new_resizeable(size, size * 2)
            .with_hasher(SeaRandomState::new())
            .attach_writer();
        let mut rng = rand::rng();
        while writer.get_num_buckets_in_use() < ideal_filled as usize {
            let key: FileCacheKey = rng.random();
            let val = FileCacheEntry::dummy();
            apply_op(TestOp(key, Some(val)), &mut writer);
        }
        b.iter(|| writer.shuffle());
    });

    group.finish();
}
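
// Both groups are registered through `criterion_group!` / `criterion_main!` at
// the bottom of the file. Assuming the usual criterion setup (a `[[bench]]`
// target with `harness = false` in Cargo.toml), an individual benchmark can be
// run with a name filter, e.g. `cargo bench -- small_rehash`.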

fn real_benchs(c: &mut Criterion) {
    let mut group = c.benchmark_group("Realistic workloads");
    group.sample_size(10);
    group.bench_function("real_bulk_insert", |b| {
        let size = 125_000_000;
        let ideal_filled = 100_000_000;
        let mut rng = rand::rng();
        b.iter_batched(
            || HashMapInit::new_resizeable(size, size * 2).attach_writer(),
            |writer| {
                for _ in 0..ideal_filled {
                    let key: FileCacheKey = rng.random();
                    let val = FileCacheEntry::dummy();
                    let entry = writer.entry(key);
                    std::hint::black_box(match entry {
                        Entry::Occupied(mut e) => {
                            e.insert(val);
                        }
                        Entry::Vacant(e) => {
                            _ = e.insert(val).unwrap();
                        }
                    })
                }
            },
            BatchSize::SmallInput,
        )
    });
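
    // `iter_batched` rebuilds the writer in the untimed setup closure for each
    // batch, so only the 100M inserts are measured, not map construction.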

    group.bench_function("real_rehash", |b| {
        let size = 125_000_000;
        let ideal_filled = 100_000_000;
        let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
        let mut rng = rand::rng();
        while writer.get_num_buckets_in_use() < ideal_filled {
            let key: FileCacheKey = rng.random();
            let val = FileCacheEntry::dummy();
            apply_op(TestOp(key, Some(val)), &mut writer);
        }
        b.iter(|| writer.shuffle());
    });
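
    // The hashbrown variants below drive `rehash_in_place` on a raw table as a
    // baseline. That path is not part of hashbrown's public `HashMap` API, so
    // this presumably relies on the crate's raw-table internals being exposed
    // (e.g. via a feature flag or a patched copy of the crate).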

    group.bench_function("real_rehash_hashbrown", |b| {
        let size = 125_000_000;
        let ideal_filled = 100_000_000;
        let mut writer = hashbrown::raw::RawTable::new();
        let mut rng = rand::rng();
        let hasher = rustc_hash::FxBuildHasher::default();
        unsafe {
            writer
                .resize(
                    size,
                    |(k, _)| hasher.hash_one(&k),
                    hashbrown::raw::Fallibility::Infallible,
                )
                .unwrap();
        }
        while writer.len() < ideal_filled as usize {
            let key: FileCacheKey = rng.random();
            let val = FileCacheEntry::dummy();
            writer.insert(hasher.hash_one(&key), (key, val), |(k, _)| {
                hasher.hash_one(&k)
            });
        }
        b.iter(|| unsafe {
            writer.table.rehash_in_place(
                &|table, index| {
                    hasher.hash_one(
                        &table
                            .bucket::<(FileCacheKey, FileCacheEntry)>(index)
                            .as_ref()
                            .0,
                    )
                },
                std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
                if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
                    Some(|ptr| std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry)))
                } else {
                    None
                },
            )
        });
    });
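
    // The sweep below keeps the table capacity fixed at 125M buckets and varies
    // only the occupancy (2M to 112M entries), so it shows how rehash cost
    // scales with the number of live entries rather than with capacity.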

    for elems in [2, 4, 8, 16, 32, 64, 96, 112] {
        group.bench_with_input(
            BenchmarkId::new("real_rehash_varied", elems),
            &elems,
            |b, &size| {
                let ideal_filled = size * 1_000_000;
                let size = 125_000_000;
                let mut writer = HashMapInit::new_resizeable(size, size).attach_writer();
                let mut rng = rand::rng();
                while writer.get_num_buckets_in_use() < ideal_filled as usize {
                    let key: FileCacheKey = rng.random();
                    let val = FileCacheEntry::dummy();
                    apply_op(TestOp(key, Some(val)), &mut writer);
                }
                b.iter(|| writer.shuffle());
            },
        );
        group.bench_with_input(
            BenchmarkId::new("real_rehash_varied_hashbrown", elems),
            &elems,
            |b, &size| {
                let ideal_filled = size * 1_000_000;
                let size = 125_000_000;
                let mut writer = hashbrown::raw::RawTable::new();
                let mut rng = rand::rng();
                let hasher = rustc_hash::FxBuildHasher::default();
                unsafe {
                    writer
                        .resize(
                            size,
                            |(k, _)| hasher.hash_one(&k),
                            hashbrown::raw::Fallibility::Infallible,
                        )
                        .unwrap();
                }
                while writer.len() < ideal_filled as usize {
                    let key: FileCacheKey = rng.random();
                    let val = FileCacheEntry::dummy();
                    writer.insert(hasher.hash_one(&key), (key, val), |(k, _)| {
                        hasher.hash_one(&k)
                    });
                }
                b.iter(|| unsafe {
                    writer.table.rehash_in_place(
                        &|table, index| {
                            hasher.hash_one(
                                &table
                                    .bucket::<(FileCacheKey, FileCacheEntry)>(index)
                                    .as_ref()
                                    .0,
                            )
                        },
                        std::mem::size_of::<(FileCacheKey, FileCacheEntry)>(),
                        if std::mem::needs_drop::<(FileCacheKey, FileCacheEntry)>() {
                            Some(|ptr| {
                                std::ptr::drop_in_place(ptr as *mut (FileCacheKey, FileCacheEntry))
                            })
                        } else {
                            None
                        },
                    )
                });
            },
        );
    }

    group.finish();
}

criterion_group!(benches, small_benchs, real_benchs);
criterion_main!(benches);
Some files were not shown because too many files have changed in this diff.