Merge branch 'communicator-rewrite' of https://github.com/neondatabase/neon into communicator-rewrite

@@ -33,6 +33,7 @@ workspace-members = [
 "compute_api",
 "consumption_metrics",
 "desim",
+"json",
 "metrics",
 "pageserver_api",
 "postgres_backend",

@@ -27,4 +27,4 @@
 !storage_controller/
 !vendor/postgres-*/
 !workspace_hack/
-!build_tools/patches
+!build-tools/patches

.github/actionlint.yml | 2 (vendored)
@@ -7,6 +7,7 @@ self-hosted-runner:
 - small-metal
 - small-arm64
 - unit-perf
+- unit-perf-aws-arm
 - us-east-2
 config-variables:
 - AWS_ECR_REGION
@@ -30,6 +31,7 @@ config-variables:
 - NEON_PROD_AWS_ACCOUNT_ID
 - PGREGRESS_PG16_PROJECT_ID
 - PGREGRESS_PG17_PROJECT_ID
+- PREWARM_PGBENCH_SIZE
 - REMOTE_STORAGE_AZURE_CONTAINER
 - REMOTE_STORAGE_AZURE_REGION
 - SLACK_CICD_CHANNEL_ID

@@ -176,7 +176,13 @@ runs:
 fi
 
 if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
-cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+# We don't use code coverage for regression tests (the step is disabled),
+# so there's no need to collect it.
+# Ref https://github.com/neondatabase/neon/issues/4540
+# cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+cov_prefix=()
+# Explicitly set LLVM_PROFILE_FILE to /dev/null to avoid writing *.profraw files
+export LLVM_PROFILE_FILE=/dev/null
 else
 cov_prefix=()
 fi

@@ -150,7 +150,7 @@ jobs:
 secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
 use-fallback: false
 path: pg_install/v14
-key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
+key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
 
 - name: Cache postgres v15 build
 id: cache_pg_15
@@ -162,7 +162,7 @@ jobs:
 secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
 use-fallback: false
 path: pg_install/v15
-key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
+key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
 
 - name: Cache postgres v16 build
 id: cache_pg_16
@@ -174,7 +174,7 @@ jobs:
 secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
 use-fallback: false
 path: pg_install/v16
-key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
+key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
 
 - name: Cache postgres v17 build
 id: cache_pg_17
@@ -186,7 +186,7 @@ jobs:
 secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
 use-fallback: false
 path: pg_install/v17
-key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
+key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}
 
 - name: Build all
 # Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables

.github/workflows/benchmarking.yml | 72 (vendored)
@@ -219,6 +219,7 @@ jobs:
 --ignore test_runner/performance/test_cumulative_statistics_persistence.py
 --ignore test_runner/performance/test_perf_many_relations.py
 --ignore test_runner/performance/test_perf_oltp_large_tenant.py
+--ignore test_runner/performance/test_lfc_prewarm.py
 env:
 BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
 VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -410,6 +411,77 @@ jobs:
 env:
 SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
 
+prewarm-test:
+if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
+permissions:
+contents: write
+statuses: write
+id-token: write # aws-actions/configure-aws-credentials
+env:
+PGBENCH_SIZE: ${{ vars.PREWARM_PGBENCH_SIZE }}
+POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+DEFAULT_PG_VERSION: 17
+TEST_OUTPUT: /tmp/test_output
+BUILD_TYPE: remote
+SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
+PLATFORM: "neon-staging"
+
+runs-on: [ self-hosted, us-east-2, x64 ]
+container:
+image: ghcr.io/neondatabase/build-tools:pinned-bookworm
+credentials:
+username: ${{ github.actor }}
+password: ${{ secrets.GITHUB_TOKEN }}
+options: --init
+
+steps:
+- name: Harden the runner (Audit all outbound calls)
+uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+with:
+egress-policy: audit
+
+- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+- name: Configure AWS credentials
+uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+with:
+aws-region: eu-central-1
+role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+role-duration-seconds: 18000 # 5 hours
+
+- name: Download Neon artifact
+uses: ./.github/actions/download
+with:
+name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+path: /tmp/neon/
+prefix: latest
+aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+- name: Run prewarm benchmark
+uses: ./.github/actions/run-python-test-set
+with:
+build_type: ${{ env.BUILD_TYPE }}
+test_selection: performance/test_lfc_prewarm.py
+run_in_parallel: false
+save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+extra_params: -m remote_cluster --timeout 5400
+pg_version: ${{ env.DEFAULT_PG_VERSION }}
+aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+env:
+VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
+
+- name: Create Allure report
+id: create-allure-report
+if: ${{ !cancelled() }}
+uses: ./.github/actions/allure-report-generate
+with:
+store-test-results-into-db: true
+aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+env:
+REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
+
 generate-matrices:
 if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
 # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)

@@ -72,7 +72,7 @@ jobs:
 ARCHS: ${{ inputs.archs || '["x64","arm64"]' }}
 DEBIANS: ${{ inputs.debians || '["bullseye","bookworm"]' }}
 IMAGE_TAG: |
-${{ hashFiles('build-tools.Dockerfile',
+${{ hashFiles('build-tools/Dockerfile',
 '.github/workflows/build-build-tools-image.yml') }}
 run: |
 echo "archs=${ARCHS}" | tee -a ${GITHUB_OUTPUT}
@@ -144,7 +144,7 @@ jobs:
 
 - uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
 with:
-file: build-tools.Dockerfile
+file: build-tools/Dockerfile
 context: .
 provenance: false
 push: true

.github/workflows/build_and_test.yml | 50 (vendored)
@@ -87,6 +87,29 @@ jobs:
 uses: ./.github/workflows/build-build-tools-image.yml
 secrets: inherit
 
+lint-yamls:
+needs: [ meta, check-permissions, build-build-tools-image ]
+# We do need to run this in `.*-rc-pr` because of hotfixes.
+if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
+runs-on: [ self-hosted, small ]
+container:
+image: ${{ needs.build-build-tools-image.outputs.image }}
+credentials:
+username: ${{ github.actor }}
+password: ${{ secrets.GITHUB_TOKEN }}
+options: --init
+
+steps:
+- name: Harden the runner (Audit all outbound calls)
+uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+with:
+egress-policy: audit
+
+- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+- run: make -C compute manifest-schema-validation
+- run: make lint-openapi-spec
+
 check-codestyle-python:
 needs: [ meta, check-permissions, build-build-tools-image ]
 # No need to run on `main` because we this in the merge queue. We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -199,28 +222,6 @@ jobs:
 build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
 secrets: inherit
 
-validate-compute-manifest:
-runs-on: ubuntu-22.04
-needs: [ meta, check-permissions ]
-# We do need to run this in `.*-rc-pr` because of hotfixes.
-if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
-steps:
-- name: Harden the runner (Audit all outbound calls)
-uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
-with:
-egress-policy: audit
-
-- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-- name: Set up Node.js
-uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
-with:
-node-version: '24'
-
-- name: Validate manifest against schema
-run: |
-make -C compute manifest-schema-validation
-
 build-and-test-locally:
 needs: [ meta, build-build-tools-image ]
 # We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -306,14 +307,14 @@ jobs:
 statuses: write
 contents: write
 pull-requests: write
-runs-on: [ self-hosted, unit-perf ]
+runs-on: [ self-hosted, unit-perf-aws-arm ]
 container:
 image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
 credentials:
 username: ${{ github.actor }}
 password: ${{ secrets.GITHUB_TOKEN }}
 # for changed limits, see comments on `options:` earlier in this file
-options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
+options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 --ulimit nofile=65536:65536 --security-opt seccomp=unconfined
 strategy:
 fail-fast: false
 matrix:
@@ -986,6 +987,7 @@ jobs:
 - name: Verify docker-compose example and test extensions
 timeout-minutes: 60
 env:
+PARALLEL_COMPUTES: 3
 TAG: >-
 ${{
 needs.meta.outputs.run-kind == 'compute-rc-pr'

.github/workflows/periodic_pagebench.yml | 4 (vendored)
@@ -1,4 +1,4 @@
-name: Periodic pagebench performance test on unit-perf hetzner runner
+name: Periodic pagebench performance test on unit-perf-aws-arm runners
 
 on:
 schedule:
@@ -40,7 +40,7 @@ jobs:
 statuses: write
 contents: write
 pull-requests: write
-runs-on: [ self-hosted, unit-perf ]
+runs-on: [ self-hosted, unit-perf-aws-arm ]
 container:
 image: ghcr.io/neondatabase/build-tools:pinned-bookworm
 credentials:

.github/workflows/proxy-benchmark.yml | 4 (vendored)
@@ -1,4 +1,4 @@
-name: Periodic proxy performance test on unit-perf hetzner runner
+name: Periodic proxy performance test on unit-perf-aws-arm runners
 
 on:
 push: # TODO: remove after testing
@@ -32,7 +32,7 @@ jobs:
 statuses: write
 contents: write
 pull-requests: write
-runs-on: [self-hosted, unit-perf]
+runs-on: [self-hosted, unit-perf-aws-arm]
 timeout-minutes: 60 # 1h timeout
 container:
 image: ghcr.io/neondatabase/build-tools:pinned-bookworm

.gitignore | 4 (vendored)
@@ -16,6 +16,7 @@ neon.iml
 /integration_tests/.neon
 compaction-suite-results.*
 pgxn/neon/communicator/communicator_bindings.h
+docker-compose/docker-compose-parallel.yml
 
 # Coverage
 *.profraw
@@ -29,3 +30,6 @@ pgxn/neon/communicator/communicator_bindings.h
 
 # pgindent typedef lists
 *.list
+
+# Node
+**/node_modules/

.gitmodules | 8 (vendored)
@@ -1,16 +1,16 @@
 [submodule "vendor/postgres-v14"]
 path = vendor/postgres-v14
-url = https://github.com/neondatabase/postgres.git
+url = ../postgres.git
 branch = REL_14_STABLE_neon
 [submodule "vendor/postgres-v15"]
 path = vendor/postgres-v15
-url = https://github.com/neondatabase/postgres.git
+url = ../postgres.git
 branch = REL_15_STABLE_neon
 [submodule "vendor/postgres-v16"]
 path = vendor/postgres-v16
-url = https://github.com/neondatabase/postgres.git
+url = ../postgres.git
 branch = REL_16_STABLE_neon
 [submodule "vendor/postgres-v17"]
 path = vendor/postgres-v17
-url = https://github.com/neondatabase/postgres.git
+url = ../postgres.git
 branch = REL_17_STABLE_neon
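
A hedged aside on the .gitmodules change above (not part of the diff): a relative submodule URL such as ../postgres.git resolves against the superproject's own remote, so forks and mirrors fetch the postgres submodules from their own namespace rather than a hard-coded github.com/neondatabase URL. Existing checkouts generally need their configured URLs re-synced; a minimal sketch using standard git commands:

    # Copy the (now relative) URLs from .gitmodules into .git/config,
    # resolving them against this clone's own origin URL.
    git submodule sync --recursive
    # Re-fetch vendor/postgres-v14 .. v17 using the re-resolved URLs.
    git submodule update --init --recursive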

Cargo.lock | 54 (generated)
@@ -1427,6 +1427,7 @@ dependencies = [
 "p256 0.13.2",
 "pageserver_page_api",
 "postgres",
+"postgres-types",
 "postgres_initdb",
 "postgres_versioninfo",
 "regex",
@@ -1950,6 +1951,7 @@ dependencies = [
 "diesel_derives",
 "itoa",
 "serde_json",
+"uuid",
 ]
 
 [[package]]
@@ -3581,6 +3583,15 @@ dependencies = [
 "wasm-bindgen",
 ]
 
+[[package]]
+name = "json"
+version = "0.1.0"
+dependencies = [
+"futures",
+"itoa",
+"ryu",
+]
+
 [[package]]
 name = "json-structural-diff"
 version = "0.2.0"
@@ -4403,6 +4414,7 @@ dependencies = [
 "pageserver_client",
 "pageserver_client_grpc",
 "pageserver_page_api",
+"pprof",
 "rand 0.8.5",
 "reqwest",
 "serde",
@@ -4431,6 +4443,7 @@ dependencies = [
 "pageserver_api",
 "postgres_ffi",
 "remote_storage",
+"serde",
 "serde_json",
 "svg_fmt",
 "thiserror 1.0.69",
@@ -4448,6 +4461,7 @@ dependencies = [
 "arc-swap",
 "async-compression",
 "async-stream",
+"base64 0.22.1",
 "bincode",
 "bit_field",
 "byteorder",
@@ -4609,30 +4623,18 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "arc-swap",
-"async-trait",
 "bytes",
-"chrono",
 "compute_api",
-"dashmap 5.5.0",
 "futures",
-"http 1.1.0",
-"hyper 1.6.0",
-"hyper-util",
-"metrics",
 "pageserver_api",
 "pageserver_page_api",
-"priority-queue",
-"rand 0.8.5",
-"scopeguard",
-"thiserror 1.0.69",
 "tokio",
 "tokio-stream",
 "tokio-util",
 "tonic 0.13.1",
-"tower 0.4.13",
 "tracing",
 "utils",
-"uuid",
+"workspace_hack",
 ]
 
 [[package]]
@@ -5248,17 +5250,6 @@ dependencies = [
 "elliptic-curve 0.13.8",
 ]
 
-[[package]]
-name = "priority-queue"
-version = "2.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5676d703dda103cbb035b653a9f11448c0a7216c7926bd35fcb5865475d0c970"
-dependencies = [
-"autocfg",
-"equivalent",
-"indexmap 2.9.0",
-]
-
 [[package]]
 name = "proc-macro2"
 version = "1.0.94"
@@ -5428,6 +5419,7 @@ dependencies = [
 "async-trait",
 "atomic-take",
 "aws-config",
+"aws-credential-types",
 "aws-sdk-iam",
 "aws-sigv4",
 "base64 0.22.1",
@@ -5467,6 +5459,7 @@ dependencies = [
 "itoa",
 "jose-jwa",
 "jose-jwk",
+"json",
 "lasso",
 "measured",
 "metrics",
@@ -5892,6 +5885,8 @@ dependencies = [
 "azure_identity",
 "azure_storage",
 "azure_storage_blobs",
+"base64 0.22.1",
+"byteorder",
 "bytes",
 "camino",
 "camino-tempfile",
@@ -6383,6 +6378,7 @@ dependencies = [
 "itertools 0.10.5",
 "jsonwebtoken",
 "metrics",
+"nix 0.30.1",
 "once_cell",
 "pageserver_api",
 "parking_lot 0.12.1",
@@ -6390,6 +6386,7 @@ dependencies = [
 "postgres-protocol",
 "postgres_backend",
 "postgres_ffi",
+"postgres_ffi_types",
 "postgres_versioninfo",
 "pprof",
 "pq_proto",
@@ -6434,7 +6431,7 @@ dependencies = [
 "anyhow",
 "const_format",
 "pageserver_api",
-"postgres_ffi",
+"postgres_ffi_types",
 "postgres_versioninfo",
 "pq_proto",
 "serde",
@@ -7113,6 +7110,7 @@ dependencies = [
 "tokio-util",
 "tracing",
 "utils",
+"uuid",
 "workspace_hack",
 ]
 
@@ -7176,6 +7174,7 @@ dependencies = [
 "pageserver_api",
 "pageserver_client",
 "reqwest",
+"safekeeper_api",
 "serde_json",
 "storage_controller_client",
 "tokio",
@@ -7755,6 +7754,7 @@ dependencies = [
 "futures-core",
 "pin-project-lite",
 "tokio",
+"tokio-util",
 ]
 
 [[package]]
@@ -8408,6 +8408,7 @@ dependencies = [
 "tracing-error",
 "tracing-subscriber",
 "tracing-utils",
+"uuid",
 "walkdir",
 ]
 
@@ -8955,8 +8956,10 @@ dependencies = [
 "fail",
 "form_urlencoded",
 "futures-channel",
+"futures-core",
 "futures-executor",
 "futures-io",
+"futures-sink",
 "futures-util",
 "generic-array",
 "getrandom 0.2.11",
@@ -9025,7 +9028,6 @@ dependencies = [
 "tracing-log",
 "tracing-subscriber",
 "url",
-"uuid",
 "zeroize",
 "zstd",
 "zstd-safe",

@@ -44,6 +44,7 @@ members = [
 "libs/walproposer",
 "libs/wal_decoder",
 "libs/postgres_initdb",
+"libs/proxy/json",
 "libs/proxy/postgres-protocol2",
 "libs/proxy/postgres-types2",
 "libs/proxy/tokio-postgres2",
@@ -204,7 +205,7 @@ tokio = { version = "1.43.1", features = ["macros"] }
 tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
-tokio-stream = "0.1"
+tokio-stream = { version = "0.1", features = ["sync"] }
 tokio-tar = "0.3"
 tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
 toml = "0.8"

Makefile | 19
@@ -2,7 +2,7 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 
 # Where to install Postgres, default is ./pg_install, maybe useful for package
 # managers.
-POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
+POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install
 
 # Supported PostgreSQL versions
 POSTGRES_VERSIONS = v17 v16 v15 v14
@@ -14,7 +14,7 @@ POSTGRES_VERSIONS = v17 v16 v15 v14
 # it is derived from BUILD_TYPE.
 
 # All intermediate build artifacts are stored here.
-BUILD_DIR := build
+BUILD_DIR := $(ROOT_PROJECT_DIR)/build
 
 ICU_PREFIX_DIR := /usr/local/icu
 
@@ -212,7 +212,7 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
 FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
 INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
 PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
--C $(BUILD_DIR)/neon-v17 \
+-C $(BUILD_DIR)/pgxn-v17/neon \
 -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
 
 
@@ -220,6 +220,19 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
 setup-pre-commit-hook:
 ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
 
+build-tools/node_modules: build-tools/package.json
+cd build-tools && $(if $(CI),npm ci,npm install)
+touch build-tools/node_modules
+
+.PHONY: lint-openapi-spec
+lint-openapi-spec: build-tools/node_modules
+# operation-2xx-response: pageserver timeline delete returns 404 on success
+find . -iname "openapi_spec.y*ml" -exec\
+npx --prefix=build-tools/ redocly\
+--skip-rule=operation-operationId --skip-rule=operation-summary --extends=minimal\
+--skip-rule=no-server-example.com --skip-rule=operation-2xx-response\
+lint {} \+
+
 # Targets for building PostgreSQL are defined in postgres.mk.
 #
 # But if the caller has indicated that PostgreSQL is already
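
A hedged usage sketch for the Makefile targets touched above, assuming a local Node.js/npm toolchain is available (the build-tools image change below installs Node 24 for CI):

    # Run from the repository root; the first run populates build-tools/node_modules via npm.
    make lint-openapi-spec                      # redocly lint over every openapi_spec.y*ml in the tree
    make -C compute manifest-schema-validation  # validate the compute manifest against its JSON schema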

@@ -35,7 +35,7 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
 echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \
 echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc
 
-COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
+COPY build-tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
 
 RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
 set -e && \
@@ -188,6 +188,12 @@ RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
 && bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
 && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 
+# Install node
+ENV NODE_VERSION=24
+RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
+&& apt install -y nodejs \
+&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
 # Install docker
 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
 && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
@@ -311,14 +317,14 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
 . "$HOME/.cargo/env" && \
 cargo --version && rustup --version && \
 rustup component add llvm-tools rustfmt clippy && \
-cargo install rustfilt --version ${RUSTFILT_VERSION} --locked && \
-cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} --locked && \
-cargo install cargo-deny --version ${CARGO_DENY_VERSION} --locked && \
-cargo install cargo-hack --version ${CARGO_HACK_VERSION} --locked && \
-cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} --locked && \
-cargo install cargo-chef --version ${CARGO_CHEF_VERSION} --locked && \
-cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} --locked \
+cargo install rustfilt --locked --version ${RUSTFILT_VERSION} && \
+cargo install cargo-hakari --locked --version ${CARGO_HAKARI_VERSION} && \
+cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
+cargo install cargo-hack --locked --version ${CARGO_HACK_VERSION} && \
+cargo install cargo-nextest --locked --version ${CARGO_NEXTEST_VERSION} && \
+cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
+cargo install diesel_cli --locked --version ${CARGO_DIESEL_CLI_VERSION} \
 --features postgres-bundled --no-default-features && \
 rm -rf /home/nonroot/.cargo/registry && \
 rm -rf /home/nonroot/.cargo/git
 

build-tools/package-lock.json | 3189 (generated, new file; diff suppressed because it is too large)

build-tools/package.json | 8 (new file)
@@ -0,0 +1,8 @@
+{
+"name": "build-tools",
+"private": true,
+"devDependencies": {
+"@redocly/cli": "1.34.4",
+"@sourcemeta/jsonschema": "10.0.0"
+}
+}

@@ -1,9 +1,12 @@
 disallowed-methods = [
 "tokio::task::block_in_place",
 
 # Allow this for now, to deny it later once we stop using Handle::block_on completely
 # "tokio::runtime::Handle::block_on",
-# use tokio_epoll_uring_ext instead
-"tokio_epoll_uring::thread_local_system",
+# tokio-epoll-uring:
+# - allow-invalid because the method doesn't exist on macOS
+{ path = "tokio_epoll_uring::thread_local_system", replacement = "tokio_epoll_uring_ext module inside pageserver crate", allow-invalid = true }
 ]
 
 disallowed-macros = [

@@ -50,9 +50,9 @@ jsonnetfmt-format:
 jsonnetfmt --in-place $(jsonnet_files)
 
 .PHONY: manifest-schema-validation
-manifest-schema-validation: node_modules
-node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
+manifest-schema-validation: ../build-tools/node_modules
+npx --prefix=../build-tools/ jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
 
-node_modules: package.json
-npm install
-touch node_modules
+../build-tools/node_modules: ../build-tools/package.json
+cd ../build-tools && $(if $(CI),npm ci,npm install)
+touch ../build-tools/node_modules

@@ -9,7 +9,7 @@
 #
 # build-tools: This contains Rust compiler toolchain and other tools needed at compile
 # time. This is also used for the storage builds. This image is defined in
-# build-tools.Dockerfile.
+# build-tools/Dockerfile.
 #
 # build-deps: Contains C compiler, other build tools, and compile-time dependencies
 # needed to compile PostgreSQL and most extensions. (Some extensions need
@@ -115,7 +115,7 @@ ARG EXTENSIONS=all
 FROM $BASE_IMAGE_SHA AS build-deps
 ARG DEBIAN_VERSION
 
-# Keep in sync with build-tools.Dockerfile
+# Keep in sync with build-tools/Dockerfile
 ENV PROTOC_VERSION=25.1
 
 # Use strict mode for bash to catch errors early
@@ -170,7 +170,29 @@ RUN case $DEBIAN_VERSION in \
 FROM build-deps AS pg-build
 ARG PG_VERSION
 COPY vendor/postgres-${PG_VERSION:?} postgres
+COPY compute/patches/postgres_fdw.patch .
+COPY compute/patches/pg_stat_statements_pg14-16.patch .
+COPY compute/patches/pg_stat_statements_pg17.patch .
 RUN cd postgres && \
+# Apply patches to some contrib extensions
+# For example, we need to grant EXECUTE on pg_stat_statements_reset() to {privileged_role_name}.
+# In vanilla Postgres this function is limited to Postgres role superuser.
+# In Neon we have {privileged_role_name} role that is not a superuser but replaces superuser in some cases.
+# We could add the additional grant statements to the Postgres repository but it would be hard to maintain,
+# whenever we need to pick up a new Postgres version and we want to limit the changes in our Postgres fork,
+# so we do it here.
+case "${PG_VERSION}" in \
+"v14" | "v15" | "v16") \
+patch -p1 < /pg_stat_statements_pg14-16.patch; \
+;; \
+"v17") \
+patch -p1 < /pg_stat_statements_pg17.patch; \
+;; \
+*) \
+# To do not forget to migrate patches to the next major version
+echo "No contrib patches for this PostgreSQL version" && exit 1;; \
+esac && \
+patch -p1 < /postgres_fdw.patch && \
 export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
 --with-icu --with-libxml --with-libxslt --with-lz4" && \
 if [ "${PG_VERSION:?}" != "v14" ]; then \
@@ -184,8 +206,6 @@ RUN cd postgres && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \
-file=/usr/local/pgsql/share/extension/postgres_fdw--1.0.sql && [ -e $file ] && \
-echo 'GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO neon_superuser;' >> $file && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
@@ -195,34 +215,7 @@ RUN cd postgres && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
 echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
-echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control && \
-# We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
-# In vanilla postgres this function is limited to Postgres role superuser.
-# In neon we have neon_superuser role that is not a superuser but replaces superuser in some cases.
-# We could add the additional grant statements to the postgres repository but it would be hard to maintain,
-# whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
-# so we do it here.
-for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
-filename=$(basename "$file"); \
-# Note that there are no downgrade scripts for pg_stat_statements, so we \
-# don't have to modify any downgrade paths or (much) older versions: we only \
-# have to make sure every creation of the pg_stat_statements_reset function \
-# also adds execute permissions to the neon_superuser.
-case $filename in \
-pg_stat_statements--1.4.sql) \
-# pg_stat_statements_reset is first created with 1.4
-echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
-;; \
-pg_stat_statements--1.6--1.7.sql) \
-# Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
-echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
-;; \
-pg_stat_statements--1.10--1.11.sql) \
-# Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
-echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
-;; \
-esac; \
-done;
+echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
 
 # Set PATH for all the subsequent build steps
 ENV PATH="/usr/local/pgsql/bin:$PATH"
@@ -1524,7 +1517,7 @@ WORKDIR /ext-src
 COPY compute/patches/pg_duckdb_v031.patch .
 COPY compute/patches/duckdb_v120.patch .
 # pg_duckdb build requires source dir to be a git repo to get submodules
-# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
+# allow {privileged_role_name} to execute some functions that in pg_duckdb are available to superuser only:
 # - extension management function duckdb.install_extension()
 # - access to duckdb.extensions table and its sequence
 RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
@@ -1790,7 +1783,7 @@ RUN set -e \
 #########################################################################################
 FROM build-deps AS exporters
 ARG TARGETARCH
-# Keep sql_exporter version same as in build-tools.Dockerfile and
+# Keep sql_exporter version same as in build-tools/Dockerfile and
 # test_runner/regress/test_compute_metrics.py
 # See comment on the top of the file regading `echo`, `-e` and `\n`
 RUN if [ "$TARGETARCH" = "amd64" ]; then\
@@ -1915,10 +1908,10 @@ RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /e
 
 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
 RUN echo /usr/local/pgsql/lib > /etc/ld.so.conf.d/00-neon.conf && /sbin/ldconfig
-RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
+RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq parallel \
 && apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
 ENV PATH=/usr/local/pgsql/bin:$PATH
-ENV PGHOST=compute
+ENV PGHOST=compute1
 ENV PGPORT=55433
 ENV PGUSER=cloud_admin
 ENV PGDATABASE=postgres

@@ -1,7 +0,0 @@
-{
-"name": "neon-compute",
-"private": true,
-"dependencies": {
-"@sourcemeta/jsonschema": "9.3.4"
-}
-}

@@ -1,22 +1,26 @@
 diff --git a/sql/anon.sql b/sql/anon.sql
-index 0cdc769..b450327 100644
+index 0cdc769..5eab1d6 100644
 --- a/sql/anon.sql
 +++ b/sql/anon.sql
-@@ -1141,3 +1141,15 @@ $$
+@@ -1141,3 +1141,19 @@ $$
 -- TODO : https://en.wikipedia.org/wiki/L-diversity
 
 -- TODO : https://en.wikipedia.org/wiki/T-closeness
 +
 +-- NEON Patches
 +
-+GRANT ALL ON SCHEMA anon to neon_superuser;
-+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
-+
 +DO $$
++DECLARE
++ privileged_role_name text;
 +BEGIN
-+ IF current_setting('server_version_num')::int >= 150000 THEN
-+ GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO neon_superuser;
-+ END IF;
++ privileged_role_name := current_setting('neon.privileged_role_name');
++
++ EXECUTE format('GRANT ALL ON SCHEMA anon to %I', privileged_role_name);
++ EXECUTE format('GRANT ALL ON ALL TABLES IN SCHEMA anon TO %I', privileged_role_name);
++
++ IF current_setting('server_version_num')::int >= 150000 THEN
++ EXECUTE format('GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO %I', privileged_role_name);
++ END IF;
 +END $$;
 diff --git a/sql/init.sql b/sql/init.sql
 index 7da6553..9b6164b 100644

@@ -21,13 +21,21 @@ index 3235cc8..6b892bc 100644
 include Makefile.global
 
 diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
-index d777d76..af60106 100644
+index d777d76..3b54396 100644
 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql
 +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql
-@@ -1056,3 +1056,6 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
+@@ -1056,3 +1056,14 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
 GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;
 GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;
 GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;
-+GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO neon_superuser;
-+GRANT ALL ON TABLE duckdb.extensions TO neon_superuser;
-+GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO neon_superuser;
++
++DO $$
++DECLARE
++ privileged_role_name text;
++BEGIN
++ privileged_role_name := current_setting('neon.privileged_role_name');
++
++ EXECUTE format('GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO %I', privileged_role_name);
++ EXECUTE format('GRANT ALL ON TABLE duckdb.extensions TO %I', privileged_role_name);
++ EXECUTE format('GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO %I', privileged_role_name);
++END $$;

compute/patches/pg_stat_statements_pg14-16.patch | 34 (new file)
@@ -0,0 +1,34 @@
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+index 58cdf600fce..8be57a996f6 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
++++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
++
++DO $$
++DECLARE
++ privileged_role_name text;
++BEGIN
++ privileged_role_name := current_setting('neon.privileged_role_name');
++
++ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
++END $$;
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+index 6fc3fed4c93..256345a8f79 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
++++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
++
++DO $$
++DECLARE
++ privileged_role_name text;
++BEGIN
++ privileged_role_name := current_setting('neon.privileged_role_name');
++
++ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
++END $$;

compute/patches/pg_stat_statements_pg17.patch | 52 (new file)
@@ -0,0 +1,52 @@
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
+index 0bb2c397711..32764db1d8b 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
++++ b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
+@@ -80,3 +80,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) FROM PUBLIC;
++
++DO $$
++DECLARE
++ privileged_role_name text;
++BEGIN
++ privileged_role_name := current_setting('neon.privileged_role_name');
++
++ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO %I', privileged_role_name);
++END $$;
+\ No newline at end of file
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+index 58cdf600fce..8be57a996f6 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
++++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
++
++DO $$
++DECLARE
++ privileged_role_name text;
++BEGIN
++ privileged_role_name := current_setting('neon.privileged_role_name');
++
++ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
++END $$;
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+index 6fc3fed4c93..256345a8f79 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
++++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
++
++DO $$
++DECLARE
++ privileged_role_name text;
++BEGIN
++ privileged_role_name := current_setting('neon.privileged_role_name');
++
++ EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
++END $$;
compute/patches/postgres_fdw.patch (new file, 17 lines)
@@ -0,0 +1,17 @@
diff --git a/contrib/postgres_fdw/postgres_fdw--1.0.sql b/contrib/postgres_fdw/postgres_fdw--1.0.sql
index a0f0fc1bf45..ee077f2eea6 100644
--- a/contrib/postgres_fdw/postgres_fdw--1.0.sql
+++ b/contrib/postgres_fdw/postgres_fdw--1.0.sql
@@ -16,3 +16,12 @@ LANGUAGE C STRICT;
 CREATE FOREIGN DATA WRAPPER postgres_fdw
   HANDLER postgres_fdw_handler
   VALIDATOR postgres_fdw_validator;
+
+DO $$
+DECLARE
+  privileged_role_name text;
+BEGIN
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO %I', privileged_role_name);
+END $$;
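Note: these extension patches all follow the same pattern — at extension install time a DO block reads the neon.privileged_role_name GUC (written into postgresql.conf by compute_ctl, see the write_postgres_conf change later in this diff) and grants the privileged role access that is otherwise reserved for superusers. For illustration only, a minimal sketch of issuing the same kind of grant from Rust with tokio_postgres; the helper name is hypothetical and is not part of this commit, and it assumes an already-connected maintenance client:

    // Hypothetical helper, not from this commit: grant EXECUTE on a function
    // to the role configured via neon.privileged_role_name.
    async fn grant_to_privileged_role(
        client: &tokio_postgres::Client,
        function_signature: &str, // e.g. "pg_stat_statements_reset()"
    ) -> Result<(), tokio_postgres::Error> {
        // Read the role name the same way the SQL DO blocks do.
        let row = client
            .query_one("SELECT current_setting('neon.privileged_role_name')", &[])
            .await?;
        let role: String = row.get(0);
        // compute_ctl validates the role name against ^[a-z_]+$, so plain
        // interpolation mirrors what the DO blocks rely on.
        let sql = format!("GRANT EXECUTE ON FUNCTION {function_signature} TO {role}");
        client.batch_execute(&sql).await
    }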
@@ -66,7 +66,7 @@ url.workspace = true
 uuid.workspace = true
 walkdir.workspace = true
 x509-cert.workspace = true
+postgres-types.workspace = true
 postgres_versioninfo.workspace = true
 postgres_initdb.workspace = true
 compute_api.workspace = true
@@ -46,11 +46,14 @@ stateDiagram-v2
     Configuration --> Failed : Failed to configure the compute
     Configuration --> Running : Compute has been configured
     Empty --> Init : Compute spec is immediately available
-    Empty --> TerminationPending : Requested termination
+    Empty --> TerminationPendingFast : Requested termination
+    Empty --> TerminationPendingImmediate : Requested termination
     Init --> Failed : Failed to start Postgres
     Init --> Running : Started Postgres
-    Running --> TerminationPending : Requested termination
-    TerminationPending --> Terminated : Terminated compute
+    Running --> TerminationPendingFast : Requested termination
+    Running --> TerminationPendingImmediate : Requested termination
+    TerminationPendingFast --> Terminated : Terminated compute with 30s delay for cplane to inspect status
+    TerminationPendingImmediate --> Terminated : Terminated compute immediately
     Failed --> [*] : Compute exited
     Terminated --> [*] : Compute exited
 ```
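The termination flow now distinguishes a fast and an immediate path. A reduced, illustrative Rust model of the resulting transitions (the real ComputeStatus lives in compute_api and carries more variants and serde attributes; this sketch is only a reading aid):

    // Illustrative only: a reduced model of the compute status state machine.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    enum Status {
        Empty,
        Init,
        Configuration,
        Running,
        Failed,
        TerminationPendingFast,      // terminate, then keep compute_ctl alive ~30s for cplane
        TerminationPendingImmediate, // terminate and exit right away
        Terminated,
    }

    fn may_transition(from: Status, to: Status) -> bool {
        use Status::*;
        matches!(
            (from, to),
            (Empty, Init)
                | (Empty, TerminationPendingFast)
                | (Empty, TerminationPendingImmediate)
                | (Configuration, Running)
                | (Configuration, Failed)
                | (Init, Running)
                | (Init, Failed)
                | (Running, TerminationPendingFast)
                | (Running, TerminationPendingImmediate)
                | (TerminationPendingFast, Terminated)
                | (TerminationPendingImmediate, Terminated)
        )
    }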
@@ -87,6 +87,14 @@ struct Cli {
     #[arg(short = 'C', long, value_name = "DATABASE_URL")]
     pub connstr: String,
 
+    #[arg(
+        long,
+        default_value = "neon_superuser",
+        value_name = "PRIVILEGED_ROLE_NAME",
+        value_parser = Self::parse_privileged_role_name
+    )]
+    pub privileged_role_name: String,
+
     #[cfg(target_os = "linux")]
     #[arg(long, default_value = "neon-postgres")]
     pub cgroup: String,
@@ -149,6 +157,21 @@ impl Cli {
 
         Ok(url)
     }
+
+    /// For simplicity, we do not escape `privileged_role_name` anywhere in the code.
+    /// Since it's a system role, which we fully control, that's fine. Still, let's
+    /// validate it to avoid any surprises.
+    fn parse_privileged_role_name(value: &str) -> Result<String> {
+        use regex::Regex;
+
+        let pattern = Regex::new(r"^[a-z_]+$").unwrap();
+
+        if !pattern.is_match(value) {
+            bail!("--privileged-role-name can only contain lowercase letters and underscores")
+        }
+
+        Ok(value.to_string())
+    }
 }
 
 fn main() -> Result<()> {
@@ -178,6 +201,7 @@ fn main() -> Result<()> {
         ComputeNodeParams {
             compute_id: cli.compute_id,
             connstr,
+            privileged_role_name: cli.privileged_role_name.clone(),
             pgdata: cli.pgdata.clone(),
             pgbin: cli.pgbin.clone(),
             pgversion: get_pg_version_string(&cli.pgbin),
@@ -327,4 +351,49 @@ mod test {
         ])
         .expect_err("URL parameters are not allowed");
     }
+
+    #[test]
+    fn verify_privileged_role_name() {
+        // Valid name
+        let cli = Cli::parse_from([
+            "compute_ctl",
+            "--pgdata=test",
+            "--connstr=test",
+            "--compute-id=test",
+            "--privileged-role-name",
+            "my_superuser",
+        ]);
+        assert_eq!(cli.privileged_role_name, "my_superuser");
+
+        // Invalid names
+        Cli::try_parse_from([
+            "compute_ctl",
+            "--pgdata=test",
+            "--connstr=test",
+            "--compute-id=test",
+            "--privileged-role-name",
+            "NeonSuperuser",
+        ])
+        .expect_err("uppercase letters are not allowed");
+
+        Cli::try_parse_from([
+            "compute_ctl",
+            "--pgdata=test",
+            "--connstr=test",
+            "--compute-id=test",
+            "--privileged-role-name",
+            "$'neon_superuser",
+        ])
+        .expect_err("special characters are not allowed");
+
+        Cli::try_parse_from([
+            "compute_ctl",
+            "--pgdata=test",
+            "--connstr=test",
+            "--compute-id=test",
+            "--privileged-role-name",
+            "",
+        ])
+        .expect_err("empty name is not allowed");
+    }
 }
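Because the role name is restricted to ^[a-z_]+$, it can be spliced into SQL GRANTs and into postgresql.conf without quoting. A quick standalone sketch of that property (not from the commit; it only restates the validator's rule):

    use regex::Regex;

    /// True when `name` is safe to splice into `GRANT ... TO <name>` and into
    /// `neon.privileged_role_name=<name>` without any escaping.
    fn is_safe_role_name(name: &str) -> bool {
        // Same rule as compute_ctl's --privileged-role-name validator.
        Regex::new(r"^[a-z_]+$").unwrap().is_match(name)
    }

    fn main() {
        assert!(is_safe_role_name("neon_superuser"));
        assert!(!is_safe_role_name("NeonSuperuser"));     // uppercase rejected
        assert!(!is_safe_role_name("$'neon_superuser"));  // special characters rejected
        assert!(!is_safe_role_name(""));                  // empty rejected
    }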
@@ -1,13 +1,13 @@
-use anyhow::{Context, Result, anyhow};
+use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
 use compute_api::privilege::Privilege;
 use compute_api::responses::{
     ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus, LfcOffloadState,
-    LfcPrewarmState, TlsConfig,
+    LfcPrewarmState, PromoteState, TlsConfig,
 };
 use compute_api::spec::{
     ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverConnectionInfo,
-    PageserverShardConnectionInfo, PgIdent,
+    PageserverProtocol, PageserverShardConnectionInfo, PageserverShardInfo, PgIdent,
 };
 use futures::StreamExt;
 use futures::future::join_all;
@@ -30,8 +30,7 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
-use tokio::task::JoinHandle;
-use tokio::{spawn, time};
+use tokio::{spawn, sync::watch, task::JoinHandle, time};
 use tracing::{Instrument, debug, error, info, instrument, warn};
 use url::Url;
 use utils::id::{TenantId, TimelineId};
@@ -76,12 +75,20 @@ const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;
 
 /// Static configuration params that don't change after startup. These mostly
 /// come from the CLI args, or are derived from them.
+#[derive(Clone, Debug)]
 pub struct ComputeNodeParams {
     /// The ID of the compute
     pub compute_id: String,
-    // Url type maintains proper escaping
+
+    /// Url type maintains proper escaping
     pub connstr: url::Url,
 
+    /// The name of the 'weak' superuser role, which we give to the users.
+    /// It follows the allow list approach, i.e., we take a standard role
+    /// and grant it extra permissions with explicit GRANTs here and there,
+    /// and core patches.
+    pub privileged_role_name: String,
+
     pub resize_swap_on_bind: bool,
     pub set_disk_quota_for_fs: Option<String>,
 
@@ -176,6 +183,7 @@ pub struct ComputeState {
     /// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if
     /// mode == ComputeMode::Primary. None otherwise
     pub terminate_flush_lsn: Option<Lsn>,
+    pub promote_state: Option<watch::Receiver<PromoteState>>,
 
     pub metrics: ComputeMetrics,
 }
@@ -193,6 +201,7 @@ impl ComputeState {
             lfc_prewarm_state: LfcPrewarmState::default(),
             lfc_offload_state: LfcOffloadState::default(),
             terminate_flush_lsn: None,
+            promote_state: None,
         }
     }
 
@@ -272,53 +281,114 @@ impl ParsedSpec {
     }
 }
 
-fn extract_pageserver_conninfo_from_guc(
-    pageserver_connstring_guc: &str,
-) -> PageserverConnectionInfo {
-    PageserverConnectionInfo {
-        shards: pageserver_connstring_guc
-            .split(',')
-            .enumerate()
-            .map(|(i, connstr)| {
-                (
-                    i as u32,
-                    PageserverShardConnectionInfo {
-                        libpq_url: Some(connstr.to_string()),
-                        grpc_url: None,
-                    },
-                )
+/// Extract PageserverConnectionInfo from a comma-separated list of libpq connection strings.
+///
+/// This is used for backwards-compatibility, to parse the legacy `pageserver_connstr`
+/// field in the compute spec, or the 'neon.pageserver_connstring' GUC. Nowadays, the
+/// 'pageserver_connection_info' field should be used instead.
+fn extract_pageserver_conninfo_from_connstr(
+    connstr: &str,
+    stripe_size: Option<u32>,
+) -> Result<PageserverConnectionInfo, anyhow::Error> {
+    let shard_infos: Vec<_> = connstr
+        .split(',')
+        .map(|connstr| PageserverShardInfo {
+            pageservers: vec![PageserverShardConnectionInfo {
+                id: None,
+                libpq_url: Some(connstr.to_string()),
+                grpc_url: None,
+            }],
+        })
+        .collect();
+
+    match shard_infos.len() {
+        0 => anyhow::bail!("empty connection string"),
+        1 => {
+            // We assume that if there's only one connection string, it means "unsharded",
+            // rather than a sharded system with just a single shard. The latter is
+            // possible in principle, but we never do it.
+            let shard_count = ShardCount::unsharded();
+            let only_shard = shard_infos.first().unwrap().clone();
+            let shards = vec![(ShardIndex::unsharded(), only_shard)];
+            Ok(PageserverConnectionInfo {
+                shard_count,
+                stripe_size: None,
+                shards: shards.into_iter().collect(),
+                prefer_protocol: PageserverProtocol::Libpq,
             })
-            .collect(),
-        prefer_grpc: false,
+        }
+        n => {
+            if stripe_size.is_none() {
+                anyhow::bail!("{n} shards but no stripe_size");
+            }
+            let shard_count = ShardCount(n.try_into()?);
+            let shards = shard_infos
+                .into_iter()
+                .enumerate()
+                .map(|(idx, shard_info)| {
+                    (
+                        ShardIndex {
+                            shard_count,
+                            shard_number: ShardNumber(
+                                idx.try_into().expect("shard number fits in u8"),
+                            ),
+                        },
+                        shard_info,
+                    )
+                })
+                .collect();
+            Ok(PageserverConnectionInfo {
+                shard_count,
+                stripe_size,
+                shards,
+                prefer_protocol: PageserverProtocol::Libpq,
+            })
+        }
     }
 }
 
 impl TryFrom<ComputeSpec> for ParsedSpec {
-    type Error = String;
-    fn try_from(spec: ComputeSpec) -> Result<Self, String> {
+    type Error = anyhow::Error;
+    fn try_from(spec: ComputeSpec) -> Result<Self, anyhow::Error> {
         // Extract the options from the spec file that are needed to connect to
         // the storage system.
         //
-        // For backwards-compatibility, the top-level fields in the spec file
-        // may be empty. In that case, we need to dig them from the GUCs in the
-        // cluster.settings field.
-        let pageserver_conninfo = match &spec.pageserver_connection_info {
-            Some(x) => x.clone(),
-            None => {
-                if let Some(guc) = spec.cluster.settings.find("neon.pageserver_connstring") {
-                    extract_pageserver_conninfo_from_guc(&guc)
-                } else {
-                    return Err("pageserver connstr should be provided".to_string());
-                }
+        // In compute specs generated by old control plane versions, the spec file might
+        // be missing the `pageserver_connection_info` field. In that case, we need to dig
+        // the pageserver connection info from the `pageserver_connstr` field instead, or
+        // if that's missing too, from the GUC in the cluster.settings field.
+        let mut pageserver_conninfo = spec.pageserver_connection_info.clone();
+        if pageserver_conninfo.is_none() {
+            if let Some(pageserver_connstr_field) = &spec.pageserver_connstring {
+                pageserver_conninfo = Some(extract_pageserver_conninfo_from_connstr(
+                    pageserver_connstr_field,
+                    spec.shard_stripe_size,
+                )?);
             }
-        };
+        }
+        if pageserver_conninfo.is_none() {
+            if let Some(guc) = spec.cluster.settings.find("neon.pageserver_connstring") {
+                let stripe_size = if let Some(guc) = spec.cluster.settings.find("neon.stripe_size")
+                {
+                    Some(u32::from_str(&guc)?)
+                } else {
+                    None
+                };
+                pageserver_conninfo =
+                    Some(extract_pageserver_conninfo_from_connstr(&guc, stripe_size)?);
+            }
+        }
+        let pageserver_conninfo = pageserver_conninfo.ok_or(anyhow::anyhow!(
+            "pageserver connection information should be provided"
+        ))?;
+
+        // Similarly for safekeeper connection strings
         let safekeeper_connstrings = if spec.safekeeper_connstrings.is_empty() {
             if matches!(spec.mode, ComputeMode::Primary) {
                 spec.cluster
                     .settings
                     .find("neon.safekeepers")
-                    .ok_or("safekeeper connstrings should be provided")?
+                    .ok_or(anyhow::anyhow!("safekeeper connstrings should be provided"))?
                     .split(',')
                     .map(|str| str.to_string())
                     .collect()
@@ -333,22 +403,22 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
         let tenant_id: TenantId = if let Some(tenant_id) = spec.tenant_id {
             tenant_id
         } else {
-            spec.cluster
+            let guc = spec
+                .cluster
                 .settings
                 .find("neon.tenant_id")
-                .ok_or("tenant id should be provided")
-                .map(|s| TenantId::from_str(&s))?
-                .or(Err("invalid tenant id"))?
+                .ok_or(anyhow::anyhow!("tenant id should be provided"))?;
+            TenantId::from_str(&guc).context("invalid tenant id")?
         };
         let timeline_id: TimelineId = if let Some(timeline_id) = spec.timeline_id {
             timeline_id
         } else {
-            spec.cluster
+            let guc = spec
+                .cluster
                 .settings
                 .find("neon.timeline_id")
-                .ok_or("timeline id should be provided")
-                .map(|s| TimelineId::from_str(&s))?
-                .or(Err("invalid timeline id"))?
+                .ok_or(anyhow::anyhow!("timeline id should be provided"))?;
+            TimelineId::from_str(&guc).context(anyhow::anyhow!("invalid timeline id"))?
         };
 
         let endpoint_storage_addr: Option<String> = spec
@@ -372,7 +442,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
         };
 
         // Now check validity of the parsed specification
-        res.validate()?;
+        res.validate().map_err(anyhow::Error::msg)?;
         Ok(res)
     }
 }
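The legacy neon.pageserver_connstring GUC is just a comma-separated list where position i corresponds to shard number i. A standalone sketch of that positional mapping, using plain tuples instead of neon's ShardIndex/PageserverShardInfo types (hostnames and the user are illustrative placeholders):

    /// Split a legacy pageserver connstring into (shard_number, libpq_url) pairs.
    /// With a single entry the system is treated as unsharded, which is why the
    /// real code special-cases `len() == 1`.
    fn legacy_connstr_to_shards(connstr: &str) -> Vec<(u8, String)> {
        connstr
            .split(',')
            .enumerate()
            .map(|(i, url)| (i as u8, url.trim().to_string()))
            .collect()
    }

    fn main() {
        let shards = legacy_connstr_to_shards(
            "postgresql://no_user@ps-0:6400,postgresql://no_user@ps-1:6400",
        );
        assert_eq!(shards.len(), 2);
        assert_eq!(shards[1].0, 1); // second entry maps to shard number 1
    }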
@@ -452,7 +522,7 @@ impl ComputeNode {
 
         let mut new_state = ComputeState::new();
         if let Some(spec) = config.spec {
-            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow!(msg))?;
+            let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
             new_state.pspec = Some(pspec);
         }
 
@@ -983,14 +1053,20 @@ impl ComputeNode {
             None
         };
 
-        let mut delay_exit = false;
         let mut state = self.state.lock().unwrap();
         state.terminate_flush_lsn = lsn;
-        if let ComputeStatus::TerminationPending { mode } = state.status {
+
+        let delay_exit = state.status == ComputeStatus::TerminationPendingFast;
+        if state.status == ComputeStatus::TerminationPendingFast
+            || state.status == ComputeStatus::TerminationPendingImmediate
+        {
+            info!(
+                "Changing compute status from {} to {}",
+                state.status,
+                ComputeStatus::Terminated
+            );
             state.status = ComputeStatus::Terminated;
             self.state_changed.notify_all();
-            // we were asked to terminate gracefully, don't exit to avoid restart
-            delay_exit = mode == compute_api::responses::TerminateMode::Fast
         }
         drop(state);
 
@@ -1054,12 +1130,13 @@ impl ComputeNode {
         let spec = compute_state.pspec.as_ref().expect("spec must be set");
 
         let started = Instant::now();
-        let (connected, size) = if spec.pageserver_conninfo.prefer_grpc {
-            self.try_get_basebackup_grpc(spec, lsn)?
-        } else {
-            self.try_get_basebackup_libpq(spec, lsn)?
+        let (connected, size) = match spec.pageserver_conninfo.prefer_protocol {
+            PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
+            PageserverProtocol::Libpq => self.try_get_basebackup_libpq(spec, lsn)?,
         };
 
+        self.fix_zenith_signal_neon_signal()?;
+
         let mut state = self.state.lock().unwrap();
         state.metrics.pageserver_connect_micros =
             connected.duration_since(started).as_micros() as u64;
@@ -1069,27 +1146,56 @@ impl ComputeNode {
         Ok(())
     }
 
+    /// Move the Zenith signal file to Neon signal file location.
+    /// This makes Compute compatible with older PageServers that don't yet
+    /// know about the Zenith->Neon rename.
+    fn fix_zenith_signal_neon_signal(&self) -> Result<()> {
+        let datadir = Path::new(&self.params.pgdata);
+
+        let neonsig = datadir.join("neon.signal");
+
+        if neonsig.is_file() {
+            return Ok(());
+        }
+
+        let zenithsig = datadir.join("zenith.signal");
+
+        if zenithsig.is_file() {
+            fs::copy(zenithsig, neonsig)?;
+        }
+
+        Ok(())
+    }
+
     /// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
     /// the connection was established, and the (compressed) size of the basebackup.
     fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
+        let shard0_index = ShardIndex {
+            shard_number: ShardNumber(0),
+            shard_count: spec.pageserver_conninfo.shard_count,
+        };
         let shard0 = spec
             .pageserver_conninfo
             .shards
-            .get(&0)
-            .expect("shard 0 connection info missing");
-        let shard0_url = shard0.grpc_url.clone().expect("no grpc_url for shard 0");
-        let shard_index = match spec.pageserver_conninfo.shards.len() as u8 {
-            0 | 1 => ShardIndex::unsharded(),
-            count => ShardIndex::new(ShardNumber(0), ShardCount(count)),
-        };
+            .get(&shard0_index)
+            .ok_or_else(|| {
+                anyhow::anyhow!("shard connection info missing for shard {}", shard0_index)
+            })?;
+        let pageserver = shard0
+            .pageservers
+            .first()
+            .expect("must have at least one pageserver");
+        let shard0_url = pageserver
+            .grpc_url
+            .clone()
+            .expect("no grpc_url for shard 0");
+
         let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {
             let mut client = page_api::Client::connect(
                 shard0_url,
                 spec.tenant_id,
                 spec.timeline_id,
-                shard_index,
+                shard0_index,
                 spec.storage_auth_token.clone(),
                 None, // NB: base backups use payload compression
             )
@@ -1121,12 +1227,25 @@ impl ComputeNode {
     /// Fetches a basebackup via libpq. The connstring must use postgresql://. Returns the timestamp
     /// when the connection was established, and the (compressed) size of the basebackup.
     fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
+        let shard0_index = ShardIndex {
+            shard_number: ShardNumber(0),
+            shard_count: spec.pageserver_conninfo.shard_count,
+        };
         let shard0 = spec
             .pageserver_conninfo
             .shards
-            .get(&0)
-            .expect("shard 0 connection info missing");
-        let shard0_connstr = shard0.libpq_url.clone().expect("no libpq_url for shard 0");
+            .get(&shard0_index)
+            .ok_or_else(|| {
+                anyhow::anyhow!("shard connection info missing for shard {}", shard0_index)
+            })?;
+        let pageserver = shard0
+            .pageservers
+            .first()
+            .expect("must have at least one pageserver");
+        let shard0_connstr = pageserver
+            .libpq_url
+            .clone()
+            .expect("no libpq_url for shard 0");
         let mut config = postgres::Config::from_str(&shard0_connstr)?;
 
         // Use the storage auth token from the config file, if given.
@@ -1286,9 +1405,7 @@ impl ComputeNode {
 
         // In case of error, log and fail the check, but don't crash.
         // We're playing it safe because these errors could be transient
-        // and we don't yet retry. Also being careful here allows us to
-        // be backwards compatible with safekeepers that don't have the
-        // TIMELINE_STATUS API yet.
+        // and we don't yet retry.
         if responses.len() < quorum {
             error!(
                 "failed sync safekeepers check {:?} {:?} {:?}",
@@ -1391,6 +1508,7 @@ impl ComputeNode {
         self.create_pgdata()?;
         config::write_postgres_conf(
             pgdata_path,
+            &self.params,
             &pspec.spec,
             self.params.internal_http_port,
             tls_config,
@@ -1731,6 +1849,7 @@ impl ComputeNode {
         }
 
         // Run migrations separately to not hold up cold starts
+        let params = self.params.clone();
         tokio::spawn(async move {
             let mut conf = conf.as_ref().clone();
             conf.application_name("compute_ctl:migrations");
@@ -1742,7 +1861,7 @@ impl ComputeNode {
                     eprintln!("connection error: {e}");
                 }
             });
-            if let Err(e) = handle_migrations(&mut client).await {
+            if let Err(e) = handle_migrations(params, &mut client).await {
                 error!("Failed to run migrations: {}", e);
             }
         }
@@ -1821,11 +1940,14 @@ impl ComputeNode {
         let pgdata_path = Path::new(&self.params.pgdata);
         config::write_postgres_conf(
             pgdata_path,
+            &self.params,
             &spec,
             self.params.internal_http_port,
             tls_config,
         )?;
 
+        self.pg_reload_conf()?;
+
         if !spec.skip_pg_catalog_updates {
             let max_concurrent_connections = spec.reconfigure_concurrency;
             // Temporarily reset max_cluster_size in config
@@ -1845,10 +1967,9 @@ impl ComputeNode {
 
                 Ok(())
             })?;
+            self.pg_reload_conf()?;
         }
 
-        self.pg_reload_conf()?;
-
         let unknown_op = "unknown".to_string();
         let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
         info!(
@@ -1921,7 +2042,8 @@ impl ComputeNode {
 
                     // exit loop
                     ComputeStatus::Failed
-                    | ComputeStatus::TerminationPending { .. }
+                    | ComputeStatus::TerminationPendingFast
+                    | ComputeStatus::TerminationPendingImmediate
                     | ComputeStatus::Terminated => break 'cert_update,
 
                     // wait
@@ -2087,7 +2209,7 @@ LIMIT 100",
         self.params
             .remote_ext_base_url
             .as_ref()
-            .ok_or(DownloadError::BadInput(anyhow!(
+            .ok_or(DownloadError::BadInput(anyhow::anyhow!(
                 "Remote extensions storage is not configured",
             )))?;
 
@@ -2283,7 +2405,7 @@ LIMIT 100",
         let remote_extensions = spec
             .remote_extensions
             .as_ref()
-            .ok_or(anyhow!("Remote extensions are not configured"))?;
+            .ok_or(anyhow::anyhow!("Remote extensions are not configured"))?;
 
         info!("parse shared_preload_libraries from spec.cluster.settings");
         let mut libs_vec = Vec::new();
@@ -2431,14 +2553,31 @@ LIMIT 100",
     pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {
         self.terminate_lfc_offload_task();
         let secs = interval.as_secs();
-        info!("spawning lfc offload worker with {secs}s interval");
         let this = self.clone();
+
+        info!("spawning LFC offload worker with {secs}s interval");
         let handle = spawn(async move {
             let mut interval = time::interval(interval);
             interval.tick().await; // returns immediately
             loop {
                 interval.tick().await;
-                this.offload_lfc_async().await;
+
+                let prewarm_state = this.state.lock().unwrap().lfc_prewarm_state.clone();
+                // Do not offload LFC state if we are currently prewarming or any issue occurred.
+                // If we'd do that, we might override the LFC state in endpoint storage with some
+                // incomplete state. Imagine a situation:
+                // 1. Endpoint started with `autoprewarm: true`
+                // 2. While prewarming is not completed, we upload the new incomplete state
+                // 3. Compute gets interrupted and restarts
+                // 4. We start again and try to prewarm with the state from 2. instead of the previous complete state
+                if matches!(
+                    prewarm_state,
+                    LfcPrewarmState::Completed
+                        | LfcPrewarmState::NotPrewarmed
+                        | LfcPrewarmState::Skipped
+                ) {
+                    this.offload_lfc_async().await;
+                }
             }
         });
         *self.lfc_offload_task.lock().unwrap() = Some(handle);
@@ -2455,19 +2594,11 @@ LIMIT 100",
         // If the value is -1, we never suspend so set the value to default collection.
         // If the value is 0, it means default, we will just continue to use the default.
         if spec.suspend_timeout_seconds == -1 || spec.suspend_timeout_seconds == 0 {
-            info!(
-                "[NEON_EXT_INT_UPD] Spec Timeout: {}, New Timeout: {}",
-                spec.suspend_timeout_seconds, DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL
-            );
             self.params.installed_extensions_collection_interval.store(
                 DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL,
                 std::sync::atomic::Ordering::SeqCst,
             );
         } else {
-            info!(
-                "[NEON_EXT_INT_UPD] Spec Timeout: {}",
-                spec.suspend_timeout_seconds
-            );
             self.params.installed_extensions_collection_interval.store(
                 spec.suspend_timeout_seconds as u64,
                 std::sync::atomic::Ordering::SeqCst,
@@ -2485,7 +2616,7 @@ pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
                 serde_json::to_string(&extensions).expect("failed to serialize extensions list")
             );
         }
-        Err(err) => error!("could not get installed extensions: {err:?}"),
+        Err(err) => error!("could not get installed extensions: {err}"),
     }
     Ok(())
 }
@@ -2598,7 +2729,10 @@ mod tests {
 
         match ParsedSpec::try_from(spec.clone()) {
             Ok(_p) => panic!("Failed to detect duplicate entry"),
-            Err(e) => assert!(e.starts_with("duplicate entry in safekeeper_connstrings:")),
+            Err(e) => assert!(
+                e.to_string()
+                    .starts_with("duplicate entry in safekeeper_connstrings:")
+            ),
         };
     }
 }
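The offload worker is a plain tokio interval loop that now checks the prewarm state before each upload. A reduced, self-contained sketch of that pattern (the PrewarmState enum and the offload body stand in for the real LfcPrewarmState and offload_lfc_async; names here are illustrative):

    use std::sync::{Arc, Mutex};
    use std::time::Duration;

    #[derive(Clone)]
    enum PrewarmState { NotPrewarmed, Prewarming, Completed, Skipped, Failed }

    async fn offload() { /* upload LFC state to endpoint storage */ }

    fn spawn_offload_task(
        state: Arc<Mutex<PrewarmState>>,
        every: Duration,
    ) -> tokio::task::JoinHandle<()> {
        tokio::spawn(async move {
            let mut ticker = tokio::time::interval(every);
            ticker.tick().await; // first tick completes immediately
            loop {
                ticker.tick().await;
                // Skip offload while a prewarm is pending or has failed, so a
                // half-built cache state never overwrites the last good one.
                let safe = matches!(
                    *state.lock().unwrap(),
                    PrewarmState::Completed | PrewarmState::NotPrewarmed | PrewarmState::Skipped
                );
                if safe {
                    offload().await;
                }
            }
        })
    }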
@@ -70,7 +70,7 @@ impl ComputeNode {
             }
         };
         let row = match client
-            .query_one("select * from get_prewarm_info()", &[])
+            .query_one("select * from neon.get_prewarm_info()", &[])
             .await
         {
             Ok(row) => row,
@@ -89,7 +89,7 @@ impl ComputeNode {
         self.state.lock().unwrap().lfc_offload_state.clone()
     }
 
-    /// If there is a prewarm request ongoing, return false, true otherwise
+    /// If there is a prewarm request ongoing, return `false`, `true` otherwise.
     pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
         {
             let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
@@ -101,14 +101,25 @@ impl ComputeNode {
 
         let cloned = self.clone();
         spawn(async move {
-            let Err(err) = cloned.prewarm_impl(from_endpoint).await else {
-                cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Completed;
-                return;
-            };
-            error!(%err);
-            cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Failed {
-                error: err.to_string(),
+            let state = match cloned.prewarm_impl(from_endpoint).await {
+                Ok(true) => LfcPrewarmState::Completed,
+                Ok(false) => {
+                    info!(
+                        "skipping LFC prewarm because LFC state is not found in endpoint storage"
+                    );
+                    LfcPrewarmState::Skipped
+                }
+                Err(err) => {
+                    crate::metrics::LFC_PREWARM_ERRORS.inc();
+                    error!(%err, "could not prewarm LFC");
+
+                    LfcPrewarmState::Failed {
+                        error: err.to_string(),
+                    }
+                }
             };
+
+            cloned.state.lock().unwrap().lfc_prewarm_state = state;
         });
         true
     }
@@ -119,15 +130,21 @@ impl ComputeNode {
         EndpointStoragePair::from_spec_and_endpoint(state.pspec.as_ref().unwrap(), from_endpoint)
     }
 
-    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<()> {
+    /// Request LFC state from endpoint storage and load corresponding pages into Postgres.
+    /// Returns a result with `false` if the LFC state is not found in endpoint storage.
+    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<bool> {
         let EndpointStoragePair { url, token } = self.endpoint_storage_pair(from_endpoint)?;
-        info!(%url, "requesting LFC state from endpoint storage");
+
+        info!(%url, "requesting LFC state from endpoint storage");
         let request = Client::new().get(&url).bearer_auth(token);
         let res = request.send().await.context("querying endpoint storage")?;
         let status = res.status();
-        if status != StatusCode::OK {
-            bail!("{status} querying endpoint storage")
+        match status {
+            StatusCode::OK => (),
+            StatusCode::NOT_FOUND => {
+                return Ok(false);
+            }
+            _ => bail!("{status} querying endpoint storage"),
         }
 
         let mut uncompressed = Vec::new();
@@ -140,15 +157,18 @@ impl ComputeNode {
             .await
             .context("decoding LFC state")?;
         let uncompressed_len = uncompressed.len();
-        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into postgres");
+
+        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into Postgres");
+
         ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
             .await
             .context("connecting to postgres")?
-            .query_one("select prewarm_local_cache($1)", &[&uncompressed])
+            .query_one("select neon.prewarm_local_cache($1)", &[&uncompressed])
             .await
             .context("loading LFC state into postgres")
-            .map(|_| ())
+            .map(|_| ())?;
+
+        Ok(true)
     }
 
     /// If offload request is ongoing, return false, true otherwise
@@ -176,11 +196,14 @@ impl ComputeNode {
 
     async fn offload_lfc_with_state_update(&self) {
         crate::metrics::LFC_OFFLOADS.inc();
+
         let Err(err) = self.offload_lfc_impl().await else {
             self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
             return;
         };
-        error!(%err);
+
+        crate::metrics::LFC_OFFLOAD_ERRORS.inc();
+        error!(%err, "could not offload LFC state to endpoint storage");
         self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
             error: err.to_string(),
         };
@@ -188,13 +211,13 @@ impl ComputeNode {
 
     async fn offload_lfc_impl(&self) -> Result<()> {
         let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
-        info!(%url, "requesting LFC state from postgres");
+        info!(%url, "requesting LFC state from Postgres");
 
         let mut compressed = Vec::new();
         ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
             .await
             .context("connecting to postgres")?
-            .query_one("select get_local_cache_state()", &[])
+            .query_one("select neon.get_local_cache_state()", &[])
             .await
             .context("querying LFC state")?
             .try_get::<usize, &[u8]>(0)
@@ -203,13 +226,17 @@ impl ComputeNode {
             .read_to_end(&mut compressed)
             .await
             .context("compressing LFC state")?;
 
         let compressed_len = compressed.len();
         info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");
 
         let request = Client::new().put(url).bearer_auth(token).body(compressed);
         match request.send().await {
             Ok(res) if res.status() == StatusCode::OK => Ok(()),
-            Ok(res) => bail!("Error writing to endpoint storage: {}", res.status()),
+            Ok(res) => bail!(
+                "Request to endpoint storage failed with status: {}",
+                res.status()
+            ),
             Err(err) => Err(err).context("writing to endpoint storage"),
         }
     }
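Treating a 404 from endpoint storage as "nothing to prewarm" rather than an error is the key behavioural change above. A reduced sketch of that decision with reqwest (the URL and token are placeholders; this is not the project's wrapper, just the same status handling in isolation):

    use anyhow::{bail, Context, Result};
    use reqwest::StatusCode;

    /// Returns Ok(Some(bytes)) when state exists, Ok(None) when nothing is
    /// stored yet, and Err for any other status or transport failure.
    async fn fetch_lfc_state(url: &str, token: &str) -> Result<Option<Vec<u8>>> {
        let res = reqwest::Client::new()
            .get(url)
            .bearer_auth(token)
            .send()
            .await
            .context("querying endpoint storage")?;
        match res.status() {
            StatusCode::OK => Ok(Some(res.bytes().await?.to_vec())),
            StatusCode::NOT_FOUND => Ok(None),
            status => bail!("{status} querying endpoint storage"),
        }
    }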
compute_tools/src/compute_promote.rs (new file, 132 lines)
@@ -0,0 +1,132 @@
use crate::compute::ComputeNode;
use anyhow::{Context, Result, bail};
use compute_api::{
    responses::{LfcPrewarmState, PromoteState, SafekeepersLsn},
    spec::ComputeMode,
};
use std::{sync::Arc, time::Duration};
use tokio::time::sleep;
use utils::lsn::Lsn;

impl ComputeNode {
    /// Returns only when promote fails or succeeds. If a network error occurs
    /// and http client disconnects, this does not stop promotion, and subsequent
    /// calls block until promote finishes.
    /// Called by control plane on secondary after primary endpoint is terminated
    pub async fn promote(self: &Arc<Self>, safekeepers_lsn: SafekeepersLsn) -> PromoteState {
        let cloned = self.clone();
        let start_promotion = || {
            let (tx, rx) = tokio::sync::watch::channel(PromoteState::NotPromoted);
            tokio::spawn(async move {
                tx.send(match cloned.promote_impl(safekeepers_lsn).await {
                    Ok(_) => PromoteState::Completed,
                    Err(err) => {
                        tracing::error!(%err, "promoting");
                        PromoteState::Failed {
                            error: err.to_string(),
                        }
                    }
                })
            });
            rx
        };

        let mut task;
        // self.state is unlocked after block ends so we lock it in promote_impl
        // and task.changed() is reached
        {
            task = self
                .state
                .lock()
                .unwrap()
                .promote_state
                .get_or_insert_with(start_promotion)
                .clone()
        }
        task.changed().await.expect("promote sender dropped");
        task.borrow().clone()
    }

    // Why do we have to supply safekeepers?
    // For secondary we use primary_connection_conninfo so safekeepers field is empty
    async fn promote_impl(&self, safekeepers_lsn: SafekeepersLsn) -> Result<()> {
        {
            let state = self.state.lock().unwrap();
            let mode = &state.pspec.as_ref().unwrap().spec.mode;
            if *mode != ComputeMode::Replica {
                bail!("{} is not replica", mode.to_type_str());
            }

            // we don't need to query Postgres so not self.lfc_prewarm_state()
            match &state.lfc_prewarm_state {
                LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming => {
                    bail!("prewarm not requested or pending")
                }
                LfcPrewarmState::Failed { error } => {
                    tracing::warn!(%error, "replica prewarm failed")
                }
                _ => {}
            }
        }

        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
            .context("connecting to postgres")?;

        let primary_lsn = safekeepers_lsn.wal_flush_lsn;
        let mut last_wal_replay_lsn: Lsn = Lsn::INVALID;
        const RETRIES: i32 = 20;
        for i in 0..=RETRIES {
            let row = client
                .query_one("SELECT pg_last_wal_replay_lsn()", &[])
                .await
                .context("getting last replay lsn")?;
            let lsn: u64 = row.get::<usize, postgres_types::PgLsn>(0).into();
            last_wal_replay_lsn = lsn.into();
            if last_wal_replay_lsn >= primary_lsn {
                break;
            }
            tracing::info!("Try {i}, replica lsn {last_wal_replay_lsn}, primary lsn {primary_lsn}");
            sleep(Duration::from_secs(1)).await;
        }
        if last_wal_replay_lsn < primary_lsn {
            bail!("didn't catch up with primary in {RETRIES} retries");
        }

        // using $1 doesn't work with ALTER SYSTEM SET
        let safekeepers_sql = format!(
            "ALTER SYSTEM SET neon.safekeepers='{}'",
            safekeepers_lsn.safekeepers
        );
        client
            .query(&safekeepers_sql, &[])
            .await
            .context("setting safekeepers")?;
        client
            .query("SELECT pg_reload_conf()", &[])
            .await
            .context("reloading postgres config")?;
        let row = client
            .query_one("SELECT * FROM pg_promote()", &[])
            .await
            .context("pg_promote")?;
        if !row.get::<usize, bool>(0) {
            bail!("pg_promote() returned false");
        }

        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
            .context("connecting to postgres")?;
        let row = client
            .query_one("SHOW transaction_read_only", &[])
            .await
            .context("getting transaction_read_only")?;
        if row.get::<usize, &str>(0) == "on" {
            bail!("replica in read only mode after promotion");
        }

        let mut state = self.state.lock().unwrap();
        state.pspec.as_mut().unwrap().spec.mode = ComputeMode::Primary;
        Ok(())
    }
}
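promote() relies on a watch channel stored in shared state so that only the first caller spawns the promotion task and every later caller awaits the same outcome. The general "run once, await many" pattern, reduced to a standalone sketch with generic names:

    use std::sync::{Arc, Mutex};
    use tokio::sync::watch;

    #[derive(Clone, Debug, PartialEq)]
    enum State { NotDone, Done }

    async fn run_once(slot: Arc<Mutex<Option<watch::Receiver<State>>>>) -> State {
        let mut rx = {
            // Lock only long enough to either reuse the existing receiver
            // or spawn the work exactly once; the guard is dropped before await.
            let mut guard = slot.lock().unwrap();
            guard
                .get_or_insert_with(|| {
                    let (tx, rx) = watch::channel(State::NotDone);
                    tokio::spawn(async move {
                        // ... do the long-running work exactly once ...
                        let _ = tx.send(State::Done);
                    });
                    rx
                })
                .clone()
        };
        rx.changed().await.expect("sender dropped");
        rx.borrow().clone()
    }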
@@ -9,11 +9,14 @@ use std::path::Path;
 use compute_api::responses::TlsConfig;
 use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};
 
+use crate::compute::ComputeNodeParams;
 use crate::pg_helpers::{
     GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
 };
 use crate::tls::{self, SERVER_CRT, SERVER_KEY};
 
+use utils::shard::{ShardIndex, ShardNumber};
+
 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
 pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
@@ -41,6 +44,7 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 /// Create or completely rewrite configuration file specified by `path`
 pub fn write_postgres_conf(
     pgdata_path: &Path,
+    params: &ComputeNodeParams,
     spec: &ComputeSpec,
     extension_server_port: u16,
     tls_config: &Option<TlsConfig>,
@@ -56,24 +60,53 @@ pub fn write_postgres_conf(
 
     // Add options for connecting to storage
     writeln!(file, "# Neon storage settings")?;
+    writeln!(file)?;
     if let Some(conninfo) = &spec.pageserver_connection_info {
+        // Stripe size GUC should be defined prior to connection string
+        if let Some(stripe_size) = conninfo.stripe_size {
+            writeln!(
+                file,
+                "# from compute spec's pageserver_conninfo.stripe_size field"
+            )?;
+            writeln!(file, "neon.stripe_size={stripe_size}")?;
+        }
+
         let mut libpq_urls: Option<Vec<String>> = Some(Vec::new());
         let mut grpc_urls: Option<Vec<String>> = Some(Vec::new());
-        for shardno in 0..conninfo.shards.len() {
-            let info = conninfo.shards.get(&(shardno as u32)).ok_or_else(|| {
-                anyhow::anyhow!("shard {shardno} missing from pageserver_connection_info shard map")
+        let num_shards = if conninfo.shard_count.0 == 0 {
+            1 // unsharded, treat it as a single shard
+        } else {
+            conninfo.shard_count.0
+        };
+
+        for shard_number in 0..num_shards {
+            let shard_index = ShardIndex {
+                shard_number: ShardNumber(shard_number),
+                shard_count: conninfo.shard_count,
+            };
+            let info = conninfo.shards.get(&shard_index).ok_or_else(|| {
+                anyhow::anyhow!(
+                    "shard {shard_index} missing from pageserver_connection_info shard map"
+                )
             })?;
 
-            if let Some(url) = &info.libpq_url {
+            let first_pageserver = info
+                .pageservers
+                .first()
+                .expect("must have at least one pageserver");
+
+            // Add the libpq URL to the array, or if the URL is missing, reset the array
+            // forgetting any previous entries. All servers must have a libpq URL, or none
+            // at all.
+            if let Some(url) = &first_pageserver.libpq_url {
                 if let Some(ref mut urls) = libpq_urls {
                     urls.push(url.clone());
                 }
             } else {
                 libpq_urls = None
            }
-            if let Some(url) = &info.grpc_url {
+            // Similarly for gRPC URLs
+            if let Some(url) = &first_pageserver.grpc_url {
                 if let Some(ref mut urls) = grpc_urls {
                     urls.push(url.clone());
                 }
@@ -82,6 +115,10 @@ pub fn write_postgres_conf(
             }
         }
         if let Some(libpq_urls) = libpq_urls {
+            writeln!(
+                file,
+                "# derived from compute spec's pageserver_conninfo field"
+            )?;
             writeln!(
                 file,
                 "neon.pageserver_connstring={}",
@@ -91,6 +128,10 @@ pub fn write_postgres_conf(
             writeln!(file, "# no neon.pageserver_connstring")?;
         }
         if let Some(grpc_urls) = grpc_urls {
+            writeln!(
+                file,
+                "# derived from compute spec's pageserver_conninfo field"
+            )?;
             writeln!(
                 file,
                 "neon.pageserver_grpc_urls={}",
@@ -99,11 +140,19 @@ pub fn write_postgres_conf(
         } else {
             writeln!(file, "# no neon.pageserver_grpc_urls")?;
         }
+    } else {
+        // Stripe size GUC should be defined prior to connection string
+        if let Some(stripe_size) = spec.shard_stripe_size {
+            writeln!(file, "# from compute spec's shard_stripe_size field")?;
+            writeln!(file, "neon.stripe_size={stripe_size}")?;
+        }
+
+        if let Some(s) = &spec.pageserver_connstring {
+            writeln!(file, "# from compute spec's pageserver_connstring field")?;
+            writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
+        }
     }
 
-    if let Some(stripe_size) = spec.shard_stripe_size {
-        writeln!(file, "neon.stripe_size={stripe_size}")?;
-    }
     if !spec.safekeeper_connstrings.is_empty() {
         let mut neon_safekeepers_value = String::new();
         tracing::info!(
@@ -203,6 +252,12 @@ pub fn write_postgres_conf(
         }
     }
 
+    writeln!(
+        file,
+        "neon.privileged_role_name={}",
+        escape_conf_value(params.privileged_role_name.as_str())
+    )?;
+
     // If there are any extra options in the 'settings' field, append those
     if spec.cluster.settings.is_some() {
         writeln!(file, "# Managed by compute_ctl: begin")?;
@@ -83,6 +83,87 @@ paths:
 schema:
 $ref: "#/components/schemas/DbsAndRoles"
 
+/promote:
+post:
+tags:
+- Promotion
+summary: Promote secondary replica to primary
+description: ""
+operationId: promoteReplica
+requestBody:
+description: Promote requests data
+required: true
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/SafekeepersLsn"
+responses:
+200:
+description: Promote succeeded or wasn't started
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/PromoteState"
+500:
+description: Promote failed
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/PromoteState"
+
+/lfc/prewarm:
+post:
+summary: Request LFC Prewarm
+parameters:
+- name: from_endpoint
+in: query
+schema:
+type: string
+description: ""
+operationId: lfcPrewarm
+responses:
+202:
+description: LFC prewarm started
+429:
+description: LFC prewarm ongoing
+get:
+tags:
+- Prewarm
+summary: Get LFC prewarm state
+description: ""
+operationId: getLfcPrewarmState
+responses:
+200:
+description: Prewarm state
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/LfcPrewarmState"
+
+/lfc/offload:
+post:
+summary: Request LFC offload
+description: ""
+operationId: lfcOffload
+responses:
+202:
+description: LFC offload started
+429:
+description: LFC offload ongoing
+get:
+tags:
+- Prewarm
+summary: Get LFC offloading state
+description: ""
+operationId: getLfcOffloadState
+responses:
+200:
+description: Offload state
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/LfcOffloadState"
+
 /database_schema:
 get:
 tags:
@@ -290,9 +371,28 @@ paths:
 summary: Terminate Postgres and wait for it to exit
 description: ""
 operationId: terminate
+parameters:
+- name: mode
+in: query
+description: "Terminate mode: fast (wait 30s before returning) and immediate"
+required: false
+schema:
+type: string
+enum: ["fast", "immediate"]
+default: fast
 responses:
 200:
 description: Result
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/TerminateResponse"
+201:
+description: Result if compute is already terminated
+content:
+application/json:
+schema:
+$ref: "#/components/schemas/TerminateResponse"
 412:
 description: "wrong state"
 content:
@@ -335,15 +435,6 @@ components:
 total_startup_ms:
 type: integer
 
-Info:
-type: object
-description: Information about VM/Pod.
-required:
-- num_cpus
-properties:
-num_cpus:
-type: integer
-
 DbsAndRoles:
 type: object
 description: Databases and Roles
@@ -458,11 +549,14 @@ components:
 type: string
 enum:
 - empty
-- init
-- failed
-- running
 - configuration_pending
+- init
+- running
 - configuration
+- failed
+- termination_pending_fast
+- termination_pending_immediate
+- terminated
 example: running
 
 ExtensionInstallRequest:
@@ -497,25 +591,69 @@ components:
 type: string
 example: "1.0.0"
 
-InstalledExtensions:
+SafekeepersLsn:
 type: object
+required:
+- safekeepers
+- wal_flush_lsn
 properties:
-extensions:
-description: Contains list of installed extensions.
-type: array
-items:
-type: object
-properties:
-extname:
-type: string
-version:
-type: string
-items:
-type: string
-n_databases:
-type: integer
-owned_by_superuser:
-type: integer
+safekeepers:
+description: Primary replica safekeepers
+type: string
+wal_flush_lsn:
+description: Primary last WAL flush LSN
+type: string
+
+LfcPrewarmState:
+type: object
+required:
+- status
+- total
+- prewarmed
+- skipped
+properties:
+status:
+description: LFC prewarm status
+enum: [not_prewarmed, prewarming, completed, failed, skipped]
+type: string
+error:
+description: LFC prewarm error, if any
+type: string
+total:
+description: Total pages processed
+type: integer
+prewarmed:
+description: Total pages prewarmed
+type: integer
+skipped:
+description: Pages processed but not prewarmed
+type: integer
+
+LfcOffloadState:
+type: object
+required:
+- status
+properties:
+status:
+description: LFC offload status
+enum: [not_offloaded, offloading, completed, failed]
+type: string
+error:
+description: LFC offload error, if any
+type: string
+
+PromoteState:
+type: object
+required:
+- status
+properties:
+status:
+description: Promote result
+enum: [not_promoted, completed, failed]
+type: string
+error:
+description: Promote error, if any
+type: string
 
 SetRoleGrantsRequest:
 type: object
@@ -544,6 +682,17 @@ components:
 description: Role name.
 example: "neon"
 
+TerminateResponse:
+type: object
+required:
+- lsn
+properties:
+lsn:
+type: string
+nullable: true
+description: "last WAL flush LSN"
+example: "0/028F10D8"
+
 SetRoleGrantsResponse:
 type: object
 required:
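For illustration, here is how a client might exercise the endpoints introduced above. This is a sketch only: the base address and port are assumptions, the form values are placeholders, and it assumes the `reqwest` crate with its "blocking" feature plus whatever authentication the deployment requires.

```rust
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical compute_ctl HTTP address; not taken from the repository.
    let base = "http://127.0.0.1:3080";
    let client = reqwest::blocking::Client::new();

    // Terminate with the new `mode` query parameter (fast | immediate).
    let resp = client
        .post(format!("{base}/terminate"))
        .query(&[("mode", "immediate")])
        .send()?;
    println!("terminate: {}", resp.status());

    // Promote a secondary; the body is a form-encoded SafekeepersLsn
    // (safekeepers plus the primary's last WAL flush LSN).
    let resp = client
        .post(format!("{base}/promote"))
        .form(&[
            ("safekeepers", "sk1:5454,sk2:5454"),
            ("wal_flush_lsn", "0/028F10D8"),
        ])
        .send()?;
    println!("promote: {}", resp.status());
    Ok(())
}
```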
@@ -14,6 +14,7 @@ pub(in crate::http) mod insights;
 pub(in crate::http) mod lfc;
 pub(in crate::http) mod metrics;
 pub(in crate::http) mod metrics_json;
+pub(in crate::http) mod promote;
 pub(in crate::http) mod status;
 pub(in crate::http) mod terminate;
compute_tools/src/http/routes/promote.rs (new file, 14 lines)
@@ -0,0 +1,14 @@
+use crate::http::JsonResponse;
+use axum::Form;
+use http::StatusCode;
+
+pub(in crate::http) async fn promote(
+compute: axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>,
+Form(safekeepers_lsn): Form<compute_api::responses::SafekeepersLsn>,
+) -> axum::response::Response {
+let state = compute.promote(safekeepers_lsn).await;
+if let compute_api::responses::PromoteState::Failed { error } = state {
+return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, error);
+}
+JsonResponse::success(StatusCode::OK, state)
+}
@@ -3,7 +3,7 @@ use crate::http::JsonResponse;
 use axum::extract::State;
 use axum::response::Response;
 use axum_extra::extract::OptionalQuery;
-use compute_api::responses::{ComputeStatus, TerminateResponse};
+use compute_api::responses::{ComputeStatus, TerminateMode, TerminateResponse};
 use http::StatusCode;
 use serde::Deserialize;
 use std::sync::Arc;
@@ -12,7 +12,7 @@ use tracing::info;
 
 #[derive(Deserialize, Default)]
 pub struct TerminateQuery {
-mode: compute_api::responses::TerminateMode,
+mode: TerminateMode,
 }
 
 /// Terminate the compute.
@@ -24,16 +24,16 @@ pub(in crate::http) async fn terminate(
 {
 let mut state = compute.state.lock().unwrap();
 if state.status == ComputeStatus::Terminated {
-return JsonResponse::success(StatusCode::CREATED, state.terminate_flush_lsn);
+let response = TerminateResponse {
+lsn: state.terminate_flush_lsn,
+};
+return JsonResponse::success(StatusCode::CREATED, response);
 }
 
 if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
 return JsonResponse::invalid_status(state.status);
 }
-state.set_status(
-ComputeStatus::TerminationPending { mode },
-&compute.state_changed,
-);
+state.set_status(mode.into(), &compute.state_changed);
 }
 
 forward_termination_signal(false);
@@ -23,7 +23,7 @@ use super::{
 middleware::authorize::Authorize,
 routes::{
 check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
-grants, insights, lfc, metrics, metrics_json, status, terminate,
+grants, insights, lfc, metrics, metrics_json, promote, status, terminate,
 },
 };
 use crate::compute::ComputeNode;
@@ -87,6 +87,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
 let authenticated_router = Router::<Arc<ComputeNode>>::new()
 .route("/lfc/prewarm", get(lfc::prewarm_state).post(lfc::prewarm))
 .route("/lfc/offload", get(lfc::offload_state).post(lfc::offload))
+.route("/promote", post(promote::promote))
 .route("/check_writability", post(check_writability::is_writable))
 .route("/configure", post(configure::configure))
 .route("/database_schema", get(database_schema::get_schema_dump))
@@ -2,6 +2,7 @@ use std::collections::HashMap;
 
 use anyhow::Result;
 use compute_api::responses::{InstalledExtension, InstalledExtensions};
+use tokio_postgres::error::Error as PostgresError;
 use tokio_postgres::{Client, Config, NoTls};
 
 use crate::metrics::INSTALLED_EXTENSIONS;
@@ -10,7 +11,7 @@ use crate::metrics::INSTALLED_EXTENSIONS;
 /// and to make database listing query here more explicit.
 ///
 /// Limit the number of databases to 500 to avoid excessive load.
-async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
+async fn list_dbs(client: &mut Client) -> Result<Vec<String>, PostgresError> {
 // `pg_database.datconnlimit = -2` means that the database is in the
 // invalid state
 let databases = client
@@ -37,7 +38,9 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
 /// Same extension can be installed in multiple databases with different versions,
 /// so we report a separate metric (number of databases where it is installed)
 /// for each extension version.
-pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExtensions> {
+pub async fn get_installed_extensions(
+mut conf: Config,
+) -> Result<InstalledExtensions, PostgresError> {
 conf.application_name("compute_ctl:get_installed_extensions");
 let databases: Vec<String> = {
 let (mut client, connection) = conf.connect(NoTls).await?;
@@ -12,6 +12,7 @@ pub mod logger;
 pub mod catalog;
 pub mod compute;
 pub mod compute_prewarm;
+pub mod compute_promote;
 pub mod disk_quota;
 pub mod extension_server;
 pub mod installed_extensions;
@@ -4,13 +4,13 @@ use std::thread;
 use std::time::{Duration, SystemTime};
 
 use anyhow::{Result, bail};
-use compute_api::spec::{ComputeMode, PageserverConnectionInfo};
+use compute_api::spec::{ComputeMode, PageserverConnectionInfo, PageserverProtocol};
 use pageserver_page_api as page_api;
 use postgres::{NoTls, SimpleQueryMessage};
 use tracing::{info, warn};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
-use utils::shard::{ShardCount, ShardNumber, TenantShardId};
+use utils::shard::TenantShardId;
 
 use crate::compute::ComputeNode;
 
@@ -116,37 +116,38 @@ fn try_acquire_lsn_lease(
 timeline_id: TimelineId,
 lsn: Lsn,
 ) -> Result<Option<SystemTime>> {
-let shard_count = conninfo.shards.len();
 let mut leases = Vec::new();
 
-for (shard_number, shard) in conninfo.shards.into_iter() {
-let tenant_shard_id = match shard_count {
-0 | 1 => TenantShardId::unsharded(tenant_id),
-shard_count => TenantShardId {
-tenant_id,
-shard_number: ShardNumber(shard_number as u8),
-shard_count: ShardCount::new(shard_count as u8),
-},
+for (shard_index, shard) in conninfo.shards.into_iter() {
+let tenant_shard_id = TenantShardId {
+tenant_id,
+shard_number: shard_index.shard_number,
+shard_count: shard_index.shard_count,
 };
 
-let lease = if conninfo.prefer_grpc {
-acquire_lsn_lease_grpc(
-&shard.grpc_url.unwrap(),
-auth,
-tenant_shard_id,
-timeline_id,
-lsn,
-)?
-} else {
-acquire_lsn_lease_libpq(
-&shard.libpq_url.unwrap(),
-auth,
-tenant_shard_id,
-timeline_id,
-lsn,
-)?
-};
-leases.push(lease);
+// XXX: If there are more than pageserver for the one shard, do we need to get a
+// leas on all of them? Currently, that's what we assume, but this is hypothetical
+// as of this writing, as we never pass the info for more than one pageserver per
+// shard.
+for pageserver in shard.pageservers {
+let lease = match conninfo.prefer_protocol {
+PageserverProtocol::Grpc => acquire_lsn_lease_grpc(
+&pageserver.grpc_url.unwrap(),
+auth,
+tenant_shard_id,
+timeline_id,
+lsn,
+)?,
+PageserverProtocol::Libpq => acquire_lsn_lease_libpq(
+&pageserver.libpq_url.unwrap(),
+auth,
+tenant_shard_id,
+timeline_id,
+lsn,
+)?,
+};
+leases.push(lease);
+}
 }
 
 Ok(leases.into_iter().min().flatten())
@@ -105,6 +105,14 @@ pub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
 .expect("failed to define a metric")
 });
 
+pub(crate) static LFC_PREWARM_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
+register_int_counter!(
+"compute_ctl_lfc_prewarm_errors_total",
+"Total number of LFC prewarm errors",
+)
+.expect("failed to define a metric")
+});
+
 pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
 register_int_counter!(
 "compute_ctl_lfc_offloads_total",
@@ -113,6 +121,14 @@ pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
 .expect("failed to define a metric")
 });
 
+pub(crate) static LFC_OFFLOAD_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
+register_int_counter!(
+"compute_ctl_lfc_offload_errors_total",
+"Total number of LFC offload errors",
+)
+.expect("failed to define a metric")
+});
+
 pub fn collect() -> Vec<MetricFamily> {
 let mut metrics = COMPUTE_CTL_UP.collect();
 metrics.extend(INSTALLED_EXTENSIONS.collect());
@@ -123,6 +139,8 @@ pub fn collect() -> Vec<MetricFamily> {
 metrics.extend(PG_CURR_DOWNTIME_MS.collect());
 metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
 metrics.extend(LFC_PREWARMS.collect());
+metrics.extend(LFC_PREWARM_ERRORS.collect());
 metrics.extend(LFC_OFFLOADS.collect());
+metrics.extend(LFC_OFFLOAD_ERRORS.collect());
 metrics
 }
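The counters above follow the lazily registered Prometheus counter pattern already used in this file (a success counter paired with an *_errors_total counter). A self-contained sketch of the same mechanism, using the `prometheus` and `once_cell` crates that the hunk itself relies on; the metric names below are illustrative, not the ones exported by compute_ctl:

```rust
use once_cell::sync::Lazy;
use prometheus::{IntCounter, register_int_counter};

// Registered against the default registry the first time the static is touched.
static DEMO_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!("demo_prewarms_total", "Total number of demo prewarms")
        .expect("failed to define a metric")
});
static DEMO_PREWARM_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!("demo_prewarm_errors_total", "Total number of demo prewarm errors")
        .expect("failed to define a metric")
});

fn main() {
    DEMO_PREWARMS.inc();
    DEMO_PREWARM_ERRORS.inc();
    // Gather everything registered with the default registry, analogous to collect().
    for family in prometheus::gather() {
        println!("{}: {} series", family.get_name(), family.get_metric().len());
    }
}
```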
@@ -0,0 +1 @@
+ALTER ROLE {privileged_role_name} BYPASSRLS;
@@ -1 +0,0 @@
-ALTER ROLE neon_superuser BYPASSRLS;
@@ -1,8 +1,21 @@
+-- On December 8th, 2023, an engineering escalation (INC-110) was opened after
+-- it was found that BYPASSRLS was being applied to all roles.
+--
+-- PR that introduced the issue: https://github.com/neondatabase/neon/pull/5657
+-- Subsequent commit on main: https://github.com/neondatabase/neon/commit/ad99fa5f0393e2679e5323df653c508ffa0ac072
+--
+-- NOBYPASSRLS and INHERIT are the defaults for a Postgres role, but because it
+-- isn't easy to know if a Postgres cluster is affected by the issue, we need to
+-- keep the migration around for a long time, if not indefinitely, so any
+-- cluster can be fixed.
+--
+-- Branching is the gift that keeps on giving...
+
 DO $$
 DECLARE
 role_name text;
 BEGIN
-FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
+FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, '{privileged_role_name}', 'member')
 LOOP
 RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
 EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
@@ -10,7 +23,7 @@ BEGIN
 
 FOR role_name IN SELECT rolname FROM pg_roles
 WHERE
-NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
+NOT pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT starts_with(rolname, 'pg_')
 LOOP
 RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
 EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
@@ -1,6 +1,6 @@
 DO $$
 BEGIN
 IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
-EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
+EXECUTE 'GRANT pg_create_subscription TO {privileged_role_name}';
 END IF;
 END $$;
@@ -1 +0,0 @@
-GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
@@ -0,0 +1 @@
+GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;
@@ -1,4 +1,4 @@
 -- SKIP: Deemed insufficient for allowing relations created by extensions to be
--- interacted with by neon_superuser without permission issues.
+-- interacted with by {privileged_role_name} without permission issues.
 
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name};
@@ -1,4 +1,4 @@
 -- SKIP: Deemed insufficient for allowing relations created by extensions to be
--- interacted with by neon_superuser without permission issues.
+-- interacted with by {privileged_role_name} without permission issues.
 
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name};
@@ -1,3 +1,3 @@
 -- SKIP: Moved inline to the handle_grants() functions.
 
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name} WITH GRANT OPTION;
@@ -1,3 +1,3 @@
 -- SKIP: Moved inline to the handle_grants() functions.
 
-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name} WITH GRANT OPTION;
@@ -1,7 +1,7 @@
 DO $$
 BEGIN
 IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
-EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
-EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
+EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO {privileged_role_name}';
+EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO {privileged_role_name}';
 END IF;
 END $$;
@@ -1 +0,0 @@
-GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;
@@ -0,0 +1 @@
+GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO {privileged_role_name};
@@ -0,0 +1 @@
+GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;
@@ -7,13 +7,17 @@ BEGIN
 INTO monitor
 FROM pg_auth_members
 WHERE roleid = 'pg_monitor'::regrole
-AND member = 'pg_monitor'::regrole;
+AND member = 'neon_superuser'::regrole;
 
-IF NOT monitor.member THEN
+IF monitor IS NULL THEN
+RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_monitor';
+END IF;
+
+IF monitor.admin IS NULL OR NOT monitor.member THEN
 RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor';
 END IF;
 
-IF NOT monitor.admin THEN
+IF monitor.admin IS NULL OR NOT monitor.admin THEN
 RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor';
 END IF;
 END $$;
@@ -0,0 +1,23 @@
+DO $$
+DECLARE
+signal_backend record;
+BEGIN
+SELECT pg_has_role('neon_superuser', 'pg_signal_backend', 'member') AS member,
+admin_option AS admin
+INTO signal_backend
+FROM pg_auth_members
+WHERE roleid = 'pg_signal_backend'::regrole
+AND member = 'neon_superuser'::regrole;
+
+IF signal_backend IS NULL THEN
+RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_signal_backend';
+END IF;
+
+IF signal_backend.member IS NULL OR NOT signal_backend.member THEN
+RAISE EXCEPTION 'neon_superuser is not a member of pg_signal_backend';
+END IF;
+
+IF signal_backend.admin IS NULL OR NOT signal_backend.admin THEN
+RAISE EXCEPTION 'neon_superuser cannot grant pg_signal_backend';
+END IF;
+END $$;
@@ -84,7 +84,8 @@ impl ComputeMonitor {
 if matches!(
 compute_status,
 ComputeStatus::Terminated
-| ComputeStatus::TerminationPending { .. }
+| ComputeStatus::TerminationPendingFast
+| ComputeStatus::TerminationPendingImmediate
 | ComputeStatus::Failed
 ) {
 info!(
@@ -9,6 +9,7 @@ use reqwest::StatusCode;
 use tokio_postgres::Client;
 use tracing::{error, info, instrument};
 
+use crate::compute::ComputeNodeParams;
 use crate::config;
 use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
 use crate::migration::MigrationRunner;
@@ -169,7 +170,7 @@ pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
 }
 
 #[instrument(skip_all)]
-pub async fn handle_migrations(client: &mut Client) -> Result<()> {
+pub async fn handle_migrations(params: ComputeNodeParams, client: &mut Client) -> Result<()> {
 info!("handle migrations");
 
 // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -178,24 +179,58 @@ pub async fn handle_migrations(client: &mut Client) -> Result<()> {
 
 // Add new migrations in numerical order.
 let migrations = [
-include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
-include_str!("./migrations/0002-alter_roles.sql"),
-include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
-include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
-include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
-include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
-include_str!(
-"./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
-),
-include_str!(
-"./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
-),
+&format!(
+include_str!("./migrations/0001-add_bypass_rls_to_privileged_role.sql"),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!("./migrations/0002-alter_roles.sql"),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!("./migrations/0003-grant_pg_create_subscription_to_privileged_role.sql"),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!("./migrations/0004-grant_pg_monitor_to_privileged_role.sql"),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!("./migrations/0005-grant_all_on_tables_to_privileged_role.sql"),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!("./migrations/0006-grant_all_on_sequences_to_privileged_role.sql"),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!(
+"./migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql"
+),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!(
+"./migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql"
+),
+privileged_role_name = params.privileged_role_name
+),
 include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
-include_str!(
-"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
-),
-include_str!(
-"./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
-),
+&format!(
+include_str!(
+"./migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql"
+),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!(
+"./migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql"
+),
+privileged_role_name = params.privileged_role_name
+),
+&format!(
+include_str!("./migrations/0012-grant_pg_signal_backend_to_privileged_role.sql"),
+privileged_role_name = params.privileged_role_name
+),
 ];
 
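The migration list above depends on `format!` accepting `include_str!(...)` as its format string (both expand at compile time) and on a named argument filling the `{privileged_role_name}` placeholders inside the SQL templates. A self-contained sketch of the same mechanism, with the SQL inlined instead of included from a file:

```rust
fn main() {
    // Stand-in for one of the migration templates; the real code pulls the
    // template in with include_str!, which works identically because the macro
    // expands to a string literal that format! can consume.
    let privileged_role_name = "neon_superuser";
    let sql = format!(
        "GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;",
        privileged_role_name = privileged_role_name
    );
    assert_eq!(sql, "GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;");
    println!("{sql}");
}
```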
@@ -13,14 +13,14 @@ use tokio_postgres::Client;
 use tokio_postgres::error::SqlState;
 use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
 
-use crate::compute::{ComputeNode, ComputeState};
+use crate::compute::{ComputeNode, ComputeNodeParams, ComputeState};
 use crate::pg_helpers::{
 DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,
 get_existing_roles_async,
 };
 use crate::spec_apply::ApplySpecPhase::{
-CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser,
-CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon,
+CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreatePgauditExtension,
+CreatePgauditlogtofileExtension, CreatePrivilegedRole, CreateSchemaNeon,
 DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
 HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
 RunInEachDatabase,
@@ -49,6 +49,7 @@ impl ComputeNode {
 // Proceed with post-startup configuration. Note, that order of operations is important.
 let client = Self::get_maintenance_client(&conf).await?;
 let spec = spec.clone();
+let params = Arc::new(self.params.clone());
 
 let databases = get_existing_dbs_async(&client).await?;
 let roles = get_existing_roles_async(&client)
@@ -157,6 +158,7 @@ impl ComputeNode {
 
 let conf = Arc::new(conf);
 let fut = Self::apply_spec_sql_db(
+params.clone(),
 spec.clone(),
 conf,
 ctx.clone(),
@@ -185,7 +187,7 @@ impl ComputeNode {
 }
 
 for phase in [
-CreateNeonSuperuser,
+CreatePrivilegedRole,
 DropInvalidDatabases,
 RenameRoles,
 CreateAndAlterRoles,
@@ -195,6 +197,7 @@ impl ComputeNode {
 ] {
 info!("Applying phase {:?}", &phase);
 apply_operations(
+params.clone(),
 spec.clone(),
 ctx.clone(),
 jwks_roles.clone(),
@@ -243,6 +246,7 @@ impl ComputeNode {
 }
 
 let fut = Self::apply_spec_sql_db(
+params.clone(),
 spec.clone(),
 conf,
 ctx.clone(),
@@ -293,6 +297,7 @@ impl ComputeNode {
 for phase in phases {
 debug!("Applying phase {:?}", &phase);
 apply_operations(
+params.clone(),
 spec.clone(),
 ctx.clone(),
 jwks_roles.clone(),
@@ -313,7 +318,9 @@ impl ComputeNode {
 /// May opt to not connect to databases that don't have any scheduled
 /// operations. The function is concurrency-controlled with the provided
 /// semaphore. The caller has to make sure the semaphore isn't exhausted.
+#[allow(clippy::too_many_arguments)] // TODO: needs bigger refactoring
 async fn apply_spec_sql_db(
+params: Arc<ComputeNodeParams>,
 spec: Arc<ComputeSpec>,
 conf: Arc<tokio_postgres::Config>,
 ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
@@ -328,6 +335,7 @@ impl ComputeNode {
 
 for subphase in subphases {
 apply_operations(
+params.clone(),
 spec.clone(),
 ctx.clone(),
 jwks_roles.clone(),
@@ -467,7 +475,7 @@ pub enum PerDatabasePhase {
 
 #[derive(Clone, Debug)]
 pub enum ApplySpecPhase {
-CreateNeonSuperuser,
+CreatePrivilegedRole,
 DropInvalidDatabases,
 RenameRoles,
 CreateAndAlterRoles,
@@ -510,6 +518,7 @@ pub struct MutableApplyContext {
 /// - No timeouts have (yet) been implemented.
 /// - The caller is responsible for limiting and/or applying concurrency.
 pub async fn apply_operations<'a, Fut, F>(
+params: Arc<ComputeNodeParams>,
 spec: Arc<ComputeSpec>,
 ctx: Arc<RwLock<MutableApplyContext>>,
 jwks_roles: Arc<HashSet<String>>,
@@ -527,7 +536,7 @@ where
 debug!("Processing phase {:?}", &apply_spec_phase);
 let ctx = ctx;
 
-let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase)
+let mut ops = get_operations(&params, &spec, &ctx, &jwks_roles, &apply_spec_phase)
 .await?
 .peekable();
 
@@ -588,14 +597,18 @@ where
 /// sort/merge/batch execution, but for now this is a nice way to improve
 /// batching behavior of the commands.
 async fn get_operations<'a>(
+params: &'a ComputeNodeParams,
 spec: &'a ComputeSpec,
 ctx: &'a RwLock<MutableApplyContext>,
 jwks_roles: &'a HashSet<String>,
 apply_spec_phase: &'a ApplySpecPhase,
 ) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
 match apply_spec_phase {
-ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation {
-query: include_str!("sql/create_neon_superuser.sql").to_string(),
+ApplySpecPhase::CreatePrivilegedRole => Ok(Box::new(once(Operation {
+query: format!(
+include_str!("sql/create_privileged_role.sql"),
+privileged_role_name = params.privileged_role_name
+),
 comment: None,
 }))),
 ApplySpecPhase::DropInvalidDatabases => {
@@ -697,8 +710,9 @@ async fn get_operations<'a>(
 None => {
 let query = if !jwks_roles.contains(role.name.as_str()) {
 format!(
-"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}",
+"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE {} {}",
 role.name.pg_quote(),
+params.privileged_role_name,
 role.to_pg_options(),
 )
 } else {
@@ -849,8 +863,9 @@ async fn get_operations<'a>(
 // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database
 // (see https://www.postgresql.org/docs/current/ddl-priv.html)
 query: format!(
-"GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
-db.name.pg_quote()
+"GRANT ALL PRIVILEGES ON DATABASE {} TO {}",
+db.name.pg_quote(),
+params.privileged_role_name
 ),
 comment: None,
 },
@@ -1,8 +0,0 @@
-DO $$
-BEGIN
-IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
-THEN
-CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
-END IF;
-END
-$$;

compute_tools/src/sql/create_privileged_role.sql (new file, 8 lines)
@@ -0,0 +1,8 @@
+DO $$
+BEGIN
+IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{privileged_role_name}')
+THEN
+CREATE ROLE {privileged_role_name} CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
+END IF;
+END
+$$;
@@ -8,10 +8,10 @@ code changes locally, but not suitable for running production systems.
 
 ## Example: Start with Postgres 16
 
-To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 3 of the start-up commands.
+To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 2 of the start-up commands.
 
 ```shell
-cargo neon init --pg-version 16
+cargo neon init
 cargo neon start
 cargo neon tenant create --set-default --pg-version 16
 cargo neon endpoint create main --pg-version 16
@@ -16,9 +16,14 @@ use std::time::Duration;
 use anyhow::{Context, Result, anyhow, bail};
 use clap::Parser;
 use compute_api::requests::ComputeClaimsScope;
-use compute_api::spec::{ComputeMode, PageserverConnectionInfo, PageserverShardConnectionInfo};
+use compute_api::spec::{
+ComputeMode, PageserverConnectionInfo, PageserverProtocol, PageserverShardInfo,
+};
 use control_plane::broker::StorageBroker;
 use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode};
+use control_plane::endpoint::{
+pageserver_conf_to_shard_conn_info, tenant_locate_response_to_conn_info,
+};
 use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
 use control_plane::local_env;
 use control_plane::local_env::{
@@ -44,7 +49,6 @@ use pageserver_api::models::{
 };
 use pageserver_api::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize, TenantShardId};
 use postgres_backend::AuthType;
-use postgres_connection::parse_host_port;
 use safekeeper_api::membership::{SafekeeperGeneration, SafekeeperId};
 use safekeeper_api::{
 DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
@@ -52,11 +56,11 @@ use safekeeper_api::{
 };
 use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
 use tokio::task::JoinSet;
-use url::Host;
 use utils::auth::{Claims, Scope};
 use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
 use utils::lsn::Lsn;
 use utils::project_git_version;
+use utils::shard::ShardIndex;
 
 // Default id of a safekeeper node, if not specified on the command line.
 const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
@@ -631,6 +635,10 @@ struct EndpointCreateCmdArgs {
 help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
 )]
 allow_multiple: bool,
+
+/// Only allow changing it on creation
+#[clap(long, help = "Name of the privileged role for the endpoint")]
+privileged_role_name: Option<String>,
 }
 
 #[derive(clap::Args)]
@@ -1480,6 +1488,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
 args.grpc,
 !args.update_catalog,
 false,
+args.privileged_role_name.clone(),
 )?;
 }
 EndpointCmd::Start(args) => {
@@ -1516,74 +1525,56 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
 )?;
 }
 
-let (shards, stripe_size) = if let Some(ps_id) = pageserver_id {
-let conf = env.get_pageserver_conf(ps_id).unwrap();
-let libpq_url = Some({
-let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
-let port = port.unwrap_or(5432);
-format!("postgres://no_user@{host}:{port}")
-});
-let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {
-let (host, port) = parse_host_port(grpc_addr)?;
-let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
-Some(format!("grpc://no_user@{host}:{port}"))
-} else {
-None
-};
-let pageserver = PageserverShardConnectionInfo {
-libpq_url,
-grpc_url,
-};
+let prefer_protocol = if endpoint.grpc {
+PageserverProtocol::Grpc
+} else {
+PageserverProtocol::Libpq
+};
+
+let mut pageserver_conninfo = if let Some(ps_id) = pageserver_id {
+let conf = env.get_pageserver_conf(ps_id).unwrap();
+let ps_conninfo = pageserver_conf_to_shard_conn_info(conf)?;
+
+let shard_info = PageserverShardInfo {
+pageservers: vec![ps_conninfo],
+};
 // If caller is telling us what pageserver to use, this is not a tenant which is
 // fully managed by storage controller, therefore not sharded.
-(vec![(0, pageserver)], DEFAULT_STRIPE_SIZE)
+let shards: HashMap<_, _> = vec![(ShardIndex::unsharded(), shard_info)]
+.into_iter()
+.collect();
+PageserverConnectionInfo {
+shard_count: ShardCount(0),
+stripe_size: None,
+shards,
+prefer_protocol,
+}
 } else {
 // Look up the currently attached location of the tenant, and its striping metadata,
 // to pass these on to postgres.
 let storage_controller = StorageController::from_env(env);
 let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
-let shards = futures::future::try_join_all(locate_result.shards.into_iter().map(
-|shard| async move {
-if let ComputeMode::Static(lsn) = endpoint.mode {
-// Initialize LSN leases for static computes.
+assert!(!locate_result.shards.is_empty());
+
+// Initialize LSN leases for static computes.
+if let ComputeMode::Static(lsn) = endpoint.mode {
+futures::future::try_join_all(locate_result.shards.iter().map(
+|shard| async move {
 let conf = env.get_pageserver_conf(shard.node_id).unwrap();
 let pageserver = PageServerNode::from_env(env, conf);
 
 pageserver
 .http_client
 .timeline_init_lsn_lease(shard.shard_id, endpoint.timeline_id, lsn)
-.await?;
-}
-
-let libpq_host = Host::parse(&shard.listen_pg_addr)?;
-let libpq_port = shard.listen_pg_port;
-let libpq_url =
-Some(format!("postgres://no_user@{libpq_host}:{libpq_port}"));
-
-let grpc_url = if let Some(grpc_host) = shard.listen_grpc_addr {
-let grpc_port = shard.listen_grpc_port.expect("no gRPC port");
-Some(format!("grpc://no_user@{grpc_host}:{grpc_port}"))
-} else {
-None
-};
-let pageserver = PageserverShardConnectionInfo {
-libpq_url,
-grpc_url,
-};
-anyhow::Ok((shard.shard_id.shard_number.0 as u32, pageserver))
-},
-))
-.await?;
-let stripe_size = locate_result.shard_params.stripe_size;
-
-(shards, stripe_size)
-};
-assert!(!shards.is_empty());
-let pageserver_conninfo = PageserverConnectionInfo {
-shards: shards.into_iter().collect(),
-prefer_grpc: endpoint.grpc,
+.await
+},
+))
+.await?;
+}
+
+tenant_locate_response_to_conn_info(&locate_result)?
 };
+pageserver_conninfo.prefer_protocol = prefer_protocol;
 
 let ps_conf = env.get_pageserver_conf(DEFAULT_PAGESERVER_ID)?;
 let auth_token = if matches!(ps_conf.pg_auth_type, AuthType::NeonJWT) {
@@ -1615,7 +1606,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
 safekeepers,
 pageserver_conninfo,
 remote_ext_base_url: remote_ext_base_url.clone(),
-shard_stripe_size: stripe_size.0 as usize,
 create_test_user: args.create_test_user,
 start_timeout: args.start_timeout,
 autoprewarm: args.autoprewarm,
@@ -1632,66 +1622,45 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
 .endpoints
 .get(endpoint_id.as_str())
 .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
-let shards = if let Some(ps_id) = args.endpoint_pageserver_id {
+let prefer_protocol = if endpoint.grpc {
+PageserverProtocol::Grpc
+} else {
+PageserverProtocol::Libpq
+};
+let mut pageserver_conninfo = if let Some(ps_id) = args.endpoint_pageserver_id {
 let conf = env.get_pageserver_conf(ps_id)?;
-let libpq_url = Some({
-let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
-let port = port.unwrap_or(5432);
-format!("postgres://no_user@{host}:{port}")
-});
-let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {
-let (host, port) = parse_host_port(grpc_addr)?;
-let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
-Some(format!("grpc://no_user@{host}:{port}"))
-} else {
-None
-};
-let pageserver = PageserverShardConnectionInfo {
-libpq_url,
-grpc_url,
+let ps_conninfo = pageserver_conf_to_shard_conn_info(conf)?;
+let shard_info = PageserverShardInfo {
+pageservers: vec![ps_conninfo],
 };
 
 // If caller is telling us what pageserver to use, this is not a tenant which is
 // fully managed by storage controller, therefore not sharded.
-vec![(0, pageserver)]
-} else {
-let storage_controller = StorageController::from_env(env);
-storage_controller
-.tenant_locate(endpoint.tenant_id)
-.await?
-.shards
+let shards: HashMap<_, _> = vec![(ShardIndex::unsharded(), shard_info)]
 .into_iter()
-.map(|shard| {
-// Use gRPC if requested.
-let libpq_host = Host::parse(&shard.listen_pg_addr).expect("bad hostname");
-let libpq_port = shard.listen_pg_port;
-let libpq_url =
-Some(format!("postgres://no_user@{libpq_host}:{libpq_port}"));
-
-let grpc_url = if let Some(grpc_host) = shard.listen_grpc_addr {
-let grpc_port = shard.listen_grpc_port.expect("no gRPC port");
-Some(format!("grpc://no_user@{grpc_host}:{grpc_port}"))
-} else {
-None
-};
-(
-shard.shard_id.shard_number.0 as u32,
-PageserverShardConnectionInfo {
-libpq_url,
-grpc_url,
-},
-)
-})
-.collect::<Vec<_>>()
-};
-let pageserver_conninfo = PageserverConnectionInfo {
-shards: shards.into_iter().collect(),
-prefer_grpc: endpoint.grpc,
+.collect();
+PageserverConnectionInfo {
+shard_count: ShardCount::unsharded(),
+stripe_size: None,
+shards,
+prefer_protocol,
+}
+} else {
+// Look up the currently attached location of the tenant, and its striping metadata,
+// to pass these on to postgres.
+let storage_controller = StorageController::from_env(env);
+let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
+
+tenant_locate_response_to_conn_info(&locate_result)?
 };
+pageserver_conninfo.prefer_protocol = prefer_protocol;
 
 // If --safekeepers argument is given, use only the listed
 // safekeeper nodes; otherwise all from the env.
 let safekeepers = parse_safekeepers(&args.safekeepers)?;
 endpoint
-.reconfigure(Some(pageserver_conninfo), None, safekeepers, None)
+.reconfigure(Some(&pageserver_conninfo), safekeepers, None)
 .await?;
 }
 EndpointCmd::Stop(args) => {
@@ -36,7 +36,7 @@ impl StorageBroker {
 pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
 let broker = &self.env.broker;

-print!("Starting neon broker at {}", broker.client_url());
+println!("Starting neon broker at {}", broker.client_url());

 let mut args = Vec::new();

@@ -32,11 +32,12 @@
 //! config.json - passed to `compute_ctl`
 //! pgdata/
 //! postgresql.conf - copy of postgresql.conf created by `compute_ctl`
-//! zenith.signal
+//! neon.signal
+//! zenith.signal - copy of neon.signal, for backward compatibility
 //! <other PostgreSQL files>
 //! ```
 //!
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashMap};
 use std::fmt::Display;
 use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
 use std::path::PathBuf;
@@ -56,8 +57,8 @@ use compute_api::responses::{
 TlsConfig,
 };
 use compute_api::spec::{
-Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
-RemoteExtSpec, Role,
+Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,
+PageserverShardInfo, PgIdent, RemoteExtSpec, Role,
 };

 // re-export these, because they're used in the reconfigure() function
@@ -68,7 +69,6 @@ use jsonwebtoken::jwk::{
 OctetKeyPairParameters, OctetKeyPairType, PublicKeyUse,
 };
 use nix::sys::signal::{Signal, kill};
-use pageserver_api::shard::ShardStripeSize;
 use pem::Pem;
 use reqwest::header::CONTENT_TYPE;
 use safekeeper_api::PgMajorVersion;
@@ -79,6 +79,10 @@ use spki::der::Decode;
 use spki::{SubjectPublicKeyInfo, SubjectPublicKeyInfoRef};
 use tracing::debug;
 use utils::id::{NodeId, TenantId, TimelineId};
+use utils::shard::{ShardIndex, ShardNumber};
+
+use pageserver_api::config::DEFAULT_GRPC_LISTEN_PORT as DEFAULT_PAGESERVER_GRPC_PORT;
+use postgres_connection::parse_host_port;

 use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
@@ -101,6 +105,7 @@ pub struct EndpointConf {
 features: Vec<ComputeFeature>,
 cluster: Option<Cluster>,
 compute_ctl_config: ComputeCtlConfig,
+privileged_role_name: Option<String>,
 }

 //
@@ -201,6 +206,7 @@ impl ComputeControlPlane {
 grpc: bool,
 skip_pg_catalog_updates: bool,
 drop_subscriptions_before_start: bool,
+privileged_role_name: Option<String>,
 ) -> Result<Arc<Endpoint>> {
 let pg_port = pg_port.unwrap_or_else(|| self.get_port());
 let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -238,6 +244,7 @@ impl ComputeControlPlane {
 features: vec![],
 cluster: None,
 compute_ctl_config: compute_ctl_config.clone(),
+privileged_role_name: privileged_role_name.clone(),
 });

 ep.create_endpoint_dir()?;
@@ -259,6 +266,7 @@ impl ComputeControlPlane {
 features: vec![],
 cluster: None,
 compute_ctl_config,
+privileged_role_name,
 })?,
 )?;
 std::fs::write(
@@ -334,6 +342,9 @@ pub struct Endpoint {

 /// The compute_ctl config for the endpoint's compute.
 compute_ctl_config: ComputeCtlConfig,

+/// The name of the privileged role for the endpoint.
+privileged_role_name: Option<String>,
 }

 #[derive(PartialEq, Eq)]
@@ -384,7 +395,6 @@ pub struct EndpointStartArgs {
 pub safekeepers: Vec<NodeId>,
 pub pageserver_conninfo: PageserverConnectionInfo,
 pub remote_ext_base_url: Option<String>,
-pub shard_stripe_size: usize,
 pub create_test_user: bool,
 pub start_timeout: Duration,
 pub autoprewarm: bool,
@@ -434,6 +444,7 @@ impl Endpoint {
 features: conf.features,
 cluster: conf.cluster,
 compute_ctl_config: conf.compute_ctl_config,
+privileged_role_name: conf.privileged_role_name,
 })
 }

@@ -466,7 +477,7 @@ impl Endpoint {
 conf.append("max_connections", "100");
 conf.append("wal_level", "logical");
 // wal_sender_timeout is the maximum time to wait for WAL replication.
-// It also defines how often the walreciever will send a feedback message to the wal sender.
+// It also defines how often the walreceiver will send a feedback message to the wal sender.
 conf.append("wal_sender_timeout", "5s");
 conf.append("listen_addresses", &self.pg_address.ip().to_string());
 conf.append("port", &self.pg_address.port().to_string());
@@ -715,6 +726,46 @@ impl Endpoint {
 remote_extensions = None;
 };

+// For the sake of backwards-compatibility, also fill in 'pageserver_connstring'
+//
+// XXX: I believe this is not really needed, except to make
+// test_forward_compatibility happy.
+//
+// Use a closure so that we can conviniently return None in the middle of the
+// loop.
+let pageserver_connstring = (|| {
+let num_shards = if args.pageserver_conninfo.shard_count.is_unsharded() {
+1
+} else {
+args.pageserver_conninfo.shard_count.0
+};
+let mut connstrings = Vec::new();
+for shard_no in 0..num_shards {
+let shard_index = ShardIndex {
+shard_count: args.pageserver_conninfo.shard_count,
+shard_number: ShardNumber(shard_no),
+};
+let shard = args
+.pageserver_conninfo
+.shards
+.get(&shard_index)
+.expect(&format!(
+"shard {} not found in pageserver_connection_info",
+shard_index
+));
+let pageserver = shard
+.pageservers
+.first()
+.expect("must have at least one pageserver");
+if let Some(libpq_url) = &pageserver.libpq_url {
+connstrings.push(libpq_url.clone());
+} else {
+return None;
+}
+}
+Some(connstrings.join(","))
+})();

 // Create config file
 let config = {
 let mut spec = ComputeSpec {
@@ -759,13 +810,14 @@ impl Endpoint {
 branch_id: None,
 endpoint_id: Some(self.endpoint_id.clone()),
 mode: self.mode,
-pageserver_connection_info: Some(args.pageserver_conninfo),
+pageserver_connection_info: Some(args.pageserver_conninfo.clone()),
+pageserver_connstring,
 safekeepers_generation: args.safekeepers_generation.map(|g| g.into_inner()),
 safekeeper_connstrings,
 storage_auth_token: args.auth_token.clone(),
 remote_extensions,
 pgbouncer_settings: None,
-shard_stripe_size: Some(args.shard_stripe_size),
+shard_stripe_size: args.pageserver_conninfo.stripe_size, // redundant with pageserver_connection_info.stripe_size
 local_proxy_config: None,
 reconfigure_concurrency: self.reconfigure_concurrency,
 drop_subscriptions_before_start: self.drop_subscriptions_before_start,
@@ -861,6 +913,10 @@ impl Endpoint {
 cmd.arg("--dev");
 }

+if let Some(privileged_role_name) = self.privileged_role_name.clone() {
+cmd.args(["--privileged-role-name", &privileged_role_name]);
+}

 let child = cmd.spawn()?;
 // set up a scopeguard to kill & wait for the child in case we panic or bail below
 let child = scopeguard::guard(child, |mut child| {
@@ -914,7 +970,8 @@ impl Endpoint {
 ComputeStatus::Empty
 | ComputeStatus::ConfigurationPending
 | ComputeStatus::Configuration
-| ComputeStatus::TerminationPending { .. }
+| ComputeStatus::TerminationPendingFast
+| ComputeStatus::TerminationPendingImmediate
 | ComputeStatus::Terminated => {
 bail!("unexpected compute status: {:?}", state.status)
 }
@@ -972,8 +1029,7 @@ impl Endpoint {

 pub async fn reconfigure(
 &self,
-pageserver_conninfo: Option<PageserverConnectionInfo>,
-stripe_size: Option<ShardStripeSize>,
+pageserver_conninfo: Option<&PageserverConnectionInfo>,
 safekeepers: Option<Vec<NodeId>>,
 safekeeper_generation: Option<SafekeeperGeneration>,
 ) -> Result<()> {
@@ -995,10 +1051,8 @@ impl Endpoint {
 !pageserver_conninfo.shards.is_empty(),
 "no pageservers provided"
 );
-spec.pageserver_connection_info = Some(pageserver_conninfo);
-}
-if stripe_size.is_some() {
-spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
+spec.pageserver_connection_info = Some(pageserver_conninfo.clone());
+spec.shard_stripe_size = pageserver_conninfo.stripe_size;
 }

 // If safekeepers are not specified, don't change them.
@@ -1047,11 +1101,9 @@ impl Endpoint {

 pub async fn reconfigure_pageservers(
 &self,
-pageservers: PageserverConnectionInfo,
-stripe_size: Option<ShardStripeSize>,
+pageservers: &PageserverConnectionInfo,
 ) -> Result<()> {
-self.reconfigure(Some(pageservers), stripe_size, None, None)
-.await
+self.reconfigure(Some(pageservers), None, None).await
 }

 pub async fn reconfigure_safekeepers(
@@ -1059,7 +1111,7 @@ impl Endpoint {
 safekeepers: Vec<NodeId>,
 generation: SafekeeperGeneration,
 ) -> Result<()> {
-self.reconfigure(None, None, Some(safekeepers), Some(generation))
+self.reconfigure(None, Some(safekeepers), Some(generation))
 .await
 }

@@ -1115,3 +1167,68 @@ impl Endpoint {
 )
 }
 }
+
+pub fn pageserver_conf_to_shard_conn_info(
+conf: &crate::local_env::PageServerConf,
+) -> Result<PageserverShardConnectionInfo> {
+let libpq_url = {
+let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
+let port = port.unwrap_or(5432);
+Some(format!("postgres://no_user@{host}:{port}"))
+};
+let grpc_url = if let Some(grpc_addr) = &conf.listen_grpc_addr {
+let (host, port) = parse_host_port(grpc_addr)?;
+let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
+Some(format!("grpc://no_user@{host}:{port}"))
+} else {
+None
+};
+Ok(PageserverShardConnectionInfo {
+id: Some(conf.id.to_string()),
+libpq_url,
+grpc_url,
+})
+}
+
+pub fn tenant_locate_response_to_conn_info(
+response: &pageserver_api::controller_api::TenantLocateResponse,
+) -> Result<PageserverConnectionInfo> {
+let mut shards = HashMap::new();
+for shard in response.shards.iter() {
+tracing::info!("parsing {}", shard.listen_pg_addr);
+let libpq_url = {
+let host = &shard.listen_pg_addr;
+let port = shard.listen_pg_port;
+Some(format!("postgres://no_user@{host}:{port}"))
+};
+let grpc_url = if let Some(grpc_addr) = &shard.listen_grpc_addr {
+let host = grpc_addr;
+let port = shard.listen_grpc_port.expect("no gRPC port");
+Some(format!("grpc://no_user@{host}:{port}"))
+} else {
+None
+};
+
+let shard_info = PageserverShardInfo {
+pageservers: vec![PageserverShardConnectionInfo {
+id: Some(shard.node_id.to_string()),
+libpq_url,
+grpc_url,
+}],
+};
+
+shards.insert(shard.shard_id.to_index(), shard_info);
+}
+
+let stripe_size = if response.shard_params.count.is_unsharded() {
+None
+} else {
+Some(response.shard_params.stripe_size.0)
+};
+Ok(PageserverConnectionInfo {
+shard_count: response.shard_params.count,
+stripe_size,
+shards,
+prefer_protocol: PageserverProtocol::default(),
+})
+}
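For illustration, here is a minimal sketch of how the helpers introduced above are meant to compose inside `neon_local`. This is not a verbatim excerpt of the source: it assumes the types and functions shown in this diff (`PageserverConnectionInfo`, `PageserverShardInfo`, `ShardIndex`, `pageserver_conf_to_shard_conn_info`, `tenant_locate_response_to_conn_info`) and elides crate-internal imports and error context.

```rust
// Sketch only, under the assumptions stated above; `LocalEnv`, `Endpoint` and
// `StorageController` are the neon_local types used elsewhere in this diff.
async fn conninfo_for_endpoint(
    env: &LocalEnv,
    endpoint: &Endpoint,
    pinned_pageserver: Option<NodeId>,
) -> anyhow::Result<PageserverConnectionInfo> {
    let prefer_protocol = if endpoint.grpc {
        PageserverProtocol::Grpc
    } else {
        PageserverProtocol::Libpq
    };
    let mut conninfo = if let Some(ps_id) = pinned_pageserver {
        // Pinned to one pageserver: a single unsharded entry, no stripe size.
        let conf = env.get_pageserver_conf(ps_id)?;
        let shard_info = PageserverShardInfo {
            pageservers: vec![pageserver_conf_to_shard_conn_info(conf)?],
        };
        PageserverConnectionInfo {
            shard_count: ShardCount::unsharded(),
            stripe_size: None,
            shards: vec![(ShardIndex::unsharded(), shard_info)].into_iter().collect(),
            prefer_protocol,
        }
    } else {
        // Otherwise ask the storage controller where the shards currently live.
        let locate_result = StorageController::from_env(env)
            .tenant_locate(endpoint.tenant_id)
            .await?;
        tenant_locate_response_to_conn_info(&locate_result)?
    };
    conninfo.prefer_protocol = prefer_protocol;
    Ok(conninfo)
}
```

The result is then passed by reference, e.g. `endpoint.reconfigure(Some(&conninfo), None, None).await?`; the separate stripe-size argument is gone because the stripe size now travels inside `PageserverConnectionInfo`.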
@@ -217,6 +217,9 @@ pub struct NeonStorageControllerConf {
 pub posthog_config: Option<PostHogConfig>,

 pub kick_secondary_downloads: Option<bool>,

+#[serde(with = "humantime_serde")]
+pub shard_split_request_timeout: Option<Duration>,
 }

 impl NeonStorageControllerConf {
@@ -250,6 +253,7 @@ impl Default for NeonStorageControllerConf {
 timeline_safekeeper_count: None,
 posthog_config: None,
 kick_secondary_downloads: None,
+shard_split_request_timeout: None,
 }
 }
 }
@@ -303,7 +303,7 @@ impl PageServerNode {
 async fn start_node(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
 // TODO: using a thread here because start_process() is not async but we need to call check_status()
 let datadir = self.repo_path();
-print!(
+println!(
 "Starting pageserver node {} at '{}' in {:?}, retrying for {:?}",
 self.conf.id,
 self.pg_connection_config.raw_address(),
@@ -452,6 +452,12 @@ impl PageServerNode {
 .map(|x| x.parse::<usize>())
 .transpose()
 .context("Failed to parse 'image_creation_threshold' as non zero integer")?,
+// HADRON
+image_layer_force_creation_period: settings
+.remove("image_layer_force_creation_period")
+.map(humantime::parse_duration)
+.transpose()
+.context("Failed to parse 'image_layer_force_creation_period' as duration")?,
 image_layer_creation_check_threshold: settings
 .remove("image_layer_creation_check_threshold")
 .map(|x| x.parse::<u8>())
@@ -127,7 +127,7 @@ impl SafekeeperNode {
 extra_opts: &[String],
 retry_timeout: &Duration,
 ) -> anyhow::Result<()> {
-print!(
+println!(
 "Starting safekeeper at '{}' in '{}', retrying for {:?}",
 self.pg_connection_config.raw_address(),
 self.datadir_path().display(),
@@ -648,6 +648,13 @@ impl StorageController {
 args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
 }

+if let Some(duration) = self.config.shard_split_request_timeout {
+args.push(format!(
+"--shard-split-request-timeout={}",
+humantime::Duration::from(duration)
+));
+}

 let mut envs = vec![
 ("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
 ("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
@@ -660,7 +667,7 @@ impl StorageController {
 ));
 }

-println!("Starting storage controller");
+println!("Starting storage controller at {scheme}://{host}:{listen_port}");

 background_process::start_process(
 COMMAND,
@@ -14,6 +14,7 @@ humantime.workspace = true
 pageserver_api.workspace = true
 pageserver_client.workspace = true
 reqwest.workspace = true
+safekeeper_api.workspace=true
 serde_json = { workspace = true, features = ["raw_value"] }
 storage_controller_client.workspace = true
 tokio.workspace = true
@@ -11,7 +11,7 @@ use pageserver_api::controller_api::{
 PlacementPolicy, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest,
 ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
 SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
-TenantShardMigrateRequest, TenantShardMigrateResponse,
+TenantShardMigrateRequest, TenantShardMigrateResponse, TimelineSafekeeperMigrateRequest,
 };
 use pageserver_api::models::{
 EvictionPolicy, EvictionPolicyLayerAccessThreshold, ShardParameters, TenantConfig,
@@ -21,6 +21,7 @@ use pageserver_api::models::{
 use pageserver_api::shard::{ShardStripeSize, TenantShardId};
 use pageserver_client::mgmt_api::{self};
 use reqwest::{Certificate, Method, StatusCode, Url};
+use safekeeper_api::models::TimelineLocateResponse;
 use storage_controller_client::control_api::Client;
 use utils::id::{NodeId, TenantId, TimelineId};

@@ -75,6 +76,12 @@ enum Command {
 NodeStartDelete {
 #[arg(long)]
 node_id: NodeId,
+/// When `force` is true, skip waiting for shards to prewarm during migration.
+/// This can significantly speed up node deletion since prewarming all shards
+/// can take considerable time, but may result in slower initial access to
+/// migrated shards until they warm up naturally.
+#[arg(long)]
+force: bool,
 },
 /// Cancel deletion of the specified pageserver and wait for `timeout`
 /// for the operation to be canceled. May be retried.
@@ -279,6 +286,23 @@ enum Command {
 #[arg(long)]
 concurrency: Option<usize>,
 },
+/// Locate safekeepers for a timeline from the storcon DB.
+TimelineLocate {
+#[arg(long)]
+tenant_id: TenantId,
+#[arg(long)]
+timeline_id: TimelineId,
+},
+/// Migrate a timeline to a new set of safekeepers
+TimelineSafekeeperMigrate {
+#[arg(long)]
+tenant_id: TenantId,
+#[arg(long)]
+timeline_id: TimelineId,
+/// Example: --new-sk-set 1,2,3
+#[arg(long, required = true, value_delimiter = ',')]
+new_sk_set: Vec<NodeId>,
+},
 }

 #[derive(Parser)]
@@ -458,6 +482,7 @@ async fn main() -> anyhow::Result<()> {
 listen_http_port,
 listen_https_port,
 availability_zone_id: AvailabilityZone(availability_zone_id),
+node_ip_addr: None,
 }),
 )
 .await?;
@@ -933,13 +958,14 @@ async fn main() -> anyhow::Result<()> {
 .dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
 .await?;
 }
-Command::NodeStartDelete { node_id } => {
+Command::NodeStartDelete { node_id, force } => {
+let query = if force {
+format!("control/v1/node/{node_id}/delete?force=true")
+} else {
+format!("control/v1/node/{node_id}/delete")
+};
 storcon_client
-.dispatch::<(), ()>(
-Method::PUT,
-format!("control/v1/node/{node_id}/delete"),
-None,
-)
+.dispatch::<(), ()>(Method::PUT, query, None)
 .await?;
 println!("Delete started for {node_id}");
 }
@@ -1324,7 +1350,7 @@ async fn main() -> anyhow::Result<()> {
 concurrency,
 } => {
 let mut path = format!(
-"/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
+"v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
 );

 if let Some(c) = concurrency {
@@ -1335,6 +1361,41 @@ async fn main() -> anyhow::Result<()> {
 .dispatch::<(), ()>(Method::POST, path, None)
 .await?;
 }
+Command::TimelineLocate {
+tenant_id,
+timeline_id,
+} => {
+let path = format!("debug/v1/tenant/{tenant_id}/timeline/{timeline_id}/locate");
+
+let resp = storcon_client
+.dispatch::<(), TimelineLocateResponse>(Method::GET, path, None)
+.await?;
+
+let sk_set = resp.sk_set.iter().map(|id| id.0 as i64).collect::<Vec<_>>();
+let new_sk_set = resp
+.new_sk_set
+.as_ref()
+.map(|ids| ids.iter().map(|id| id.0 as i64).collect::<Vec<_>>());
+
+println!("generation = {}", resp.generation);
+println!("sk_set = {sk_set:?}");
+println!("new_sk_set = {new_sk_set:?}");
+}
+Command::TimelineSafekeeperMigrate {
+tenant_id,
+timeline_id,
+new_sk_set,
+} => {
+let path = format!("v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate");
+
+storcon_client
+.dispatch::<_, ()>(
+Method::POST,
+path,
+Some(TimelineSafekeeperMigrateRequest { new_sk_set }),
+)
+.await?;
+}
 }

 Ok(())
@@ -54,14 +54,16 @@ else
 printf '%s\n' "${result}" | jq .
 fi

-echo "Check if a timeline present"
-PARAMS=(
--X GET
--H "Content-Type: application/json"
-"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
-)
-timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
-if [[ -z "${timeline_id}" || "${timeline_id}" = null ]]; then
+if [[ "${RUN_PARALLEL:-false}" != "true" ]]; then
+echo "Check if a timeline present"
+PARAMS=(
+-X GET
+-H "Content-Type: application/json"
+"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
+)
+timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
+fi
+if [[ -z "${timeline_id:-}" || "${timeline_id:-}" = null ]]; then
 generate_id timeline_id
 PARAMS=(
 -sbf
@@ -142,7 +142,7 @@ services:
 - "storage_broker"
 - "--listen-addr=0.0.0.0:50051"

-compute:
+compute1:
 restart: always
 build:
 context: ./compute_wrapper/
@@ -152,6 +152,7 @@ services:
 - TAG=${COMPUTE_TAG:-${TAG:-latest}}
 - http_proxy=${http_proxy:-}
 - https_proxy=${https_proxy:-}
+image: built-compute
 environment:
 - PG_VERSION=${PG_VERSION:-16}
 - TENANT_ID=${TENANT_ID:-}
@@ -166,6 +167,11 @@ services:
 - 3080:3080 # http endpoints
 entrypoint:
 - "/shell/compute.sh"
+# Ad an alias for compute1 for compatibility
+networks:
+default:
+aliases:
+- compute
 depends_on:
 - safekeeper1
 - safekeeper2
@@ -174,15 +180,20 @@ services:

 compute_is_ready:
 image: postgres:latest
+environment:
+- PARALLEL_COMPUTES=1
 entrypoint:
-- "/bin/bash"
+- "/bin/sh"
 - "-c"
 command:
-- "until pg_isready -h compute -p 55433 -U cloud_admin ; do
-echo 'Waiting to start compute...' && sleep 1;
-done"
+- "for i in $(seq 1 $${PARALLEL_COMPUTES}); do
+until pg_isready -h compute$$i -p 55433 -U cloud_admin ; do
+sleep 1;
+done;
+done;
+echo All computes are started"
 depends_on:
-- compute
+- compute1

 neon-test-extensions:
 profiles: ["test-extensions"]
@@ -196,4 +207,4 @@ services:
 command:
 - sleep 3600
 depends_on:
-- compute
+- compute1
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash

 # A basic test to ensure Docker images are built correctly.
 # Build a wrapper around the compute, start all services and runs a simple SQL query.
@@ -13,9 +13,36 @@
 #
 set -eux -o pipefail

+cd "$(dirname "${0}")"
 export COMPOSE_FILE='docker-compose.yml'
 export COMPOSE_PROFILES=test-extensions
-cd "$(dirname "${0}")"
+export PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
+READY_MESSAGE="All computes are started"
+COMPUTES=()
+for i in $(seq 1 "${PARALLEL_COMPUTES}"); do
+COMPUTES+=("compute${i}")
+done
+CURRENT_TMPDIR=$(mktemp -d)
+trap 'rm -rf ${CURRENT_TMPDIR} docker-compose-parallel.yml' EXIT
+if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
+export COMPOSE_FILE=docker-compose-parallel.yml
+cp docker-compose.yml docker-compose-parallel.yml
+# Replace the environment variable PARALLEL_COMPUTES with the actual value
+yq eval -i ".services.compute_is_ready.environment |= map(select(. | test(\"^PARALLEL_COMPUTES=\") | not)) + [\"PARALLEL_COMPUTES=${PARALLEL_COMPUTES}\"]" ${COMPOSE_FILE}
+for i in $(seq 2 "${PARALLEL_COMPUTES}"); do
+# Duplicate compute1 as compute${i} for parallel execution
+yq eval -i ".services.compute${i} = .services.compute1" ${COMPOSE_FILE}
+# We don't need these sections, so delete them
+yq eval -i "(del .services.compute${i}.build) | (del .services.compute${i}.ports) | (del .services.compute${i}.networks)" ${COMPOSE_FILE}
+# Let the compute 1 be the only dependence
+yq eval -i ".services.compute${i}.depends_on = [\"compute1\"]" ${COMPOSE_FILE}
+# Set RUN_PARALLEL=true for compute2. They will generate tenant_id and timeline_id to avoid using the same as other computes
+yq eval -i ".services.compute${i}.environment += [\"RUN_PARALLEL=true\"]" ${COMPOSE_FILE}
+# Remove TENANT_ID and TIMELINE_ID from the environment variables of the generated computes
+# They will create new TENANT_ID and TIMELINE_ID anyway.
+yq eval -i ".services.compute${i}.environment |= map(select(. | (test(\"^TENANT_ID=\") or test(\"^TIMELINE_ID=\")) | not))" ${COMPOSE_FILE}
+done
+fi
 PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"

 function cleanup() {
@@ -27,11 +54,11 @@ function cleanup() {

 for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
 pg_version=${pg_version/v/}
-echo "clean up containers if exists"
+echo "clean up containers if exist"
 cleanup
 PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
-PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull --build -d
+PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose build compute1
+PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull -d
 echo "wait until the compute is ready. timeout after 60s. "
 cnt=0
 while sleep 3; do
@@ -41,45 +68,50 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
 echo "timeout before the compute is ready."
 exit 1
 fi
-if docker compose logs "compute_is_ready" | grep -q "accepting connections"; then
+if docker compose logs compute_is_ready | grep -q "${READY_MESSAGE}"; then
 echo "OK. The compute is ready to connect."
 echo "execute simple queries."
-docker compose exec compute /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
+for compute in "${COMPUTES[@]}"; do
+docker compose exec "${compute}" /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
+done
 break
 fi
 done

 if [[ ${pg_version} -ge 16 ]]; then
-# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
-# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
-echo Adding dummy config
-docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
-# Prepare for the PostGIS test
-docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
-TMPDIR=$(mktemp -d)
-docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
-docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
-docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
-docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
-docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
-rm -rf "${TMPDIR}"
-# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
-TMPDIR=$(mktemp -d)
-docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
-docker compose cp "${TMPDIR}/data" compute:/ext-src/pg_hint_plan-src/
-rm -rf "${TMPDIR}"
-# The following block does the same for the contrib/file_fdw test
-TMPDIR=$(mktemp -d)
-docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${TMPDIR}/data"
-docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
-rm -rf "${TMPDIR}"
+mkdir "${CURRENT_TMPDIR}"/{pg_hint_plan-src,file_fdw,postgis-src}
+docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${CURRENT_TMPDIR}/postgis-src/test"
+docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${CURRENT_TMPDIR}/postgis-src/00-regress-install"
+docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${CURRENT_TMPDIR}/pg_hint_plan-src/data"
+docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${CURRENT_TMPDIR}/file_fdw/data"
+for compute in "${COMPUTES[@]}"; do
+# This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
+# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
+echo Adding dummy config on "${compute}"
+docker compose exec "${compute}" touch /var/db/postgres/compute/compute_ctl_temp_override.conf
+# Prepare for the PostGIS test
+docker compose exec "${compute}" mkdir -p /tmp/pgis_reg/pgis_reg_tmp /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
+docker compose cp "${CURRENT_TMPDIR}/postgis-src/test" "${compute}":/ext-src/postgis-src/raster/test
+docker compose cp "${CURRENT_TMPDIR}/postgis-src/00-regress-install" "${compute}":/ext-src/postgis-src/regress
+# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
+docker compose cp "${CURRENT_TMPDIR}/pg_hint_plan-src/data" "${compute}":/ext-src/pg_hint_plan-src/
+# The following block does the same for the contrib/file_fdw test
+docker compose cp "${CURRENT_TMPDIR}/file_fdw/data" "${compute}":/postgres/contrib/file_fdw/data
+done
 # Apply patches
 docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
 # We are running tests now
 rm -f testout.txt testout_contrib.txt
+# We want to run the longest tests first to better utilize parallelization and reduce overall test time.
+# Tests listed in the RUN_FIRST variable will be run before others.
+# If parallelization is not used, this environment variable will be ignored.

 docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
+-e RUN_FIRST=hll-src,postgis-src,pgtap-src -e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
 neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
 docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
+-e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
 neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
 if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then
 CONTRIB_FAILED=
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -x

 if [[ -v BENCHMARK_CONNSTR ]]; then
@@ -26,8 +26,9 @@ if [[ -v BENCHMARK_CONNSTR ]]; then
 fi
 fi
 REGULAR_USER=false
-while getopts r arg; do
-case $arg in
+PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
+while getopts pr arg; do
+case ${arg} in
 r)
 REGULAR_USER=true
 shift $((OPTIND-1))
@@ -41,26 +42,49 @@ extdir=${1}

 cd "${extdir}" || exit 2
 FAILED=
-LIST=$( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
-for d in ${LIST}; do
-[ -d "${d}" ] || continue
-if ! psql -w -c "select 1" >/dev/null; then
-FAILED="${d} ${FAILED}"
-break
-fi
-if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
-"${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
-continue
-fi
+export FAILED_FILE=/tmp/failed
+rm -f ${FAILED_FILE}
+mapfile -t LIST < <( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
+if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
+# Avoid errors if RUN_FIRST is not defined
+RUN_FIRST=${RUN_FIRST:-}
+# Move entries listed in the RUN_FIRST variable to the beginning
+ORDERED_LIST=$(printf "%s\n" "${LIST[@]}" | grep -x -Ff <(echo -e "${RUN_FIRST//,/$'\n'}"); printf "%s\n" "${LIST[@]}" | grep -vx -Ff <(echo -e "${RUN_FIRST//,/$'\n'}"))
+parallel -j"${PARALLEL_COMPUTES}" "[[ -d {} ]] || exit 0
+export PGHOST=compute{%}
+if ! psql -c 'select 1'>/dev/null; then
+exit 1
+fi
+echo Running on \${PGHOST}
+if [[ -f ${extdir}/{}/neon-test.sh ]]; then
+echo Running from script
+${extdir}/{}/neon-test.sh || echo {} >> ${FAILED_FILE};
+else
+echo Running using make;
+USE_PGXS=1 make -C {} installcheck || echo {} >> ${FAILED_FILE};
+fi" ::: ${ORDERED_LIST}
+[[ ! -f ${FAILED_FILE} ]] && exit 0
+else
+for d in "${LIST[@]}"; do
+[ -d "${d}" ] || continue
+if ! psql -w -c "select 1" >/dev/null; then
+FAILED="${d} ${FAILED}"
+break
+fi
+if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
+"${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
+continue
+fi

 if [ -f "${d}/neon-test.sh" ]; then
 "${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
 else
 USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
 fi
 done
-[ -z "${FAILED}" ] && exit 0
-for d in ${FAILED}; do
+[[ -z ${FAILED} ]] && exit 0
+fi
+for d in ${FAILED} $([[ ! -f ${FAILED_FILE} ]] || cat ${FAILED_FILE}); do
 cat "$(find $d -name regression.diffs)"
 done
 for postgis_diff in /tmp/pgis_reg/*_diff; do
@@ -68,4 +92,5 @@ for postgis_diff in /tmp/pgis_reg/*_diff; do
 cat "${postgis_diff}"
 done
 echo "${FAILED}"
+cat ${FAILED_FILE}
 exit 1
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -eux -o pipefail
 cd "$(dirname "${0}")"
 # Takes a variable name as argument. The result is stored in that variable.
@@ -60,8 +60,8 @@ function check_timeline() {
 # Restarts the compute node with the required compute tag and timeline.
 # Accepts the tag for the compute node and the timeline as parameters.
 function restart_compute() {
-docker compose down compute compute_is_ready
-COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
+docker compose down compute1 compute_is_ready
+COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute1 compute_is_ready
 wait_for_ready
 check_timeline ${2}
 }
@@ -129,9 +129,10 @@ segment to bootstrap the WAL writing, but it doesn't contain the checkpoint reco
 changes in xlog.c, to allow starting the compute node without reading the last checkpoint record
 from WAL.

-This includes code to read the `zenith.signal` file, which tells the startup code the LSN to start
-at. When the `zenith.signal` file is present, the startup uses that LSN instead of the last
-checkpoint's LSN. The system is known to be consistent at that LSN, without any WAL redo.
+This includes code to read the `neon.signal` (also `zenith.signal`) file, which tells the startup
+code the LSN to start at. When the `neon.signal` file is present, the startup uses that LSN
+instead of the last checkpoint's LSN. The system is known to be consistent at that LSN, without
+any WAL redo.


 ### How to get rid of the patch
@@ -75,7 +75,7 @@ CLI examples:
 * AWS S3 : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}"`

 For Amazon AWS S3, a key id and secret access key could be located in `~/.aws/credentials` if awscli was ever configured to work with the desired bucket, on the AWS Settings page for a certain user. Also note, that the bucket names does not contain any protocols when used on AWS.
-For local S3 installations, refer to the their documentation for name format and credentials.
+For local S3 installations, refer to their documentation for name format and credentials.

 Similar to other pageserver settings, toml config file can be used to configure either of the storages as backup targets.
 Required sections are:
@@ -20,7 +20,7 @@ In our case consensus leader is compute (walproposer), and we don't want to wake
 up all computes for the change. Neither we want to fully reimplement the leader
 logic second time outside compute. Because of that the proposed algorithm relies
 for issuing configurations on the external fault tolerant (distributed) strongly
-consisent storage with simple API: CAS (compare-and-swap) on the single key.
+consistent storage with simple API: CAS (compare-and-swap) on the single key.
 Properly configured postgres suits this.

 In the system consensus is implemented at the timeline level, so algorithm below
@@ -34,7 +34,7 @@ A configuration is

 ```
 struct Configuration {
-generation: Generation, // a number uniquely identifying configuration
+generation: SafekeeperGeneration, // a number uniquely identifying configuration
 sk_set: Vec<NodeId>, // current safekeeper set
 new_sk_set: Optional<Vec<NodeId>>,
 }
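The RFC relies on a compare-and-swap over a single key in a strongly consistent store (a properly configured Postgres) to publish new configurations. As a hedged illustration only — the table and column names below are invented for this example and are not the storage controller's actual schema — a generation-guarded CAS can look like this:

```rust
use tokio_postgres::Client;

/// Illustrative sketch: publish `new_conf_json` only if the stored generation
/// still matches the one we previously read. If another migration won the race,
/// zero rows are updated and the caller must re-read and retry (or abort).
async fn cas_configuration(
    client: &Client,
    tenant_id: &str,
    timeline_id: &str,
    expected_generation: i64,
    new_conf_json: &str,
) -> anyhow::Result<bool> {
    let rows = client
        .execute(
            "UPDATE timeline_membership \
             SET conf = $1::jsonb, generation = $4 + 1 \
             WHERE tenant_id = $2 AND timeline_id = $3 AND generation = $4",
            &[&new_conf_json, &tenant_id, &timeline_id, &expected_generation],
        )
        .await?;
    Ok(rows == 1) // false: the CAS lost the race
}
```

Any store with an equivalent single-key CAS works; the only property the algorithm needs is that two writers cannot both succeed against the same generation.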
@@ -81,11 +81,11 @@ configuration generation in them is less than its current one. Namely, it
|
|||||||
refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
|
refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
|
||||||
response it sends its current configuration generation to let walproposer know.
|
response it sends its current configuration generation to let walproposer know.
|
||||||
|
|
||||||
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
|
Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/membership`
|
||||||
accepting `Configuration`. Safekeeper switches to the given conf it is higher than its
|
accepting `Configuration`. Safekeeper switches to the given conf if it is higher than its
|
||||||
current one and ignores it otherwise. In any case it replies with
|
current one and ignores it otherwise. In any case it replies with
|
||||||
```
|
```
|
||||||
struct ConfigurationSwitchResponse {
|
struct TimelineMembershipSwitchResponse {
|
||||||
conf: Configuration,
|
conf: Configuration,
|
||||||
term: Term,
|
term: Term,
|
||||||
last_log_term: Term,
|
last_log_term: Term,
|
||||||
@@ -108,7 +108,7 @@ establishes this configuration as its own and moves to voting.
|
|||||||
It should stop talking to safekeepers not listed in the configuration at this
|
It should stop talking to safekeepers not listed in the configuration at this
|
||||||
point, though it is not unsafe to continue doing so.
|
point, though it is not unsafe to continue doing so.
|
||||||
|
|
||||||
To be elected it must receive votes from both majorites if `new_sk_set` is present.
|
To be elected it must receive votes from both majorities if `new_sk_set` is present.
|
||||||
Similarly, to commit WAL it must receive flush acknowledge from both majorities.
|
Similarly, to commit WAL it must receive flush acknowledge from both majorities.
|
||||||
|
|
||||||
If walproposer hears from safekeeper configuration higher than his own (i.e.
|
If walproposer hears from safekeeper configuration higher than his own (i.e.
|
||||||
@@ -130,7 +130,7 @@ storage are reachable.
|
|||||||
1) Fetch current timeline configuration from the configuration storage.
|
1) Fetch current timeline configuration from the configuration storage.
|
||||||
2) If it is already joint one and `new_set` is different from `desired_set`
|
2) If it is already joint one and `new_set` is different from `desired_set`
|
||||||
refuse to change. However, assign join conf to (in memory) var
|
refuse to change. However, assign join conf to (in memory) var
|
||||||
`join_conf` and proceed to step 4 to finish the ongoing change.
|
`joint_conf` and proceed to step 4 to finish the ongoing change.
|
||||||
3) Else, create joint `joint_conf: Configuration`: increment current conf number
|
3) Else, create joint `joint_conf: Configuration`: increment current conf number
|
||||||
`n` and put `desired_set` to `new_sk_set`. Persist it in the configuration
|
`n` and put `desired_set` to `new_sk_set`. Persist it in the configuration
|
||||||
storage by doing CAS on the current generation: change happens only if
|
storage by doing CAS on the current generation: change happens only if
|
||||||
@@ -161,11 +161,11 @@ storage are reachable.
|
|||||||
because `pull_timeline` already includes it, and additionally it would be
|
because `pull_timeline` already includes it, and additionally it would be
|
||||||
broadcast by compute. More importantly, we may proceed to the next step
|
broadcast by compute. More importantly, we may proceed to the next step
|
||||||
only when `<last_log_term, flush_lsn>` on the majority of the new set reached
|
only when `<last_log_term, flush_lsn>` on the majority of the new set reached
|
||||||
`sync_position`. Similarly, on the happy path no waiting is not needed because
|
`sync_position`. Similarly, on the happy path no waiting is needed because
|
||||||
`pull_timeline` already includes it. However, we should double
|
`pull_timeline` already includes it. However, we should double
|
||||||
check to be safe. For example, timeline could have been created earlier e.g.
|
check to be safe. For example, timeline could have been created earlier e.g.
|
||||||
manually or after try-to-migrate, abort, try-to-migrate-again sequence.
|
manually or after try-to-migrate, abort, try-to-migrate-again sequence.
|
||||||
7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new
|
7) Create `new_conf: Configuration` incrementing `joint_conf` generation and having new
|
||||||
safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
|
safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
|
||||||
storage under one more CAS.
|
storage under one more CAS.
|
||||||
8) Call `PUT` `configuration` on safekeepers from the new set,
|
8) Call `PUT` `configuration` on safekeepers from the new set,
|
||||||
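To make step 3 above concrete, here is a rough sketch of building the joint configuration and persisting it with a compare-and-swap on the generation. `ConfigStore`, `cas`, `CasError` and `Generation::next()` are assumptions for illustration; in reality the configuration storage is the storage_controller database.

```
// Assumed minimal interface over the configuration storage.
struct CasError;

trait ConfigStore {
    fn cas(&self, expected: SafekeeperGeneration, new_conf: &Configuration) -> Result<(), CasError>;
}

fn create_joint_conf(
    store: &dyn ConfigStore,
    current: &Configuration,
    desired_set: Vec<NodeId>,
) -> Result<Configuration, CasError> {
    let joint = Configuration {
        // Step 3: increment the current generation ...
        generation: current.generation.next(),
        sk_set: current.sk_set.clone(),
        // ... and put desired_set into new_sk_set, making the configuration joint.
        new_sk_set: Some(desired_set),
    };
    // The CAS succeeds only if the stored generation still equals `current.generation`,
    // i.e. no concurrent change has started in the meantime.
    store.cas(current.generation, &joint)?;
    Ok(joint)
}
```

Step 7 is the mirror image: bump the generation once more, move `new_sk_set` into `sk_set`, set `new_sk_set` to `None`, and CAS again.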
@@ -178,12 +178,12 @@ spec of it.
|
|||||||
|
|
||||||
The description above focuses on safety. To make the flow practical and live, here are a few more
|
The description above focuses on safety. To make the flow practical and live, here are a few more
|
||||||
considerations.
|
considerations.
|
||||||
1) It makes sense to ping new set to ensure it we are migrating to live node(s) before
|
1) It makes sense to ping new set to ensure we are migrating to live node(s) before
|
||||||
step 3.
|
step 3.
|
||||||
2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
|
2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
|
||||||
it is safe to rollback to the old conf with one more CAS.
|
it is safe to rollback to the old conf with one more CAS.
|
||||||
3) On step 4 timeline might be already created on members of the new set for various reasons;
|
3) On step 4 timeline might be already created on members of the new set for various reasons;
|
||||||
the simplest is the procedure restart. There are more complicated scenarious like mentioned
|
the simplest is the procedure restart. There are more complicated scenarios like mentioned
|
||||||
in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
|
in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
|
||||||
generations, so seems simpler to treat existing timeline as success. However, this also
|
generations, so seems simpler to treat existing timeline as success. However, this also
|
||||||
has a disadvantage: you might imagine a surpassingly unlikely schedule where the condition in
|
has a disadvantage: you might imagine a surpassingly unlikely schedule where the condition in
|
||||||
@@ -192,7 +192,7 @@ considerations.
|
|||||||
4) In the end timeline should be locally deleted on the safekeeper(s) which are
|
4) In the end timeline should be locally deleted on the safekeeper(s) which are
|
||||||
in the old set but not in the new one, unless they are unreachable. To be
|
in the old set but not in the new one, unless they are unreachable. To be
|
||||||
safe this also should be done under generation number (deletion proceeds only if
|
safe this also should be done under generation number (deletion proceeds only if
|
||||||
current configuration is <= than one in request and safekeeper is not memeber of it).
|
current configuration is <= than one in request and safekeeper is not member of it).
|
||||||
5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
|
5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
|
||||||
jump to step 7, using it as `new_conf`.
|
jump to step 7, using it as `new_conf`.
|
||||||
|
|
||||||
@@ -261,14 +261,14 @@ Timeline (branch) creation in cplane should call storage_controller POST
|
|||||||
Response should be augmented with `safekeepers_generation` and `safekeepers`
|
Response should be augmented with `safekeepers_generation` and `safekeepers`
|
||||||
fields as described in `/notify-safekeepers` above. Initially (currently)
|
fields as described in `/notify-safekeepers` above. Initially (currently)
|
||||||
these fields may be absent; in this case cplane chooses safekeepers on its own
|
these fields may be absent; in this case cplane chooses safekeepers on its own
|
||||||
like it currently does. The call should be retried until succeeds.
|
like it currently does. The call should be retried until it succeeds.
|
||||||
|
|
||||||
Timeline deletion and tenant deletion in cplane should call appropriate
|
Timeline deletion and tenant deletion in cplane should call appropriate
|
||||||
storage_controller endpoints like it currently does for sharded tenants. The
|
storage_controller endpoints like it currently does for sharded tenants. The
|
||||||
calls should be retried until they succeed.
|
calls should be retried until they succeed.
|
||||||
|
|
||||||
When compute receives safekeepers list from control plane it needs to know the
|
When compute receives safekeeper list from control plane it needs to know the
|
||||||
generation to checked whether it should be updated (note that compute may get
|
generation to check whether it should be updated (note that compute may get
|
||||||
safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
|
safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
|
||||||
GUC is just a comma-separated list of `host:port`. Let's prefix it with
|
GUC is just a comma-separated list of `host:port`. Let's prefix it with
|
||||||
`g#<generation>:` to this end, so it will look like
|
`g#<generation>:` to this end, so it will look like
|
||||||
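Since the compute needs to compare generations before applying a new list, here is a hedged sketch of parsing such a prefixed GUC value. The layout after `g#<generation>:` is the existing comma-separated `host:port` list; the helper itself and the `u32` generation type are illustrative assumptions.

```
fn parse_safekeepers_guc(value: &str) -> (Option<u32>, Vec<String>) {
    // e.g. "g#3:sk-1:5454,sk-2:5454" -> (Some(3), ["sk-1:5454", "sk-2:5454"])
    if let Some(rest) = value.strip_prefix("g#") {
        if let Some((generation, list)) = rest.split_once(':') {
            if let Ok(generation) = generation.parse::<u32>() {
                return (
                    Some(generation),
                    list.split(',').map(|s| s.to_string()).collect(),
                );
            }
        }
    }
    // Legacy form without a generation prefix.
    (None, value.split(',').map(|s| s.to_string()).collect())
}
```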
@@ -305,8 +305,8 @@ enum MigrationRequest {
|
|||||||
```
|
```
|
||||||
|
|
||||||
`FinishPending` requests to run the procedure to ensure state is clean: current
|
`FinishPending` requests to run the procedure to ensure state is clean: current
|
||||||
configuration is not joint and majority of safekeepers are aware of it, but do
|
configuration is not joint and the majority of safekeepers are aware of it, but do
|
||||||
not attempt to migrate anywhere. If current configuration fetched on step 1 is
|
not attempt to migrate anywhere. If the current configuration fetched on step 1 is
|
||||||
not joint it jumps to step 7. It should be run at startup for all timelines (but
|
not joint it jumps to step 7. It should be run at startup for all timelines (but
|
||||||
similarly, in the first version it is ok to trigger it manually).
|
similarly, in the first version it is ok to trigger it manually).
|
||||||
|
|
||||||
@@ -315,7 +315,7 @@ similarly, in the first version it is ok to trigger it manually).
|
|||||||
`safekeepers` table mirroring current `nodes` should be added, except that for
|
`safekeepers` table mirroring current `nodes` should be added, except that for
|
||||||
`scheduling_policy`: it is enough, at least in the beginning, to have only 3
|
`scheduling_policy`: it is enough, at least in the beginning, to have only 3
|
||||||
fields: 1) `active` 2) `paused` (initially only means that new timelines are not assigned there)
|
fields: 1) `active` 2) `paused` (initially only means that new timelines are not assigned there)
|
||||||
3) `decomissioned` (node is removed).
|
3) `decommissioned` (node is removed).
|
||||||
|
|
||||||
`timelines` table:
|
`timelines` table:
|
||||||
```
|
```
|
||||||
@@ -326,9 +326,10 @@ table! {
|
|||||||
tenant_id -> Varchar,
|
tenant_id -> Varchar,
|
||||||
start_lsn -> pg_lsn,
|
start_lsn -> pg_lsn,
|
||||||
generation -> Int4,
|
generation -> Int4,
|
||||||
sk_set -> Array<Int4>, // list of safekeeper ids
|
sk_set -> Array<Int8>, // list of safekeeper ids
|
||||||
new_sk_set -> Nullable<Array<Int8>>, // list of safekeeper ids, null if not joint conf
|
new_sk_set -> Nullable<Array<Int8>>, // list of safekeeper ids, null if not joint conf
|
||||||
cplane_notified_generation -> Int4,
|
cplane_notified_generation -> Int4,
|
||||||
|
sk_set_notified_generation -> Int4, // the generation a quorum of sk_set knows about
|
||||||
deleted_at -> Nullable<Timestamptz>,
|
deleted_at -> Nullable<Timestamptz>,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -338,13 +339,23 @@ table! {
|
|||||||
might also want to add ancestor_timeline_id to preserve the hierarchy, but for
|
might also want to add ancestor_timeline_id to preserve the hierarchy, but for
|
||||||
this RFC it is not needed.
|
this RFC it is not needed.
|
||||||
|
|
||||||
|
`cplane_notified_generation` and `sk_set_notified_generation` fields are used to
|
||||||
|
track the last stage of the algorithm, when we need to notify safekeeper set and cplane
|
||||||
|
with the final configuration after it's already committed to DB.
|
||||||
|
|
||||||
|
The timeline is up-to-date (no migration in progress) if `new_sk_set` is null and
|
||||||
|
`*_notified_generation` fields are up to date with `generation`.
|
||||||
|
|
||||||
|
It's possible to replace `*_notified_generation` with one boolean field `migration_completed`,
|
||||||
|
but for better observability it's nice to have them separately.
|
||||||
|
|
||||||
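For illustration, the "migration fully completed" check described above can be phrased directly over the row fields. The struct name and the helper are hypothetical; the integer and array types mirror the Diesel schema above.

```
struct TimelineRow {
    generation: i32,
    new_sk_set: Option<Vec<i64>>,
    cplane_notified_generation: i32,
    sk_set_notified_generation: i32,
}

// No migration is in progress: the conf is not joint and both notification
// watermarks have caught up with the current generation.
fn migration_completed(t: &TimelineRow) -> bool {
    t.new_sk_set.is_none()
        && t.cplane_notified_generation == t.generation
        && t.sk_set_notified_generation == t.generation
}
```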
#### API
|
#### API
|
||||||
|
|
||||||
Node management is similar to pageserver:
|
Node management is similar to pageserver:
|
||||||
1) POST `/control/v1/safekeepers` inserts safekeeper.
|
1) POST `/control/v1/safekeeper` inserts safekeeper.
|
||||||
2) GET `/control/v1/safekeepers` lists safekeepers.
|
2) GET `/control/v1/safekeeper` lists safekeepers.
|
||||||
3) GET `/control/v1/safekeepers/:node_id` gets safekeeper.
|
3) GET `/control/v1/safekeeper/:node_id` gets safekeeper.
|
||||||
4) PUT `/control/v1/safekepers/:node_id/status` changes status to e.g.
|
4) PUT `/control/v1/safekeeper/:node_id/scheduling_policy` changes status to e.g.
|
||||||
`offline` or `decommissioned`. Initially it is simpler not to schedule any
|
`offline` or `decommissioned`. Initially it is simpler not to schedule any
|
||||||
migrations here.
|
migrations here.
|
||||||
|
|
||||||
@@ -368,8 +379,8 @@ Migration API: the first version is the simplest and the most imperative:
|
|||||||
all timelines from one safekeeper to another. It accepts json
|
all timelines from one safekeeper to another. It accepts json
|
||||||
```
|
```
|
||||||
{
|
{
|
||||||
"src_sk": u32,
|
"src_sk": NodeId,
|
||||||
"dst_sk": u32,
|
"dst_sk": NodeId,
|
||||||
"limit": Optional<u32>,
|
"limit": Optional<u32>,
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
@@ -379,12 +390,15 @@ Returns list of scheduled requests.
|
|||||||
2) PUT `/control/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate` schedules `MigrationRequest`
|
2) PUT `/control/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate` schedules `MigrationRequest`
|
||||||
to move single timeline to given set of safekeepers:
|
to move single timeline to given set of safekeepers:
|
||||||
```
|
```
|
||||||
{
|
struct TimelineSafekeeperMigrateRequest {
|
||||||
"desired_set": Vec<u32>,
|
"new_sk_set": Vec<NodeId>,
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Returns scheduled request.
|
In the first version the handler migrates the timeline to `new_sk_set` synchronously.
|
||||||
|
Should be retried until success.
|
||||||
|
|
||||||
|
In the future we might change it to asynchronous API and return scheduled request.
|
||||||
|
|
||||||
Similar call should be added for the tenant.
|
Similar call should be added for the tenant.
|
||||||
|
|
||||||
@@ -434,6 +448,9 @@ table! {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
We load all pending ops from the table on startup into memory.
|
||||||
|
The table is needed only to preserve the state between restarts.
|
||||||
|
|
||||||
`op_type` can be `include` (seed from peers and ensure generation is up to
|
`op_type` can be `include` (seed from peers and ensure generation is up to
|
||||||
date), `exclude` (remove locally) and `delete`. Field is actually not strictly
|
date), `exclude` (remove locally) and `delete`. Field is actually not strictly
|
||||||
needed as it can be computed from current configuration, but gives more explicit
|
needed as it can be computed from current configuration, but gives more explicit
|
||||||
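For reference, the three `op_type` values map naturally onto a small enum (illustrative only; the actual column is presumably stored as text/varchar in the database):

```
enum SafekeeperTimelinePendingOp {
    Include, // seed the timeline from peers and ensure the generation is up to date
    Exclude, // remove the timeline locally
    Delete,  // delete the timeline
}
```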
@@ -474,7 +491,7 @@ actions must be idempotent. Now, a tricky point here is timeline start LSN. For
|
|||||||
the initial (tenant creation) call cplane doesn't know it. However, setting
|
the initial (tenant creation) call cplane doesn't know it. However, setting
|
||||||
start_lsn on safekeepers during creation is a good thing -- it provides a
|
start_lsn on safekeepers during creation is a good thing -- it provides a
|
||||||
guarantee that walproposer can always find a common point in WAL histories of
|
guarantee that walproposer can always find a common point in WAL histories of
|
||||||
safekeeper and its own, and so absense of it would be a clear sign of
|
safekeeper and its own, and so absence of it would be a clear sign of
|
||||||
corruption. The following sequence works:
|
corruption. The following sequence works:
|
||||||
1) Create timeline (or observe that it exists) on pageserver,
|
1) Create timeline (or observe that it exists) on pageserver,
|
||||||
figuring out last_record_lsn in response.
|
figuring out last_record_lsn in response.
|
||||||
@@ -497,11 +514,9 @@ corruption. The following sequence works:
|
|||||||
retries the call until 200 response.
|
retries the call until 200 response.
|
||||||
|
|
||||||
There is a small question how request handler (timeline creation in this
|
There is a small question how request handler (timeline creation in this
|
||||||
case) would interact with per sk reconciler. As always I prefer to do the
|
case) would interact with per sk reconciler. In the current implementation
|
||||||
simplest possible thing and here it seems to be just waking it up so it
|
we first persist the request in the DB, and then send an in-memory request
|
||||||
re-reads the db for work to do. Passing work in memory is faster, but
|
to each safekeeper reconciler to process it.
|
||||||
that shouldn't matter, and path to scan db for work will exist anyway,
|
|
||||||
simpler to reuse it.
|
|
||||||
|
|
||||||
For pg version / wal segment size: while we may persist them in `timelines`
|
For pg version / wal segment size: while we may persist them in `timelines`
|
||||||
table, it is not necessary as initial creation at step 3 can take them from
|
table, it is not necessary as initial creation at step 3 can take them from
|
||||||
@@ -509,30 +524,40 @@ pageserver or cplane creation call and later pull_timeline will carry them
|
|||||||
around.
|
around.
|
||||||
|
|
||||||
Timeline migration.
|
Timeline migration.
|
||||||
1) CAS to the db to create joint conf, and in the same transaction create
|
1) CAS to the db to create joint conf. Since this moment the migration is considered to be
|
||||||
`safekeeper_timeline_pending_ops` `include` entries to initialize new members
|
"in progress". We can detect all "in-progress" migrations looking into the database.
|
||||||
as well as deliver this conf to current ones; poke per sk reconcilers to work
|
2) Do steps 4-6 from the algorithm, including `pull_timeline` onto `new_sk_set`, update membership
|
||||||
on it. Also any conf change should also poke cplane notifier task(s).
|
configuration on all safekeepers, notify cplane, etc. All operations are idempotent,
|
||||||
2) Once it becomes possible per alg description above, get out of joint conf
|
so we don't need to persist anything in the database at this stage. If any errors occur,
|
||||||
with another CAS. Task should get wakeups from per sk reconcilers because
|
it's safe to retry or abort the migration.
|
||||||
conf switch is required for advancement; however retries should be sleep
|
3) Once it becomes possible per alg description above, get out of joint conf
|
||||||
based as well as LSN advancement might be needed, though in happy path
|
with another CAS. It should also insert `exclude` entries into `safekeeper_timeline_pending_ops`
|
||||||
it isn't. To see whether further transition is possible on wakup migration
|
in the same DB transaction. Adding `exclude` entries atomically is nesessary because after
|
||||||
executor polls safekeepers per the algorithm. CAS creating new conf with only
|
CAS we don't have the list of excluded safekeepers in the `timelines` table anymore, but we
|
||||||
new members should again insert entries to `safekeeper_timeline_pending_ops`
|
need to have them persisted somewhere in case the migration is interrupted right after the CAS.
|
||||||
to switch them there, as well as `exclude` rows to remove timeline from
|
4) Finish the migration. The final membership configuration is committed to the DB at this stage.
|
||||||
old members.
|
So, the migration can not be aborted anymore. But it can still be retried if the migration fails
|
||||||
|
past stage 3. To finish the migration we need to send the new membership configuration to
|
||||||
|
a new quorum of safekeepers, notify cplane with the new safekeeper list and schedule the `exclude`
|
||||||
|
requests to in-memory queue for safekeeper reconciler. If the algrorithm is retried, it's
|
||||||
|
possible that we have already committed `exclude` requests to DB, but didn't send them to
|
||||||
|
the in-memory queue. In this case we need to read them from `safekeeper_timeline_pending_ops`
|
||||||
|
because it's the only place where they are persistent. The fields `sk_set_notified_generation`
|
||||||
|
and `cplane_notified_generation` are updated after each step. The migration is considered
|
||||||
|
fully completed when they match the `generation` field.
|
||||||
|
|
||||||
|
In practice, we can report "success" after stage 3 and do the "finish" step in per-timeline
|
||||||
|
reconciler (if we implement it). But it's wise to at least try to finish them synchronously,
|
||||||
|
so the timeline is always in a "good state" and doesn't require an old quorum to commit
|
||||||
|
WAL after the migration reported "success".
|
||||||
|
|
||||||
Timeline deletion: just set `deleted_at` on the timeline row and insert
|
Timeline deletion: just set `deleted_at` on the timeline row and insert
|
||||||
`safekeeper_timeline_pending_ops` entries in the same xact, the rest is done by
|
`safekeeper_timeline_pending_ops` entries in the same xact, the rest is done by
|
||||||
per sk reconcilers.
|
per sk reconcilers.
|
||||||
|
|
||||||
When node is removed (set to `decomissioned`), `safekeeper_timeline_pending_ops`
|
When node is removed (set to `decommissioned`), `safekeeper_timeline_pending_ops`
|
||||||
for it must be cleared in the same transaction.
|
for it must be cleared in the same transaction.
|
||||||
|
|
||||||
One more task pool should infinitely retry notifying control plane about changed
|
|
||||||
safekeeper sets (trying making `cplane_notified_generation` equal `generation`).
|
|
||||||
|
|
||||||
#### Dealing with multiple instances of storage_controller
|
#### Dealing with multiple instances of storage_controller
|
||||||
|
|
||||||
Operations described above executed concurrently might create some errors but do
|
Operations described above executed concurrently might create some errors but do
|
||||||
@@ -541,7 +566,7 @@ of storage_controller it is fine to have it temporarily, e.g. during redeploy.
|
|||||||
|
|
||||||
To harden against some controller instance creating some work in
|
To harden against some controller instance creating some work in
|
||||||
`safekeeper_timeline_pending_ops` and then disappearing without anyone picking up
|
`safekeeper_timeline_pending_ops` and then disappearing without anyone picking up
|
||||||
the job per sk reconcilers apart from explicit wakups should scan for work
|
the job per sk reconcilers apart from explicit wakeups should scan for work
|
||||||
periodically. It is possible to remove that though if all db updates are
|
periodically. It is possible to remove that though if all db updates are
|
||||||
protected with leadership token/term -- then such scans are needed only after
|
protected with leadership token/term -- then such scans are needed only after
|
||||||
leadership is acquired.
|
leadership is acquired.
|
||||||
@@ -563,7 +588,7 @@ There should be following layers of tests:
|
|||||||
safekeeper communication and pull_timeline need to be mocked and main switch
|
safekeeper communication and pull_timeline need to be mocked and main switch
|
||||||
procedure wrapped as a node (thread) in simulation tests, using these
|
procedure wrapped as a node (thread) in simulation tests, using these
|
||||||
mocks. Test would inject migrations like it currently injects
|
mocks. Test would inject migrations like it currently injects
|
||||||
safekeeper/walproposer restars. Main assert is the same -- committed WAL must
|
safekeeper/walproposer restarts. Main assert is the same -- committed WAL must
|
||||||
not be lost.
|
not be lost.
|
||||||
|
|
||||||
3) Since simulation testing injects at relatively high level points (not
|
3) Since simulation testing injects at relatively high level points (not
|
||||||
@@ -613,7 +638,7 @@ Let's have the following implementation bits for gradual rollout:
|
|||||||
`notify-safekeepers`.
|
`notify-safekeepers`.
|
||||||
|
|
||||||
Then the rollout for a region would be:
|
Then the rollout for a region would be:
|
||||||
- Current situation: safekeepers are choosen by control_plane.
|
- Current situation: safekeepers are chosen by control_plane.
|
||||||
- We manually migrate some timelines, test moving them around.
|
- We manually migrate some timelines, test moving them around.
|
||||||
- Then we enable `--set-safekeepers` so that all new timelines
|
- Then we enable `--set-safekeepers` so that all new timelines
|
||||||
are on storage controller.
|
are on storage controller.
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ use utils::backoff::retry;
|
|||||||
pub fn app(state: Arc<Storage>) -> Router<()> {
|
pub fn app(state: Arc<Storage>) -> Router<()> {
|
||||||
use axum::routing::{delete as _delete, get as _get};
|
use axum::routing::{delete as _delete, get as _get};
|
||||||
let delete_prefix = _delete(delete_prefix);
|
let delete_prefix = _delete(delete_prefix);
|
||||||
|
// NB: On any changes do not forget to update the OpenAPI spec
|
||||||
|
// in /endpoint_storage/src/openapi_spec.yml.
|
||||||
Router::new()
|
Router::new()
|
||||||
.route(
|
.route(
|
||||||
"/{tenant_id}/{timeline_id}/{endpoint_id}/{*path}",
|
"/{tenant_id}/{timeline_id}/{endpoint_id}/{*path}",
|
||||||
|
|||||||
endpoint_storage/src/openapi_spec.yml (new file, 146 lines)
@@ -0,0 +1,146 @@
|
|||||||
|
openapi: "3.0.2"
|
||||||
|
info:
|
||||||
|
title: Endpoint Storage API
|
||||||
|
description: Endpoint Storage API
|
||||||
|
version: "1.0"
|
||||||
|
license:
|
||||||
|
name: "Apache"
|
||||||
|
url: https://github.com/neondatabase/neon/blob/main/LICENSE
|
||||||
|
servers:
|
||||||
|
- url: ""
|
||||||
|
paths:
|
||||||
|
/status:
|
||||||
|
description: Healthcheck endpoint
|
||||||
|
get:
|
||||||
|
description: Healthcheck
|
||||||
|
security: []
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: OK
|
||||||
|
|
||||||
|
/{tenant_id}/{timeline_id}/{endpoint_id}/{key}:
|
||||||
|
parameters:
|
||||||
|
- name: tenant_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: timeline_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: endpoint_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: key
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
get:
|
||||||
|
description: Get file from blob storage
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: "File stream from blob storage"
|
||||||
|
content:
|
||||||
|
application/octet-stream:
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
"400":
|
||||||
|
description: File was not found
|
||||||
|
"403":
|
||||||
|
description: JWT does not authorize request to this route
|
||||||
|
put:
|
||||||
|
description: Insert file into blob storage. If file exists, override it
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/octet-stream:
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
format: binary
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: File was inserted successfully
|
||||||
|
"403":
|
||||||
|
description: JWT does not authorize request to this route
|
||||||
|
delete:
|
||||||
|
description: Delete file from blob storage
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: File was successfully deleted or not found
|
||||||
|
"403":
|
||||||
|
description: JWT does not authorize request to this route
|
||||||
|
|
||||||
|
/{tenant_id}/{timeline_id}/{endpoint_id}:
|
||||||
|
parameters:
|
||||||
|
- name: tenant_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: timeline_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: endpoint_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
delete:
|
||||||
|
description: Delete endpoint data from blob storage
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Endpoint data was deleted
|
||||||
|
"403":
|
||||||
|
description: JWT does not authorize request to this route
|
||||||
|
|
||||||
|
/{tenant_id}/{timeline_id}:
|
||||||
|
parameters:
|
||||||
|
- name: tenant_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: timeline_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
delete:
|
||||||
|
description: Delete timeline data from blob storage
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Timeline data was deleted
|
||||||
|
"403":
|
||||||
|
description: JWT does not authorize request to this route
|
||||||
|
|
||||||
|
/{tenant_id}:
|
||||||
|
parameters:
|
||||||
|
- name: tenant_id
|
||||||
|
in: path
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
delete:
|
||||||
|
description: Delete tenant data from blob storage
|
||||||
|
responses:
|
||||||
|
"200":
|
||||||
|
description: Tenant data was deleted
|
||||||
|
"403":
|
||||||
|
description: JWT does not authorize request to this route
|
||||||
|
|
||||||
|
components:
|
||||||
|
securitySchemes:
|
||||||
|
JWT:
|
||||||
|
type: http
|
||||||
|
scheme: bearer
|
||||||
|
bearerFormat: JWT
|
||||||
|
|
||||||
|
security:
|
||||||
|
- JWT: []
|
||||||
@@ -46,16 +46,45 @@ pub struct ExtensionInstallResponse {
|
|||||||
pub version: ExtVersion,
|
pub version: ExtVersion,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Default, Debug, Clone)]
|
/// Status of the LFC prewarm process. The same state machine is reused for
|
||||||
|
/// both autoprewarm (prewarm after compute/Postgres start using the previously
|
||||||
|
/// stored LFC state) and explicit prewarming via API.
|
||||||
|
#[derive(Serialize, Default, Debug, Clone, PartialEq)]
|
||||||
#[serde(tag = "status", rename_all = "snake_case")]
|
#[serde(tag = "status", rename_all = "snake_case")]
|
||||||
pub enum LfcPrewarmState {
|
pub enum LfcPrewarmState {
|
||||||
|
/// Default value when compute boots up.
|
||||||
#[default]
|
#[default]
|
||||||
NotPrewarmed,
|
NotPrewarmed,
|
||||||
|
/// Prewarming thread is active and loading pages into LFC.
|
||||||
Prewarming,
|
Prewarming,
|
||||||
|
/// We found requested LFC state in the endpoint storage and
|
||||||
|
/// completed prewarming successfully.
|
||||||
Completed,
|
Completed,
|
||||||
Failed {
|
/// Unexpected error happened during prewarming. Note, `Not Found 404`
|
||||||
error: String,
|
/// response from the endpoint storage is explicitly excluded here
|
||||||
},
|
/// because it can normally happen on the first compute start,
|
||||||
|
/// since LFC state is not available yet.
|
||||||
|
Failed { error: String },
|
||||||
|
/// We tried to fetch the corresponding LFC state from the endpoint storage,
|
||||||
|
/// but received `Not Found 404`. This should normally happen only during the
|
||||||
|
/// first endpoint start after creation with `autoprewarm: true`.
|
||||||
|
///
|
||||||
|
/// During the orchestrated prewarm via API, when a caller explicitly
|
||||||
|
/// provides the LFC state key to prewarm from, it's the caller's responsibility
|
||||||
|
/// to handle this status as an error state in this case.
|
||||||
|
Skipped,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for LfcPrewarmState {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
LfcPrewarmState::NotPrewarmed => f.write_str("NotPrewarmed"),
|
||||||
|
LfcPrewarmState::Prewarming => f.write_str("Prewarming"),
|
||||||
|
LfcPrewarmState::Completed => f.write_str("Completed"),
|
||||||
|
LfcPrewarmState::Skipped => f.write_str("Skipped"),
|
||||||
|
LfcPrewarmState::Failed { error } => write!(f, "Error({error})"),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
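For clarity, a hedged example of the JSON shape produced by the `#[serde(tag = "status", rename_all = "snake_case")]` attributes above (the assertion helper is illustrative and assumes `serde_json` is available, as it typically is in tests):

```
use serde_json::json;

fn lfc_prewarm_state_json_shape() {
    // Struct variants carry the tag plus their fields.
    let failed = LfcPrewarmState::Failed { error: "timeout".to_string() };
    assert_eq!(
        serde_json::to_value(&failed).unwrap(),
        json!({"status": "failed", "error": "timeout"})
    );

    // Unit variants serialize to just the tag.
    assert_eq!(
        serde_json::to_value(&LfcPrewarmState::Skipped).unwrap(),
        json!({"status": "skipped"})
    );
}
```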
|
|
||||||
#[derive(Serialize, Default, Debug, Clone, PartialEq)]
|
#[derive(Serialize, Default, Debug, Clone, PartialEq)]
|
||||||
@@ -70,6 +99,23 @@ pub enum LfcOffloadState {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||||
|
#[serde(tag = "status", rename_all = "snake_case")]
|
||||||
|
/// Response of /promote
|
||||||
|
pub enum PromoteState {
|
||||||
|
NotPromoted,
|
||||||
|
Completed,
|
||||||
|
Failed { error: String },
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Serialize, Default, Debug, Clone)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
/// Result of /safekeepers_lsn
|
||||||
|
pub struct SafekeepersLsn {
|
||||||
|
pub safekeepers: String,
|
||||||
|
pub wal_flush_lsn: utils::lsn::Lsn,
|
||||||
|
}
|
||||||
|
|
||||||
/// Response of the /status API
|
/// Response of the /status API
|
||||||
#[derive(Serialize, Debug, Deserialize)]
|
#[derive(Serialize, Debug, Deserialize)]
|
||||||
#[serde(rename_all = "snake_case")]
|
#[serde(rename_all = "snake_case")]
|
||||||
@@ -93,6 +139,15 @@ pub enum TerminateMode {
|
|||||||
Immediate,
|
Immediate,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<TerminateMode> for ComputeStatus {
|
||||||
|
fn from(mode: TerminateMode) -> Self {
|
||||||
|
match mode {
|
||||||
|
TerminateMode::Fast => ComputeStatus::TerminationPendingFast,
|
||||||
|
TerminateMode::Immediate => ComputeStatus::TerminationPendingImmediate,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
|
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
|
||||||
#[serde(rename_all = "snake_case")]
|
#[serde(rename_all = "snake_case")]
|
||||||
pub enum ComputeStatus {
|
pub enum ComputeStatus {
|
||||||
@@ -113,7 +168,9 @@ pub enum ComputeStatus {
|
|||||||
// control-plane to terminate it.
|
// control-plane to terminate it.
|
||||||
Failed,
|
Failed,
|
||||||
// Termination requested
|
// Termination requested
|
||||||
TerminationPending { mode: TerminateMode },
|
TerminationPendingFast,
|
||||||
|
// Termination requested, without waiting 30s before returning from /terminate
|
||||||
|
TerminationPendingImmediate,
|
||||||
// Terminated Postgres
|
// Terminated Postgres
|
||||||
Terminated,
|
Terminated,
|
||||||
}
|
}
|
||||||
@@ -132,7 +189,10 @@ impl Display for ComputeStatus {
|
|||||||
ComputeStatus::Running => f.write_str("running"),
|
ComputeStatus::Running => f.write_str("running"),
|
||||||
ComputeStatus::Configuration => f.write_str("configuration"),
|
ComputeStatus::Configuration => f.write_str("configuration"),
|
||||||
ComputeStatus::Failed => f.write_str("failed"),
|
ComputeStatus::Failed => f.write_str("failed"),
|
||||||
ComputeStatus::TerminationPending { .. } => f.write_str("termination-pending"),
|
ComputeStatus::TerminationPendingFast => f.write_str("termination-pending-fast"),
|
||||||
|
ComputeStatus::TerminationPendingImmediate => {
|
||||||
|
f.write_str("termination-pending-immediate")
|
||||||
|
}
|
||||||
ComputeStatus::Terminated => f.write_str("terminated"),
|
ComputeStatus::Terminated => f.write_str("terminated"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ use serde::{Deserialize, Serialize};
|
|||||||
use url::Url;
|
use url::Url;
|
||||||
use utils::id::{TenantId, TimelineId};
|
use utils::id::{TenantId, TimelineId};
|
||||||
use utils::lsn::Lsn;
|
use utils::lsn::Lsn;
|
||||||
|
use utils::shard::{ShardCount, ShardIndex};
|
||||||
|
|
||||||
use crate::responses::TlsConfig;
|
use crate::responses::TlsConfig;
|
||||||
|
|
||||||
@@ -106,11 +107,18 @@ pub struct ComputeSpec {
|
|||||||
pub tenant_id: Option<TenantId>,
|
pub tenant_id: Option<TenantId>,
|
||||||
pub timeline_id: Option<TimelineId>,
|
pub timeline_id: Option<TimelineId>,
|
||||||
|
|
||||||
// Pageserver information can be passed in two different ways:
|
/// Pageserver information can be passed in three different ways:
|
||||||
// 1. Here
|
/// 1. Here in `pageserver_connection_info`
|
||||||
// 2. in cluster.settings. This is legacy, we are switching to method 1.
|
/// 2. In the `pageserver_connstring` field.
|
||||||
|
/// 3. In `cluster.settings`.
|
||||||
|
///
|
||||||
|
/// The goal is to use method 1. everywhere. But for backwards-compatibility with old
|
||||||
|
/// versions of the control plane, `compute_ctl` will check 2. and 3. if the
|
||||||
|
/// `pageserver_connection_info` field is missing.
|
||||||
pub pageserver_connection_info: Option<PageserverConnectionInfo>,
|
pub pageserver_connection_info: Option<PageserverConnectionInfo>,
|
||||||
|
|
||||||
|
pub pageserver_connstring: Option<String>,
|
||||||
|
|
||||||
// More neon ids that we expose to the compute_ctl
|
// More neon ids that we expose to the compute_ctl
|
||||||
// and to postgres as neon extension GUCs.
|
// and to postgres as neon extension GUCs.
|
||||||
pub project_id: Option<String>,
|
pub project_id: Option<String>,
|
||||||
@@ -145,7 +153,7 @@ pub struct ComputeSpec {
|
|||||||
|
|
||||||
// Stripe size for pageserver sharding, in pages
|
// Stripe size for pageserver sharding, in pages
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub shard_stripe_size: Option<usize>,
|
pub shard_stripe_size: Option<u32>,
|
||||||
|
|
||||||
/// Local Proxy configuration used for JWT authentication
|
/// Local Proxy configuration used for JWT authentication
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -218,16 +226,28 @@ pub enum ComputeFeature {
|
|||||||
UnknownFeature,
|
UnknownFeature,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
|
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
|
||||||
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
|
|
||||||
pub struct PageserverConnectionInfo {
|
pub struct PageserverConnectionInfo {
|
||||||
pub shards: HashMap<u32, PageserverShardConnectionInfo>,
|
/// NB: 0 for unsharded tenants, 1 for sharded tenants with 1 shard, following storage
|
||||||
|
pub shard_count: ShardCount,
|
||||||
|
|
||||||
pub prefer_grpc: bool,
|
/// INVARIANT: null if shard_count is 0, otherwise non-null and immutable
|
||||||
|
pub stripe_size: Option<u32>,
|
||||||
|
|
||||||
|
pub shards: HashMap<ShardIndex, PageserverShardInfo>,
|
||||||
|
|
||||||
|
#[serde(default)]
|
||||||
|
pub prefer_protocol: PageserverProtocol,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, Deserialize, Serialize, Eq, PartialEq)]
|
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
|
||||||
|
pub struct PageserverShardInfo {
|
||||||
|
pub pageservers: Vec<PageserverShardConnectionInfo>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
|
||||||
pub struct PageserverShardConnectionInfo {
|
pub struct PageserverShardConnectionInfo {
|
||||||
|
pub id: Option<String>,
|
||||||
pub libpq_url: Option<String>,
|
pub libpq_url: Option<String>,
|
||||||
pub grpc_url: Option<String>,
|
pub grpc_url: Option<String>,
|
||||||
}
|
}
|
||||||
@@ -465,13 +485,15 @@ pub struct JwksSettings {
|
|||||||
pub jwt_audience: Option<String>,
|
pub jwt_audience: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Protocol used to connect to a Pageserver. Parsed from the connstring scheme.
|
/// Protocol used to connect to a Pageserver.
|
||||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
|
||||||
pub enum PageserverProtocol {
|
pub enum PageserverProtocol {
|
||||||
/// The original protocol based on libpq and COPY. Uses postgresql:// or postgres:// scheme.
|
/// The original protocol based on libpq and COPY. Uses postgresql:// or postgres:// scheme.
|
||||||
#[default]
|
#[default]
|
||||||
|
#[serde(rename = "libpq")]
|
||||||
Libpq,
|
Libpq,
|
||||||
/// A newer, gRPC-based protocol. Uses grpc:// scheme.
|
/// A newer, gRPC-based protocol. Uses grpc:// scheme.
|
||||||
|
#[serde(rename = "grpc")]
|
||||||
Grpc,
|
Grpc,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ use tokio_stream::wrappers::ReceiverStream;
|
|||||||
use tokio_util::io::ReaderStream;
|
use tokio_util::io::ReaderStream;
|
||||||
use tracing::{Instrument, debug, info, info_span, warn};
|
use tracing::{Instrument, debug, info, info_span, warn};
|
||||||
use utils::auth::{AuthError, Claims, SwappableJwtAuth};
|
use utils::auth::{AuthError, Claims, SwappableJwtAuth};
|
||||||
|
use utils::metrics_collector::{METRICS_COLLECTOR, METRICS_STALE_MILLIS};
|
||||||
|
|
||||||
use crate::error::{ApiError, api_error_handler, route_error_handler};
|
use crate::error::{ApiError, api_error_handler, route_error_handler};
|
||||||
use crate::request::{get_query_param, parse_query_param};
|
use crate::request::{get_query_param, parse_query_param};
|
||||||
@@ -250,9 +251,28 @@ impl std::io::Write for ChannelWriter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
pub async fn prometheus_metrics_handler(
|
||||||
|
req: Request<Body>,
|
||||||
|
force_metric_collection_on_scrape: bool,
|
||||||
|
) -> Result<Response<Body>, ApiError> {
|
||||||
SERVE_METRICS_COUNT.inc();
|
SERVE_METRICS_COUNT.inc();
|
||||||
|
|
||||||
|
// HADRON
|
||||||
|
let requested_use_latest = parse_query_param(&req, "use_latest")?;
|
||||||
|
|
||||||
|
let use_latest = match requested_use_latest {
|
||||||
|
None => force_metric_collection_on_scrape,
|
||||||
|
Some(true) => true,
|
||||||
|
Some(false) => {
|
||||||
|
if force_metric_collection_on_scrape {
|
||||||
|
// We don't cache in this case
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
let started_at = std::time::Instant::now();
|
let started_at = std::time::Instant::now();
|
||||||
|
|
||||||
let (tx, rx) = mpsc::channel(1);
|
let (tx, rx) = mpsc::channel(1);
|
||||||
@@ -277,12 +297,18 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
|
|||||||
|
|
||||||
let _span = span.entered();
|
let _span = span.entered();
|
||||||
|
|
||||||
let metrics = metrics::gather();
|
// HADRON
|
||||||
|
let collected = if use_latest {
|
||||||
|
// Skip caching the results if we always force metric collection on scrape.
|
||||||
|
METRICS_COLLECTOR.run_once(!force_metric_collection_on_scrape)
|
||||||
|
} else {
|
||||||
|
METRICS_COLLECTOR.last_collected()
|
||||||
|
};
|
||||||
|
|
||||||
let gathered_at = std::time::Instant::now();
|
let gathered_at = std::time::Instant::now();
|
||||||
|
|
||||||
let res = encoder
|
let res = encoder
|
||||||
.encode(&metrics, &mut writer)
|
.encode(&collected.metrics, &mut writer)
|
||||||
.and_then(|_| writer.flush().map_err(|e| e.into()));
|
.and_then(|_| writer.flush().map_err(|e| e.into()));
|
||||||
|
|
||||||
// this instant is not when we finally got the full response sent, sending is done by hyper
|
// this instant is not when we finally got the full response sent, sending is done by hyper
|
||||||
@@ -295,6 +321,10 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
|
|||||||
let encoded_in = encoded_at - gathered_at - writer.wait_time();
|
let encoded_in = encoded_at - gathered_at - writer.wait_time();
|
||||||
let total = encoded_at - started_at;
|
let total = encoded_at - started_at;
|
||||||
|
|
||||||
|
// HADRON
|
||||||
|
let staleness_ms = (encoded_at - collected.collected_at).as_millis();
|
||||||
|
METRICS_STALE_MILLIS.set(staleness_ms as i64);
|
||||||
|
|
||||||
match res {
|
match res {
|
||||||
Ok(()) => {
|
Ok(()) => {
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
@@ -303,6 +333,7 @@ pub async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<
|
|||||||
spawning_ms = spawned_in.as_millis(),
|
spawning_ms = spawned_in.as_millis(),
|
||||||
collection_ms = collected_in.as_millis(),
|
collection_ms = collected_in.as_millis(),
|
||||||
encoding_ms = encoded_in.as_millis(),
|
encoding_ms = encoded_in.as_millis(),
|
||||||
|
staleness_ms = staleness_ms,
|
||||||
"responded /metrics"
|
"responded /metrics"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,17 +41,35 @@ pub fn get_query_param<'a>(
|
|||||||
Some(q) => q,
|
Some(q) => q,
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
};
|
};
|
||||||
let mut values = url::form_urlencoded::parse(query.as_bytes())
|
let values = url::form_urlencoded::parse(query.as_bytes())
|
||||||
.filter_map(|(k, v)| if k == param_name { Some(v) } else { None })
|
.filter_map(|(k, v)| if k == param_name { Some(v) } else { None })
|
||||||
// we call .next() twice below. If it's None the first time, .fuse() ensures it's None afterwards
|
// we call .next() twice below. If it's None the first time, .fuse() ensures it's None afterwards
|
||||||
.fuse();
|
.fuse();
|
||||||
|
|
||||||
let value1 = values.next();
|
// Work around an issue with Alloy's pyroscope scrape where the "seconds"
|
||||||
if values.next().is_some() {
|
// parameter is added several times. https://github.com/grafana/alloy/issues/3026
|
||||||
return Err(ApiError::BadRequest(anyhow!(
|
// TODO: revert after Alloy is fixed.
|
||||||
"param {param_name} specified more than once"
|
let value1 = values
|
||||||
)));
|
.map(Ok)
|
||||||
}
|
.reduce(|acc, i| {
|
||||||
|
match acc {
|
||||||
|
Err(_) => acc,
|
||||||
|
|
||||||
|
// It's okay to have duplicates as long as they have the same value.
|
||||||
|
Ok(ref a) if a == &i.unwrap() => acc,
|
||||||
|
|
||||||
|
_ => Err(ApiError::BadRequest(anyhow!(
|
||||||
|
"param {param_name} specified more than once"
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.transpose()?;
|
||||||
|
// if values.next().is_some() {
|
||||||
|
// return Err(ApiError::BadRequest(anyhow!(
|
||||||
|
// "param {param_name} specified more than once"
|
||||||
|
// )));
|
||||||
|
// }
|
||||||
|
|
||||||
Ok(value1)
|
Ok(value1)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -92,3 +110,39 @@ pub async fn ensure_no_body(request: &mut Request<Body>) -> Result<(), ApiError>
|
|||||||
None => Ok(()),
|
None => Ok(()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_get_query_param_duplicate() {
|
||||||
|
let req = Request::builder()
|
||||||
|
.uri("http://localhost:12345/testuri?testparam=1")
|
||||||
|
.body(hyper::Body::empty())
|
||||||
|
.unwrap();
|
||||||
|
let value = get_query_param(&req, "testparam").unwrap();
|
||||||
|
assert_eq!(value.unwrap(), "1");
|
||||||
|
|
||||||
|
let req = Request::builder()
|
||||||
|
.uri("http://localhost:12345/testuri?testparam=1&testparam=1")
|
||||||
|
.body(hyper::Body::empty())
|
||||||
|
.unwrap();
|
||||||
|
let value = get_query_param(&req, "testparam").unwrap();
|
||||||
|
assert_eq!(value.unwrap(), "1");
|
||||||
|
|
||||||
|
let req = Request::builder()
|
||||||
|
.uri("http://localhost:12345/testuri")
|
||||||
|
.body(hyper::Body::empty())
|
||||||
|
.unwrap();
|
||||||
|
let value = get_query_param(&req, "testparam").unwrap();
|
||||||
|
assert!(value.is_none());
|
||||||
|
|
||||||
|
let req = Request::builder()
|
||||||
|
.uri("http://localhost:12345/testuri?testparam=1&testparam=2&testparam=3")
|
||||||
|
.body(hyper::Body::empty())
|
||||||
|
.unwrap();
|
||||||
|
let value = get_query_param(&req, "testparam");
|
||||||
|
assert!(value.is_err());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -4,12 +4,14 @@
|
|||||||
//! a default registry.
|
//! a default registry.
|
||||||
#![deny(clippy::undocumented_unsafe_blocks)]
|
#![deny(clippy::undocumented_unsafe_blocks)]
|
||||||
|
|
||||||
|
use std::sync::RwLock;
|
||||||
|
|
||||||
use measured::label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels};
|
use measured::label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels};
|
||||||
use measured::metric::counter::CounterState;
|
use measured::metric::counter::CounterState;
|
||||||
use measured::metric::gauge::GaugeState;
|
use measured::metric::gauge::GaugeState;
|
||||||
use measured::metric::group::Encoding;
|
use measured::metric::group::Encoding;
|
||||||
use measured::metric::name::{MetricName, MetricNameEncoder};
|
use measured::metric::name::{MetricName, MetricNameEncoder};
|
||||||
use measured::metric::{MetricEncoding, MetricFamilyEncoding};
|
use measured::metric::{MetricEncoding, MetricFamilyEncoding, MetricType};
|
||||||
use measured::{FixedCardinalityLabel, LabelGroup, MetricGroup};
|
use measured::{FixedCardinalityLabel, LabelGroup, MetricGroup};
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use prometheus::Registry;
|
use prometheus::Registry;
|
||||||
@@ -116,12 +118,52 @@ pub fn pow2_buckets(start: usize, end: usize) -> Vec<f64> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct InfoMetric<L: LabelGroup, M: MetricType = GaugeState> {
|
||||||
|
label: RwLock<L>,
|
||||||
|
metric: M,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<L: LabelGroup> InfoMetric<L> {
|
||||||
|
pub fn new(label: L) -> Self {
|
||||||
|
Self::with_metric(label, GaugeState::new(1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<L: LabelGroup, M: MetricType<Metadata = ()>> InfoMetric<L, M> {
|
||||||
|
pub fn with_metric(label: L, metric: M) -> Self {
|
||||||
|
Self {
|
||||||
|
label: RwLock::new(label),
|
||||||
|
metric,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_label(&self, label: L) {
|
||||||
|
*self.label.write().unwrap() = label;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<L, M, E> MetricFamilyEncoding<E> for InfoMetric<L, M>
|
||||||
|
where
|
||||||
|
L: LabelGroup,
|
||||||
|
M: MetricEncoding<E, Metadata = ()>,
|
||||||
|
E: Encoding,
|
||||||
|
{
|
||||||
|
fn collect_family_into(
|
||||||
|
&self,
|
||||||
|
name: impl measured::metric::name::MetricNameEncoder,
|
||||||
|
enc: &mut E,
|
||||||
|
) -> Result<(), E::Err> {
|
||||||
|
M::write_type(&name, enc)?;
|
||||||
|
self.metric
|
||||||
|
.collect_into(&(), &*self.label.read().unwrap(), name, enc)
|
||||||
|
}
|
||||||
|
}
|
||||||
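A short usage sketch of the new `InfoMetric` wrapper (the values are made up; `BuildInfo` is the label group defined just below, and `new`/`set_label` are the methods introduced above):

```
fn info_metric_usage() {
    // An info-style metric reports a constant value of 1 and carries its
    // information in labels.
    let build_info = InfoMetric::new(BuildInfo {
        revision: "0123abcd",
        build_tag: "test-build",
    });

    // The label group can later be replaced in place, which is the point of
    // wrapping it in an RwLock rather than encoding the labels statically.
    build_info.set_label(BuildInfo {
        revision: "0123abcd",
        build_tag: "test-build-2",
    });
}
```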
|
|
||||||
pub struct BuildInfo {
|
pub struct BuildInfo {
|
||||||
pub revision: &'static str,
|
pub revision: &'static str,
|
||||||
pub build_tag: &'static str,
|
pub build_tag: &'static str,
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo: allow label group without the set
|
|
||||||
impl LabelGroup for BuildInfo {
|
impl LabelGroup for BuildInfo {
|
||||||
fn visit_values(&self, v: &mut impl LabelGroupVisitor) {
|
fn visit_values(&self, v: &mut impl LabelGroupVisitor) {
|
||||||
const REVISION: &LabelName = LabelName::from_str("revision");
|
const REVISION: &LabelName = LabelName::from_str("revision");
|
||||||
@@ -131,24 +173,6 @@ impl LabelGroup for BuildInfo {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Encoding> MetricFamilyEncoding<T> for BuildInfo
|
|
||||||
where
|
|
||||||
GaugeState: MetricEncoding<T>,
|
|
||||||
{
|
|
||||||
fn collect_family_into(
|
|
||||||
&self,
|
|
||||||
name: impl measured::metric::name::MetricNameEncoder,
|
|
||||||
enc: &mut T,
|
|
||||||
) -> Result<(), T::Err> {
|
|
||||||
enc.write_help(&name, "Build/version information")?;
|
|
||||||
GaugeState::write_type(&name, enc)?;
|
|
||||||
GaugeState {
|
|
||||||
count: std::sync::atomic::AtomicI64::new(1),
|
|
||||||
}
|
|
||||||
.collect_into(&(), self, name, enc)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(MetricGroup)]
|
#[derive(MetricGroup)]
|
||||||
#[metric(new(build_info: BuildInfo))]
|
#[metric(new(build_info: BuildInfo))]
|
||||||
pub struct NeonMetrics {
|
pub struct NeonMetrics {
|
||||||
@@ -165,8 +189,8 @@ pub struct NeonMetrics {
|
|||||||
#[derive(MetricGroup)]
|
#[derive(MetricGroup)]
|
||||||
#[metric(new(build_info: BuildInfo))]
|
#[metric(new(build_info: BuildInfo))]
|
||||||
pub struct LibMetrics {
|
pub struct LibMetrics {
|
||||||
#[metric(init = build_info)]
|
#[metric(init = InfoMetric::new(build_info))]
|
||||||
build_info: BuildInfo,
|
build_info: InfoMetric<BuildInfo>,
|
||||||
|
|
||||||
#[metric(flatten)]
|
#[metric(flatten)]
|
||||||
rusage: Rusage,
|
rusage: Rusage,
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
//!
|
//!
|
||||||
//! Concurrency is managed very simply: the entire map is guarded by one shared-memory RwLock.
|
//! Concurrency is managed very simply: the entire map is guarded by one shared-memory RwLock.
|
||||||
|
|
||||||
|
use std::fmt::Debug;
|
||||||
use std::hash::{BuildHasher, Hash};
|
use std::hash::{BuildHasher, Hash};
|
||||||
use std::mem::MaybeUninit;
|
use std::mem::MaybeUninit;
|
||||||
|
|
||||||
@@ -56,6 +57,22 @@ pub struct HashMapInit<'a, K, V, S = rustc_hash::FxBuildHasher> {
|
|||||||
num_buckets: u32,
|
num_buckets: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a, K, V, S> Debug for HashMapInit<'a, K, V, S>
|
||||||
|
where
|
||||||
|
K: Debug,
|
||||||
|
V: Debug,
|
||||||
|
{
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("HashMapInit")
|
||||||
|
.field("shmem_handle", &self.shmem_handle)
|
||||||
|
.field("shared_ptr", &self.shared_ptr)
|
||||||
|
.field("shared_size", &self.shared_size)
|
||||||
|
// .field("hasher", &self.hasher)
|
||||||
|
.field("num_buckets", &self.num_buckets)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// This is a per-process handle to a hash table that (possibly) lives in shared memory.
|
/// This is a per-process handle to a hash table that (possibly) lives in shared memory.
|
||||||
/// If a child process is launched with fork(), the child process should
|
/// If a child process is launched with fork(), the child process should
|
||||||
/// get its own HashMapAccess by calling HashMapInit::attach_writer/reader().
|
/// get its own HashMapAccess by calling HashMapInit::attach_writer/reader().
|
||||||
@@ -71,6 +88,20 @@ pub struct HashMapAccess<'a, K, V, S = rustc_hash::FxBuildHasher> {
|
|||||||
unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
|
unsafe impl<K: Sync, V: Sync, S> Sync for HashMapAccess<'_, K, V, S> {}
|
||||||
unsafe impl<K: Send, V: Send, S> Send for HashMapAccess<'_, K, V, S> {}
|
unsafe impl<K: Send, V: Send, S> Send for HashMapAccess<'_, K, V, S> {}
|
||||||
|
|
||||||
|
impl<'a, K, V, S> Debug for HashMapAccess<'a, K, V, S>
|
||||||
|
where
|
||||||
|
K: Debug,
|
||||||
|
V: Debug,
|
||||||
|
{
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("HashMapAccess")
|
||||||
|
.field("shmem_handle", &self.shmem_handle)
|
||||||
|
.field("shared_ptr", &self.shared_ptr)
|
||||||
|
// .field("hasher", &self.hasher)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
|
impl<'a, K: Clone + Hash + Eq, V, S> HashMapInit<'a, K, V, S> {
|
||||||
/// Change the 'hasher' used by the hash table.
|
/// Change the 'hasher' used by the hash table.
|
||||||
///
|
///
|
||||||
@@ -298,7 +329,7 @@ where
|
|||||||
|
|
||||||
/// Get a reference to the entry containing a key.
|
/// Get a reference to the entry containing a key.
|
||||||
///
|
///
|
||||||
/// NB: THis takes a write lock as there's no way to distinguish whether the intention
|
/// NB: This takes a write lock as there's no way to distinguish whether the intention
|
||||||
/// is to use the entry for reading or for writing in advance.
|
/// is to use the entry for reading or for writing in advance.
|
||||||
pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {
|
pub fn entry(&self, key: K) -> Entry<'a, '_, K, V> {
|
||||||
let hash = self.get_hash_value(&key);
|
let hash = self.get_hash_value(&key);
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
//! Simple hash table with chaining.
|
//! Simple hash table with chaining.
|
||||||
|
|
||||||
|
use std::fmt::Debug;
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::mem::MaybeUninit;
|
use std::mem::MaybeUninit;
|
||||||
|
|
||||||
@@ -17,6 +18,19 @@ pub(crate) struct Bucket<K, V> {
|
|||||||
pub(crate) inner: Option<(K, V)>,
|
pub(crate) inner: Option<(K, V)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<K, V> Debug for Bucket<K, V>
|
||||||
|
where
|
||||||
|
K: Debug,
|
||||||
|
V: Debug,
|
||||||
|
{
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("Bucket")
|
||||||
|
.field("next", &self.next)
|
||||||
|
.field("inner", &self.inner)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Core hash table implementation.
|
/// Core hash table implementation.
|
||||||
pub(crate) struct CoreHashMap<'a, K, V> {
|
pub(crate) struct CoreHashMap<'a, K, V> {
|
||||||
/// Dictionary used to map hashes to bucket indices.
|
/// Dictionary used to map hashes to bucket indices.
|
||||||
@@ -31,6 +45,22 @@ pub(crate) struct CoreHashMap<'a, K, V> {
|
|||||||
pub(crate) buckets_in_use: u32,
|
pub(crate) buckets_in_use: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a, K, V> Debug for CoreHashMap<'a, K, V>
|
||||||
|
where
|
||||||
|
K: Debug,
|
||||||
|
V: Debug,
|
||||||
|
{
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("CoreHashMap")
|
||||||
|
.field("dictionary", &self.dictionary)
|
||||||
|
.field("buckets", &self.buckets)
|
||||||
|
.field("free_head", &self.free_head)
|
||||||
|
.field("alloc_limit", &self.alloc_limit)
|
||||||
|
.field("buckets_in_use", &self.buckets_in_use)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Error for when there are no empty buckets left but one is needed.
|
/// Error for when there are no empty buckets left but one is needed.
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub struct FullError;
|
pub struct FullError;
|
||||||
|
|||||||