Mirror of https://github.com/neondatabase/neon.git, synced 2026-02-15 08:30:37 +00:00

Compare commits (1 commit): problame/r...RFC_merged

Commit: efaf2c6663
@@ -1,2 +1,2 @@
[profile.default]
slow-timeout = { period = "60s", terminate-after = 3 }
slow-timeout = { period = "20s", terminate-after = 3 }

@@ -8,7 +8,6 @@
!scripts/combine_control_files.py
!scripts/ninstall.sh
!vm-cgconfig.conf
!docker-compose/run-tests.sh

# Directories
!.cargo/

@@ -18,13 +17,11 @@
!libs/
!neon_local/
!pageserver/
!patches/
!pgxn/
!proxy/
!storage_scrubber/
!s3_scrubber/
!safekeeper/
!storage_broker/
!storage_controller/
!trace/
!vendor/postgres-*/
!workspace_hack/
.github/actionlint.yml (vendored), 5 lines changed
@@ -1,11 +1,12 @@
self-hosted-runner:
labels:
- arm64
- dev
- gen3
- large
- large-arm64
# Remove `macos-14` from the list after https://github.com/rhysd/actionlint/pull/392 is merged.
- macos-14
- small
- small-arm64
- us-east-2
config-variables:
- REMOTE_STORAGE_AZURE_CONTAINER
@@ -150,7 +150,7 @@ runs:

# Use aws s3 cp (instead of aws s3 sync) to keep files from previous runs to make old URLs work,
# and to keep files on the host to upload them to the database
time s5cmd --log error cp "${WORKDIR}/report/*" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}/"
time aws s3 cp --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"

# Generate redirect
cat <<EOF > ${WORKDIR}/index.html
@@ -3,14 +3,14 @@ description: 'Create Branch using API'

inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project to create Branch in'
desctiption: 'ID of the Project to create Branch in'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Branch DSN (for main database)'
.github/actions/neon-branch-delete/action.yml (vendored), 10 lines changed
@@ -3,17 +3,17 @@ description: 'Delete Branch using API'

inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project which should be deleted'
desctiption: 'ID of the Project which should be deleted'
required: true
branch_id:
description: 'ID of the branch to delete'
desctiption: 'ID of the branch to delete'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech

runs:
using: "composite"
.github/actions/neon-project-create/action.yml (vendored), 16 lines changed
@@ -3,22 +3,22 @@ description: 'Create Neon Project using API'

inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
region_id:
description: 'Region ID, if not set the project will be created in the default region'
desctiption: 'Region ID, if not set the project will be created in the default region'
default: aws-us-east-2
postgres_version:
description: 'Postgres version; default is 15'
default: '15'
desctiption: 'Postgres version; default is 15'
default: 15
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
provisioner:
description: 'k8s-pod or k8s-neonvm'
desctiption: 'k8s-pod or k8s-neonvm'
default: 'k8s-pod'
compute_units:
description: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
default: '[1, 1]'

outputs:
@@ -3,14 +3,14 @@ description: 'Delete Neon Project using API'

inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project to delete'
desctiption: 'ID of the Project to delete'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech

runs:
using: "composite"
.github/workflows/actionlint.yml (vendored), 14 lines changed
@@ -24,7 +24,7 @@ jobs:

actionlint:
needs: [ check-permissions ]
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: reviewdog/action-actionlint@v1

@@ -36,15 +36,3 @@ jobs:
fail_on_error: true
filter_mode: nofilter
level: error
- run: |
PAT='^\s*runs-on:.*-latest'
if grep -ERq $PAT .github/workflows
then
grep -ERl $PAT .github/workflows |\
while read -r f
do
l=$(grep -nE $PAT .github/workflows/release.yml | awk -F: '{print $1}' | head -1)
echo "::error file=$f,line=$l::Please, do not use ubuntu-latest images to run on, use LTS instead."
done
exit 1
fi
.github/workflows/approved-for-ci-run.yml (vendored), 60 lines changed
@@ -18,7 +18,6 @@ on:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: false

env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

@@ -44,7 +43,7 @@ jobs:
contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) &&
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')

runs-on: ubuntu-22.04
runs-on: ubuntu-latest

steps:
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"

@@ -60,7 +59,7 @@ jobs:
github.event.action == 'labeled' &&
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')

runs-on: ubuntu-22.04
runs-on: ubuntu-latest

steps:
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"

@@ -69,41 +68,15 @@ jobs:
with:
ref: main
token: ${{ secrets.CI_ACCESS_TOKEN }}

- name: Look for existing PR
id: get-pr
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT}

- name: Get changed labels
id: get-labels
if: steps.get-pr.outputs.ALREADY_CREATED != ''
env:
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
LABELS_TO_REMOVE=$(comm -23 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) | sort) \
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
( grep -v run-e2e-tests-in-draft || true ) | paste -sd , -)
LABELS_TO_ADD=$(comm -13 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) |sort) \
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
paste -sd , -)
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
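For context: the "Get changed labels" step above computes label deltas with `comm` over sorted lists. A minimal, standalone sketch of the same idea (not part of the diff; the label names are made up for illustration):

#!/usr/bin/env bash
# Hedged sketch: reproduce the comm -23 / comm -13 delta logic with fake label sets.
set -euo pipefail

# Labels currently on the ci-run PR, and labels on the original PR (sorted, one per line).
existing=$(printf '%s\n' run-benchmarks run-e2e-tests-in-draft | sort)
original=$(printf '%s\n' run-benchmarks run-extra-build-stats | sort)

# comm -23: lines only in the first input -> labels to remove from the ci-run PR
# (run-e2e-tests-in-draft is always kept, hence the grep -v).
to_remove=$(comm -23 <(echo "$existing") <(echo "$original") | { grep -v run-e2e-tests-in-draft || true; })

# comm -13: lines only in the second input -> labels newly added on the original PR
to_add=$(comm -13 <(echo "$existing") <(echo "$original"))

echo "labels to remove: $(paste -sd , - <<<"$to_remove")"
echo "labels to add:    $(paste -sd , - <<<"$to_add")"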
- run: gh pr checkout "${PR_NUMBER}"

- run: git checkout -b "${BRANCH}"

- run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED == ''

- name: Create a Pull Request for CI run (if required)
if: steps.get-pr.outputs.ALREADY_CREATED == ''
env:
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
cat << EOF > body.md

@@ -114,33 +87,16 @@ jobs:
Feel free to review/comment/discuss the original PR #${PR_NUMBER}.
EOF

LABELS=$( (gh pr --repo "${GITHUB_REPOSITORY}" view ${PR_NUMBER} --json labels --jq '.labels.[].name'; echo run-e2e-tests-in-draft )| \
grep -E '^run' | paste -sd , -)
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
if [ -z "${ALREADY_CREATED}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
--body-file "body.md" \
--head "${BRANCH}" \
--base "main" \
--label ${LABELS} \
--label "run-e2e-tests-in-draft" \
--draft
- name: Modify the existing pull request (if required)
if: steps.get-pr.outputs.ALREADY_CREATED != ''
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
LABELS_TO_ADD: ${{ steps.get-labels.outputs.LABELS_TO_ADD }}
LABELS_TO_REMOVE: ${{ steps.get-labels.outputs.LABELS_TO_REMOVE }}
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
run: |
ADD_CMD=
REMOVE_CMD=
[ -z "${LABELS_TO_ADD}" ] || ADD_CMD="--add-label ${LABELS_TO_ADD}"
[ -z "${LABELS_TO_REMOVE}" ] || REMOVE_CMD="--remove-label ${LABELS_TO_REMOVE}"
if [ -n "${ADD_CMD}" ] || [ -n "${REMOVE_CMD}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" edit ${ALREADY_CREATED} ${ADD_CMD} ${REMOVE_CMD}
fi

- run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED != ''

cleanup:
# Close PRs and delete branchs if the original PR is closed.

@@ -152,7 +108,7 @@ jobs:
github.event.action == 'closed' &&
github.event.pull_request.head.repo.full_name != github.repository

runs-on: ubuntu-22.04
runs-on: ubuntu-latest

steps:
- name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch
.github/workflows/benchmarking.yml (vendored), 102 lines changed
@@ -38,11 +38,6 @@ on:
description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
required: false
default: false
run_only_pgvector_tests:
type: boolean
description: 'Run pgvector tests but no other tests. If not set, all tests including pgvector tests will be run'
required: false
default: false

defaults:
run:

@@ -55,7 +50,6 @@ concurrency:

jobs:
bench:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"

@@ -126,7 +120,6 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

generate-matrices:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
#
# Available platforms:

@@ -137,7 +130,7 @@ jobs:
# - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
env:
RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}

@@ -204,7 +197,6 @@ jobs:
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT

pgbench-compare:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
needs: [ generate-matrices ]

strategy:

@@ -351,92 +343,6 @@ jobs:
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

pgbench-pgvector:
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
TEST_PG_BENCH_SCALES_MATRIX: "1"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: "neon-captest-pgvector"

runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init

steps:
- uses: actions/checkout@v4

- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest

- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH

- name: Set up Connection String
id: set-up-connstr
run: |
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}

echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT

QUERIES=("SELECT version()")
QUERIES+=("SHOW neon.tenant_id")
QUERIES+=("SHOW neon.timeline_id")

for q in "${QUERIES[@]}"; do
psql ${CONNSTR} -c "${q}"
done

- name: Benchmark pgvector hnsw indexing
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}

- name: Benchmark pgvector hnsw queries
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_pgvector
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate

- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing neon-captest-pgvector: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

clickbench-compare:
# ClichBench DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare.

@@ -445,7 +351,7 @@ jobs:
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() }}
needs: [ generate-matrices, pgbench-compare ]

strategy:

@@ -549,7 +455,7 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() }}
needs: [ generate-matrices, clickbench-compare ]

strategy:

@@ -651,7 +557,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

user-examples-compare:
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() }}
needs: [ generate-matrices, tpch-compare ]

strategy:
@@ -21,7 +21,6 @@ defaults:

concurrency:
group: build-build-tools-image-${{ inputs.image-tag }}
cancel-in-progress: false

# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}

@@ -39,7 +38,7 @@ jobs:
matrix:
arch: [ x64, arm64 ]

runs-on: ${{ fromJson(format('["self-hosted", "gen3", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
runs-on: ${{ fromJson(format('["self-hosted", "dev", "{0}"]', matrix.arch)) }}

env:
IMAGE_TAG: ${{ inputs.image-tag }}

@@ -55,7 +54,7 @@ jobs:
exit 1
fi

- uses: actions/checkout@v4
- uses: actions/checkout@v3

# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
# The default value is ~/.docker

@@ -88,7 +87,7 @@ jobs:

merge-images:
needs: [ build-image ]
runs-on: ubuntu-22.04
runs-on: ubuntu-latest

env:
IMAGE_TAG: ${{ inputs.image-tag }}
.github/workflows/build_and_test.yml (vendored), 336 lines changed
@@ -35,7 +35,7 @@ jobs:
cancel-previous-e2e-tests:
needs: [ check-permissions ]
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04
runs-on: ubuntu-latest

steps:
- name: Cancel previous e2e-tests runs for this PR

@@ -236,6 +236,27 @@ jobs:
submodules: true
fetch-depth: 1

- name: Check Postgres submodules revision
shell: bash -euo pipefail {0}
run: |
# This is a temporary solution to ensure that the Postgres submodules revision is correct (i.e. the updated intentionally).
# Eventually it will be replaced by a regression test https://github.com/neondatabase/neon/pull/4603

FAILED=false
for postgres in postgres-v14 postgres-v15 postgres-v16; do
expected=$(cat vendor/revisions.json | jq --raw-output '."'"${postgres}"'"')
actual=$(git rev-parse "HEAD:vendor/${postgres}")
if [ "${expected}" != "${actual}" ]; then
echo >&2 "Expected ${postgres} rev to be at '${expected}', but it is at '${actual}'"
FAILED=true
fi
done

if [ "${FAILED}" = "true" ]; then
echo >&2 "Please update vendor/revisions.json if these changes are intentional"
exit 1
fi

- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT

@@ -341,9 +362,6 @@ jobs:
env:
NEXTEST_RETRIES: 3
run: |
#nextest does not yet support running doctests
cargo test --doc $CARGO_FLAGS $CARGO_FEATURES

for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
done
@@ -459,8 +477,6 @@ jobs:
BUILD_TAG: ${{ needs.tag.outputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_IMPL: vectored
PAGESERVER_GET_IMPL: vectored
PAGESERVER_VALIDATE_VEC_GET: true

# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540

@@ -540,33 +556,12 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_IMPL: vectored
PAGESERVER_GET_IMPL: vectored
PAGESERVER_VALIDATE_VEC_GET: false
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones

report-benchmarks-failures:
needs: [ benchmarks, create-test-report ]
if: github.ref_name == 'main' && failure() && needs.benchmarks.result == 'failure'
runs-on: ubuntu-22.04

steps:
- uses: slackapi/slack-github-action@v1
with:
channel-id: C060CNA47S9 # on-call-staging-storage-stream
slack-message: |
Benchmarks failed on main: ${{ github.event.head_commit.url }}

Allure report: ${{ needs.create-test-report.outputs.report-url }}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

create-test-report:
needs: [ check-permissions, regress-tests, coverage-report, benchmarks, build-build-tools-image ]
if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }}
outputs:
report-url: ${{ steps.create-allure-report.outputs.report-url }}

runs-on: [ self-hosted, gen3, small ]
container:
@@ -723,13 +718,9 @@ jobs:
uses: ./.github/workflows/trigger-e2e-tests.yml
secrets: inherit

neon-image-arch:
neon-image:
needs: [ check-permissions, build-build-tools-image, tag ]
strategy:
matrix:
arch: [ x64, arm64 ]

runs-on: ${{ fromJson(format('["self-hosted", "gen3", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
runs-on: [ self-hosted, gen3, large ]

steps:
- name: Checkout

@@ -744,13 +735,19 @@ jobs:
run: |
mkdir -p .docker-custom
echo DOCKER_CONFIG=$(pwd)/.docker-custom >> $GITHUB_ENV
- uses: docker/setup-buildx-action@v2
- uses: docker/setup-buildx-action@v3

- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- uses: docker/login-action@v3
with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }}

- uses: docker/build-push-action@v5
with:
context: .

@@ -762,52 +759,25 @@ jobs:
push: true
pull: true
file: Dockerfile
cache-from: type=registry,ref=neondatabase/neon:cache-${{ matrix.arch }}
cache-to: type=registry,ref=neondatabase/neon:cache-${{ matrix.arch }},mode=max
cache-from: type=registry,ref=neondatabase/neon:cache
cache-to: type=registry,ref=neondatabase/neon:cache,mode=max
tags: |
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
neondatabase/neon:${{needs.tag.outputs.build-tag}}

- name: Remove custom docker config directory
if: always()
run: |
rm -rf .docker-custom

neon-image:
needs: [ neon-image-arch, tag ]
runs-on: ubuntu-22.04

steps:
- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- name: Create multi-arch image
run: |
docker buildx imagetools create -t neondatabase/neon:${{ needs.tag.outputs.build-tag }} \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-x64 \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}-arm64
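For context: the "Create multi-arch image" step above follows the usual docker buildx imagetools pattern, where per-architecture images are pushed under arch-suffixed tags and then stitched into one manifest list. A minimal sketch with placeholder repository and tag names (not taken from the diff):

# Hedged sketch: build a multi-arch manifest list from two arch-specific tags.
REPO=example/neon        # placeholder repository
TAG=build-1234           # placeholder build tag

# Create a manifest list under the plain tag, pointing at the x64 and arm64 images
# that were pushed earlier by the per-arch build jobs.
docker buildx imagetools create -t "${REPO}:${TAG}" \
  "${REPO}:${TAG}-x64" \
  "${REPO}:${TAG}-arm64"

# Inspect the result; it should show one manifest per architecture.
docker buildx imagetools inspect "${REPO}:${TAG}"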
- uses: docker/login-action@v3
with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }}

- name: Push multi-arch image to ECR
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{ needs.tag.outputs.build-tag }} \
neondatabase/neon:${{ needs.tag.outputs.build-tag }}

compute-node-image-arch:
compute-node-image:
needs: [ check-permissions, build-build-tools-image, tag ]
runs-on: [ self-hosted, gen3, large ]

strategy:
fail-fast: false
matrix:
version: [ v14, v15, v16 ]
arch: [ x64, arm64 ]

runs-on: ${{ fromJson(format('["self-hosted", "gen3", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}

steps:
- name: Checkout

@@ -822,7 +792,7 @@ jobs:
run: |
mkdir -p .docker-custom
echo DOCKER_CONFIG=$(pwd)/.docker-custom >> $GITHUB_ENV
- uses: docker/setup-buildx-action@v2
- uses: docker/setup-buildx-action@v3
with:
# Disable parallelism for docker buildkit.
# As we already build everything with `make -j$(nproc)`, running it in additional level of parallelisam blows up the Runner.

@@ -854,34 +824,15 @@ jobs:
push: true
pull: true
file: Dockerfile.compute-node
cache-from: type=registry,ref=neondatabase/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }}
cache-to: type=registry,ref=neondatabase/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }},mode=max
cache-from: type=registry,ref=neondatabase/compute-node-${{ matrix.version }}:cache
cache-to: type=registry,ref=neondatabase/compute-node-${{ matrix.version }}:cache,mode=max
tags: |
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}

- name: Build neon extensions test image
if: matrix.version == 'v16'
uses: docker/build-push-action@v5
with:
context: .
build-args: |
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
PG_VERSION=${{ matrix.version }}
BUILD_TAG=${{ needs.tag.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
provenance: false
push: true
pull: true
file: Dockerfile.compute-node
target: neon-pg-ext-test
cache-from: type=registry,ref=neondatabase/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }}
cache-to: type=registry,ref=neondatabase/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }},mode=max
tags: |
neondatabase/neon-test-extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

- name: Build compute-tools image
# compute-tools are Postgres independent, so build it only once
if: matrix.version == 'v16'
if: ${{ matrix.version == 'v16' }}
uses: docker/build-push-action@v5
with:
target: compute-tools-image

@@ -895,64 +846,14 @@ jobs:
pull: true
file: Dockerfile.compute-node
tags: |
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }}
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}

- name: Remove custom docker config directory
if: always()
run: |
rm -rf .docker-custom

compute-node-image:
needs: [ compute-node-image-arch, tag ]
runs-on: ubuntu-22.04

strategy:
matrix:
version: [ v14, v15, v16 ]

steps:
- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- name: Create multi-arch compute-node image
run: |
docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-x64 \
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64

- name: Create multi-arch neon-test-extensions image
if: matrix.version == 'v16'
run: |
docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-x64 \
neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64

- name: Create multi-arch compute-tools image
if: matrix.version == 'v16'
run: |
docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-x64 \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-arm64

- uses: docker/login-action@v3
with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }}

- name: Push multi-arch compute-node-${{ matrix.version }} image to ECR
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}

- name: Push multi-arch compute-tools image to ECR
if: matrix.version == 'v16'
run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
vm-compute-node-image:
needs: [ check-permissions, tag, compute-node-image ]
runs-on: [ self-hosted, gen3, large ]

@@ -960,12 +861,15 @@ jobs:
fail-fast: false
matrix:
version: [ v14, v15, v16 ]
defaults:
run:
shell: sh -eu {0}
env:
VM_BUILDER_VERSION: v0.29.3
VM_BUILDER_VERSION: v0.23.2

steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@v1
with:
fetch-depth: 0

@@ -974,48 +878,26 @@ jobs:
curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
chmod +x vm-builder

# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
# The default value is ~/.docker
- name: Set custom docker config directory
run: |
mkdir -p .docker-custom
echo DOCKER_CONFIG=$(pwd)/.docker-custom >> $GITHUB_ENV

- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

# Note: we need a separate pull step here because otherwise vm-builder will try to pull, and
# it won't have the proper authentication (written at v0.6.0)
- name: Pulling compute-node image
run: |
docker pull neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
docker pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

- name: Build vm image
run: |
./vm-builder \
-spec=vm-image-spec.yaml \
-src=neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
-dst=neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
-src=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} \
-dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

- name: Pushing vm-compute-node image
run: |
docker push neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}

- name: Remove custom docker config directory
if: always()
run: |
rm -rf .docker-custom
docker push 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

test-images:
needs: [ check-permissions, tag, neon-image, compute-node-image ]
strategy:
fail-fast: false
matrix:
arch: [ x64, arm64 ]

runs-on: ${{ fromJson(format('["self-hosted", "gen3", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
runs-on: [ self-hosted, gen3, small ]

steps:
- name: Checkout

@@ -1033,7 +915,7 @@ jobs:
- name: Verify image versions
shell: bash # ensure no set -e for better error messages
run: |
pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.tag.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
pageserver_version=$(docker run --rm 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")

echo "Pageserver version string: $pageserver_version"

@@ -1047,7 +929,7 @@ jobs:
exit 1
fi

- name: Verify docker-compose example and test extensions
- name: Verify docker-compose example
timeout-minutes: 20
run: env TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh
@@ -1059,54 +941,82 @@

promote-images:
needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
runs-on: ubuntu-22.04

env:
VERSIONS: v14 v15 v16
runs-on: [ self-hosted, gen3, small ]
container: golang:1.19-bullseye
# Don't add if-condition here.
# The job should always be run because we have dependant other jobs that shouldn't be skipped

steps:
- uses: docker/login-action@v3
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

- uses: docker/login-action@v3
with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }}

- name: Copy vm-compute-node images to ECR
- name: Install Crane & ECR helper
run: |
for version in ${VERSIONS}; do
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }} \
neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
done
go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0

- name: Configure ECR login
run: |
mkdir /github/home/.docker/
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json

- name: Copy vm-compute-node images to Docker Hub
run: |
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} vm-compute-node-v16

- name: Add latest tag to images
if: github.ref_name == 'main'
if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy'
run: |
for repo in neondatabase 369495373322.dkr.ecr.eu-central-1.amazonaws.com; do
docker buildx imagetools create -t $repo/neon:latest \
$repo/neon:${{ needs.tag.outputs.build-tag }}
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} latest

docker buildx imagetools create -t $repo/compute-tools:latest \
$repo/compute-tools:${{ needs.tag.outputs.build-tag }}
- name: Push images to production ECR
if: github.ref_name == 'main' || github.ref_name == 'release'|| github.ref_name == 'release-proxy'
run: |
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/neon:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v16:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v16:latest

for version in ${VERSIONS}; do
docker buildx imagetools create -t $repo/compute-node-${version}:latest \
$repo/compute-node-${version}:${{ needs.tag.outputs.build-tag }}
- name: Configure Docker Hub login
run: |
# ECR Credential Helper & Docker Hub don't work together in config, hence reset
echo "" > /github/home/.docker/config.json
crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io

docker buildx imagetools create -t $repo/vm-compute-node-${version}:latest \
$repo/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
done
done
docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
- name: Push vm-compute-node to Docker Hub
run: |
crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}}
crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}}
crane push vm-compute-node-v16 neondatabase/vm-compute-node-v16:${{needs.tag.outputs.build-tag}}

- name: Push latest tags to Docker Hub
if: github.ref_name == 'main' || github.ref_name == 'release'|| github.ref_name == 'release-proxy'
run: |
crane tag neondatabase/neon:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/compute-node-v16:${{needs.tag.outputs.build-tag}} latest
crane tag neondatabase/vm-compute-node-v16:${{needs.tag.outputs.build-tag}} latest

- name: Cleanup ECR folder
run: rm -rf ~/.ecr
trigger-custom-extensions-build-and-wait:
needs: [ check-permissions, tag ]
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Set PR's status to pending and request a remote CI test
run: |

@@ -1223,6 +1133,8 @@ jobs:
-f deployPreprodRegion=true

gh workflow --repo neondatabase/aws run deploy-prod.yml --ref main \
-f deployPgSniRouter=false \
-f deployProxy=false \
-f deployStorage=true \
-f deployStorageBroker=true \
-f deployStorageController=true \

@@ -19,7 +19,7 @@ permissions: {}

jobs:
check-image:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
found: ${{ steps.check-image.outputs.found }}

@@ -28,9 +28,7 @@ jobs:
- name: Get build-tools image tag for the current commit
id: get-build-tools-tag
env:
# Usually, for COMMIT_SHA, we use `github.event.pull_request.head.sha || github.sha`, but here, even for PRs,
# we want to use `github.sha` i.e. point to a phantom merge commit to determine the image tag correctly.
COMMIT_SHA: ${{ github.sha }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
LAST_BUILD_TOOLS_SHA=$(
.github/workflows/check-permissions.yml (vendored), 2 lines changed
@@ -16,7 +16,7 @@ permissions: {}

jobs:
check-permissions:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Disallow CI runs on PRs from forks
if: |

@@ -9,7 +9,7 @@ on:

jobs:
cleanup:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Cleanup
run: |
.github/workflows/neon_extra_builds.yml (vendored), 33 lines changed
@@ -136,7 +136,7 @@ jobs:
check-linux-arm-build:
needs: [ check-permissions, build-build-tools-image ]
timeout-minutes: 90
runs-on: [ self-hosted, small-arm64 ]
runs-on: [ self-hosted, dev, arm64 ]

env:
# Use release build only, to have less debug info around

@@ -232,20 +232,20 @@ jobs:

- name: Run cargo build
run: |
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests -j$(nproc)
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests

- name: Run cargo test
env:
NEXTEST_RETRIES: 3
run: |
cargo nextest run $CARGO_FEATURES -j$(nproc)
cargo nextest run $CARGO_FEATURES

# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_s3 -j$(nproc)
cargo nextest run --package remote_storage --test test_real_s3

# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region

@@ -255,12 +255,12 @@ jobs:
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_azure -j$(nproc)
cargo nextest run --package remote_storage --test test_real_azure

check-codestyle-rust-arm:
needs: [ check-permissions, build-build-tools-image ]
timeout-minutes: 90
runs-on: [ self-hosted, small-arm64 ]
runs-on: [ self-hosted, dev, arm64 ]

container:
image: ${{ needs.build-build-tools-image.outputs.image }}

@@ -269,11 +269,6 @@ jobs:
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init

strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]

steps:
- name: Fix git ownership
run: |
@@ -310,35 +305,31 @@ jobs:
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV

- name: Run cargo clippy (debug)
if: matrix.build_type == 'debug'
run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
- name: Run cargo clippy (release)
if: matrix.build_type == 'release'
run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS

- name: Check documentation generation
if: matrix.build_type == 'release'
run: cargo doc --workspace --no-deps --document-private-items -j$(nproc)
run: cargo doc --workspace --no-deps --document-private-items
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"

# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() && matrix.build_type == 'release' }}
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check

# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() && matrix.build_type == 'release' }}
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack

# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() && matrix.build_type == 'release' }}
if: ${{ !cancelled() }}
run: cargo deny check

gather-rust-build-stats:

@@ -347,7 +338,7 @@ jobs:
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
runs-on: [ self-hosted, large ]
runs-on: [ self-hosted, gen3, large ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:

@@ -378,7 +369,7 @@ jobs:
run: make walproposer-lib -j$(nproc)

- name: Produce the build stats
run: cargo build --all --release --timings -j$(nproc)
run: cargo build --all --release --timings

- name: Upload the build stats
id: upload-stats
.github/workflows/pg_clients.yml (vendored), 2 lines changed
@@ -20,7 +20,7 @@ concurrency:
jobs:
test-postgres-client-libs:
# TODO: switch to gen2 runner, requires docker
runs-on: ubuntu-22.04
runs-on: [ ubuntu-latest ]

env:
DEFAULT_PG_VERSION: 14
.github/workflows/pin-build-tools-image.yml (vendored), 3 lines changed
@@ -20,13 +20,12 @@ defaults:

concurrency:
group: pin-build-tools-image-${{ inputs.from-tag }}
cancel-in-progress: false

permissions: {}

jobs:
tag-image:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest

env:
FROM_TAG: ${{ inputs.from-tag }}
.github/workflows/release-notify.yml (vendored), 2 lines changed
@@ -19,7 +19,7 @@ on:

jobs:
notify:
runs-on: ubuntu-22.04
runs-on: [ ubuntu-latest ]

steps:
- uses: neondatabase/dev-actions/release-pr-notify@main
.github/workflows/release.yml (vendored), 6 lines changed
@@ -26,7 +26,7 @@ defaults:
jobs:
create-storage-release-branch:
if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
runs-on: ubuntu-22.04
runs-on: ubuntu-latest

permissions:
contents: write # for `git push`

@@ -53,7 +53,7 @@ jobs:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
cat << EOF > body.md
## Storage & Compute release ${RELEASE_DATE}
## Release ${RELEASE_DATE}

**Please merge this Pull Request using 'Create a merge commit' button**
EOF

@@ -65,7 +65,7 @@ jobs:

create-proxy-release-branch:
if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
runs-on: ubuntu-22.04
runs-on: ubuntu-latest

permissions:
contents: write # for `git push`
.github/workflows/trigger-e2e-tests.yml (vendored), 6 lines changed
@@ -19,7 +19,7 @@ env:
jobs:
cancel-previous-e2e-tests:
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04
runs-on: ubuntu-latest

steps:
- name: Cancel previous e2e-tests runs for this PR

@@ -31,7 +31,7 @@ jobs:
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"

tag:
runs-on: ubuntu-22.04
runs-on: [ ubuntu-latest ]
outputs:
build-tag: ${{ steps.build-tag.outputs.tag }}

@@ -62,7 +62,7 @@ jobs:

trigger-e2e-tests:
needs: [ tag ]
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
env:
TAG: ${{ needs.tag.outputs.build-tag }}
steps:
@@ -1,5 +1,5 @@
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
/storage_controller @neondatabase/storage
/control_plane/attachment_service @neondatabase/storage
/libs/pageserver_api/ @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/safekeepers
/libs/remote_storage/ @neondatabase/storage
Cargo.lock (generated), 1285 lines changed. File diff suppressed because it is too large.
Cargo.toml, 80 lines changed
@@ -3,7 +3,7 @@ resolver = "2"
members = [
"compute_tools",
"control_plane",
"control_plane/storcon_cli",
"control_plane/attachment_service",
"pageserver",
"pageserver/compaction",
"pageserver/ctl",

@@ -12,8 +12,7 @@ members = [
"proxy",
"safekeeper",
"storage_broker",
"storage_controller",
"storage_scrubber",
"s3_scrubber",
"workspace_hack",
"trace",
"libs/compute_api",

@@ -41,26 +40,24 @@ license = "Apache-2.0"

## All dependency versions, used in the project
[workspace.dependencies]
ahash = "0.8"
anyhow = { version = "1.0", features = ["backtrace"] }
arc-swap = "1.6"
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
atomic-take = "1.1.0"
azure_core = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] }
azure_identity = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_storage = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_storage_blobs = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_core = "0.18"
azure_identity = "0.18"
azure_storage = "0.18"
azure_storage_blobs = "0.18"
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.3", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.26"
aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.14"
aws-sdk-iam = "1.15.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.9"
aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2.1", features = ["sign-http"] }
aws-types = "1.2.0"
aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.4"
aws-credential-types = "1.1.4"
aws-sigv4 = { version = "1.2.0", features = ["sign-http"] }
aws-types = "1.1.7"
axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"
@@ -75,7 +72,6 @@ clap = { version = "4.0", features = ["derive"] }
comfy-table = "6.1"
const_format = "0.2"
crc32c = "0.6"
crossbeam-deque = "0.8.5"
crossbeam-utils = "0.8.5"
dashmap = { version = "5.5.0", features = ["raw-api"] }
either = "1.8"

@@ -83,14 +79,13 @@ enum-map = "2.4.2"
enumset = "1.0.12"
fail = "0.5.0"
fallible-iterator = "0.2"
framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" }
fs2 = "0.4.3"
futures = "0.3"
futures-core = "0.3"
futures-util = "0.3"
git-version = "0.3"
hashbrown = "0.14"
hashlink = "0.9.1"
hashbrown = "0.13"
hashlink = "0.8.4"
hdrhistogram = "7.5.2"
hex = "0.4"
hex-literal = "0.4"

@@ -101,8 +96,7 @@ http-types = { version = "2", default-features = false }
humantime = "2.1"
humantime-serde = "1.1.1"
hyper = "0.14"
tokio-tungstenite = "0.20.0"
indexmap = "2"
hyper-tungstenite = "0.11"
inotify = "0.10.2"
ipnet = "2.9.0"
itertools = "0.10"

@@ -111,32 +105,32 @@ lasso = "0.7"
leaky-bucket = "1.0.1"
libc = "0.2"
md5 = "0.7.0"
measured = { version = "0.0.21", features=["lasso"] }
measured-process = { version = "0.0.21" }
measured = { version = "0.0.13", features=["default", "lasso"] }
memoffset = "0.8"
native-tls = "0.2"
nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
notify = "6.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
opentelemetry = "0.20.0"
opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.12.0"
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "51.0.0"
parquet = { version = "49.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "49.0.0"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2"
procfs = "0.14"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11"
rand = "0.8"
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_20"] }
reqwest-middleware = "0.3.0"
reqwest-retry = "0.5"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
reqwest-middleware = "0.2.0"
reqwest-retry = "0.2.2"
routerify = "3"
rpds = "0.13"
rustc-hash = "1.1.0"

@@ -146,7 +140,7 @@ rustls-split = "0.3"
scopeguard = "1.1"
sysinfo = "0.29.2"
sd-notify = "0.4.1"
sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
serde_path_to_error = "0.1"
|
||||
@@ -160,12 +154,11 @@ socket2 = "0.5"
|
||||
strum = "0.24"
|
||||
strum_macros = "0.24"
|
||||
"subtle" = "2.5.0"
|
||||
# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet
|
||||
svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
|
||||
svg_fmt = "0.4.1"
|
||||
sync_wrapper = "0.1.2"
|
||||
tar = "0.4"
|
||||
task-local-extensions = "0.1.4"
|
||||
test-context = "0.3"
|
||||
test-context = "0.1"
|
||||
thiserror = "1.0"
|
||||
tikv-jemallocator = "0.5"
|
||||
tikv-jemalloc-ctl = "0.5"
|
||||
@@ -180,17 +173,16 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
|
||||
toml = "0.7"
|
||||
toml_edit = "0.19"
|
||||
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
|
||||
tower-service = "0.3.2"
|
||||
tracing = "0.1"
|
||||
tracing-error = "0.2.0"
|
||||
tracing-opentelemetry = "0.21.0"
|
||||
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json", "ansi"] }
|
||||
tracing-opentelemetry = "0.20.0"
|
||||
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
|
||||
twox-hash = { version = "1.6.3", default-features = false }
|
||||
url = "2.2"
|
||||
urlencoding = "2.1"
|
||||
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
|
||||
walkdir = "2.3.2"
|
||||
rustls-native-certs = "0.7"
|
||||
webpki-roots = "0.25"
|
||||
x509-parser = "0.15"
|
||||
|
||||
## TODO replace this with tracing
|
||||
@@ -199,6 +191,7 @@ log = "0.4"
|
||||
|
||||
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
|
||||
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
@@ -239,12 +232,13 @@ tonic-build = "0.9"
|
||||
|
||||
[patch.crates-io]
|
||||
|
||||
# Needed to get `tokio-postgres-rustls` to depend on our fork.
|
||||
# This is only needed for proxy's tests.
|
||||
# TODO: we should probably fork `tokio-postgres-rustls` instead.
|
||||
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
|
||||
|
||||
# bug fixes for UUID
|
||||
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
|
||||
parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }
|
||||
parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
|
||||
parquet_derive = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
|
||||
|
||||
################# Binary contents sections
|
||||
|
||||
|
||||
@@ -58,14 +58,8 @@ RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v$
|
||||
&& mv protoc/include/google /usr/local/include/google \
|
||||
&& rm -rf protoc.zip protoc
|
||||
|
||||
# s5cmd
|
||||
ENV S5CMD_VERSION=2.2.2
|
||||
RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
|
||||
&& chmod +x s5cmd \
|
||||
&& mv s5cmd /usr/local/bin/s5cmd
|
||||
|
||||
# LLVM
|
||||
ENV LLVM_VERSION=18
|
||||
ENV LLVM_VERSION=17
|
||||
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
|
||||
&& echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
|
||||
&& apt update \
|
||||
@@ -87,7 +81,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
|
||||
&& rm awscliv2.zip
|
||||
|
||||
# Mold: A Modern Linker
|
||||
ENV MOLD_VERSION v2.31.0
|
||||
ENV MOLD_VERSION v2.4.0
|
||||
RUN set -e \
|
||||
&& git clone https://github.com/rui314/mold.git \
|
||||
&& mkdir mold/build \
|
||||
@@ -141,7 +135,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.79.0
|
||||
ENV RUSTC_VERSION=1.77.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
|
||||
|
||||
@@ -89,7 +89,7 @@ RUN apt update && \
|
||||
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
|
||||
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
|
||||
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
|
||||
mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
|
||||
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make clean && cp -R /sfcgal/* /
|
||||
@@ -98,7 +98,7 @@ ENV PATH "/usr/local/pgsql/bin:$PATH"
|
||||
|
||||
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
|
||||
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
|
||||
mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
|
||||
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
./autogen.sh && \
|
||||
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
|
||||
@@ -124,7 +124,7 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postg
|
||||
|
||||
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
|
||||
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
|
||||
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
|
||||
mkdir build && cd build && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release .. && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -149,7 +149,7 @@ RUN apt update && \
|
||||
|
||||
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
|
||||
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
|
||||
mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
|
||||
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
|
||||
# generate and copy upgrade scripts
|
||||
mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
|
||||
cp upgrade/* /usr/local/pgsql/share/extension/ && \
|
||||
@@ -194,7 +194,7 @@ RUN case "$(uname -m)" in \
|
||||
|
||||
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
|
||||
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
|
||||
mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
|
||||
mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
|
||||
mkdir build && cd build && \
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -204,7 +204,7 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
|
||||
|
||||
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
|
||||
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
|
||||
mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
|
||||
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
|
||||
export PATH="/usr/local/pgsql/bin:$PATH" && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
@@ -222,7 +222,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
|
||||
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
|
||||
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
|
||||
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
# unit extension's "create extension" script relies on absolute install path to fill some reference tables.
|
||||
@@ -241,17 +241,11 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
|
||||
FROM build-deps AS vector-pg-build
|
||||
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
COPY patches/pgvector.patch /pgvector.patch
|
||||
|
||||
# By default, pgvector Makefile uses `-march=native`. We don't want that,
|
||||
# because we build the images on different machines than where we run them.
|
||||
# Pass OPTFLAGS="" to remove it.
|
||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
|
||||
echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
|
||||
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /pgvector.patch && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.5.1.tar.gz -O pgvector.tar.gz && \
|
||||
echo "cc7a8e034a96e30a819911ac79d32f6bc47bdd1aa2de4d7d4904e26b83209dc8 pgvector.tar.gz" | sha256sum --check && \
|
||||
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
|
||||
|
||||
#########################################################################################
|
||||
@@ -266,7 +260,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
|
||||
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
|
||||
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
|
||||
mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
|
||||
|
||||
@@ -281,7 +275,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
|
||||
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
|
||||
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
|
||||
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
|
||||
@@ -297,7 +291,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
|
||||
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
|
||||
@@ -313,7 +307,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
|
||||
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
|
||||
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
|
||||
mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
|
||||
@@ -329,7 +323,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
|
||||
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
|
||||
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
|
||||
@@ -345,7 +339,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
|
||||
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
|
||||
mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
|
||||
mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
|
||||
@@ -361,7 +355,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
|
||||
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
|
||||
mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
|
||||
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
|
||||
@@ -377,7 +371,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
|
||||
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
|
||||
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
|
||||
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
|
||||
@@ -393,7 +387,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
|
||||
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
|
||||
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
|
||||
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
|
||||
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
|
||||
@@ -424,7 +418,7 @@ RUN case "${PG_VERSION}" in \
|
||||
apt-get install -y cmake && \
|
||||
wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
|
||||
echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
|
||||
mkdir timescaledb-src && cd timescaledb-src && tar xzf ../timescaledb.tar.gz --strip-components=1 -C . && \
|
||||
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
|
||||
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
|
||||
cd build && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
@@ -462,7 +456,7 @@ RUN case "${PG_VERSION}" in \
|
||||
esac && \
|
||||
wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
|
||||
echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make install -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
|
||||
@@ -481,7 +475,7 @@ RUN apt-get update && \
|
||||
apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
|
||||
wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
|
||||
echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
|
||||
mkdir kq_imcx-src && cd kq_imcx-src && tar xzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
|
||||
mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
mkdir build && cd build && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release .. && \
|
||||
@@ -505,7 +499,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
|
||||
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
|
||||
@@ -531,7 +525,7 @@ RUN apt-get update && \
|
||||
ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
|
||||
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
|
||||
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
|
||||
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
|
||||
mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
|
||||
cmake \
|
||||
-D RDK_BUILD_CAIRO_SUPPORT=OFF \
|
||||
-D RDK_BUILD_INCHI_SUPPORT=ON \
|
||||
@@ -571,7 +565,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
|
||||
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
|
||||
@@ -588,7 +582,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
|
||||
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
|
||||
@@ -605,7 +599,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
|
||||
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_semver-src && cd pg_semver-src && tar xvzf ../pg_semver.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control
|
||||
@@ -631,7 +625,7 @@ RUN case "${PG_VERSION}" in \
|
||||
esac && \
|
||||
wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \
|
||||
echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_embedding-src && cd pg_embedding-src && tar xzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install
|
||||
|
||||
@@ -647,7 +641,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
|
||||
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
|
||||
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
|
||||
@@ -696,7 +690,7 @@ ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.2.0.tar.gz -O pg_jsonschema.tar.gz && \
|
||||
echo "9118fc508a6e231e7a39acaa6f066fcd79af17a5db757b47d2eefbe14f7794f0 pg_jsonschema.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
|
||||
@@ -713,7 +707,7 @@ ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.4.0.tar.gz -O pg_graphql.tar.gz && \
|
||||
echo "bd8dc7230282b3efa9ae5baf053a54151ed0e66881c7c53750e2d0c765776edc pg_graphql.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
|
||||
sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
|
||||
cargo pgrx install --release && \
|
||||
# it's needed to enable extension because it uses untrusted C language
|
||||
@@ -733,7 +727,7 @@ ARG PG_VERSION
|
||||
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
|
||||
RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
|
||||
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
|
||||
cargo pgrx install --release && \
|
||||
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
|
||||
|
||||
@@ -749,7 +743,7 @@ ARG PG_VERSION
|
||||
|
||||
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -O pgx_ulid.tar.gz && \
|
||||
echo "ee5db82945d2d9f2d15597a80cf32de9dca67b897f605beb830561705f12683c pgx_ulid.tar.gz" | sha256sum --check && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
|
||||
echo "******************* Apply a patch for Postgres 16 support; delete in the next release ******************" && \
|
||||
wget https://github.com/pksunkara/pgx_ulid/commit/f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
|
||||
patch -p1 < f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
|
||||
@@ -771,7 +765,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
|
||||
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
|
||||
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
|
||||
mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install
|
||||
|
||||
@@ -787,7 +781,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
|
||||
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_ivm-src && cd pg_ivm-src && tar xvzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control
|
||||
@@ -804,7 +798,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
|
||||
ENV PATH "/usr/local/pgsql/bin/:$PATH"
|
||||
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
|
||||
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
|
||||
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
|
||||
mkdir pg_partman-src && cd pg_partman-src && tar xvzf ../pg_partman.tar.gz --strip-components=1 -C . && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) install && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control
|
||||
@@ -928,69 +922,6 @@ RUN rm -r /usr/local/pgsql/include
|
||||
# if they were to be used by other libraries.
|
||||
RUN rm /usr/local/pgsql/lib/lib*.a
|
||||
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer neon-pg-ext-test
|
||||
#
|
||||
#########################################################################################
|
||||
|
||||
FROM neon-pg-ext-build AS neon-pg-ext-test
|
||||
ARG PG_VERSION
|
||||
RUN mkdir /ext-src
|
||||
|
||||
#COPY --from=postgis-build /postgis.tar.gz /ext-src/
|
||||
#COPY --from=postgis-build /sfcgal/* /usr
|
||||
COPY --from=plv8-build /plv8.tar.gz /ext-src/
|
||||
COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/
|
||||
COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/
|
||||
COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/
|
||||
COPY --from=vector-pg-build /pgvector.patch /ext-src/
|
||||
COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
|
||||
#COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src
|
||||
#COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src
|
||||
#COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
|
||||
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
|
||||
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
|
||||
#COPY --from=rum-pg-build /rum.tar.gz /ext-src
|
||||
#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
|
||||
COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
|
||||
COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
|
||||
COPY --from=hll-pg-build /hll.tar.gz /ext-src
|
||||
COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
|
||||
#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
|
||||
COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
|
||||
COPY patches/pg_hintplan.patch /ext-src
|
||||
#COPY --from=kq-imcx-pg-build /kq_imcx.tar.gz /ext-src
|
||||
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
|
||||
COPY patches/pg_cron.patch /ext-src
|
||||
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
|
||||
COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
|
||||
COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
|
||||
COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
|
||||
COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
|
||||
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
|
||||
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
|
||||
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
|
||||
COPY patches/pg_anon.patch /ext-src
|
||||
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
|
||||
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
|
||||
RUN cd /ext-src/ && for f in *.tar.gz; \
|
||||
do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
|
||||
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|
||||
|| exit 1; rm -f $f; done
|
||||
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
|
||||
# cmake is required for the h3 test
|
||||
RUN apt-get update && apt-get install -y cmake
|
||||
RUN patch -p1 < /ext-src/pg_hintplan.patch
|
||||
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
|
||||
RUN patch -p1 </ext-src/pg_anon.patch
|
||||
RUN patch -p1 </ext-src/pg_cron.patch
|
||||
ENV PATH=/usr/local/pgsql/bin:$PATH
|
||||
ENV PGHOST=compute
|
||||
ENV PGPORT=55433
|
||||
ENV PGUSER=cloud_admin
|
||||
ENV PGDATABASE=postgres
|
||||
#########################################################################################
|
||||
#
|
||||
# Final layer
|
||||
|
||||
Makefile: 31 changed lines.
@@ -25,16 +25,14 @@ ifeq ($(UNAME_S),Linux)
|
||||
# Seccomp BPF is only available for Linux
|
||||
PG_CONFIGURE_OPTS += --with-libseccomp
|
||||
else ifeq ($(UNAME_S),Darwin)
|
||||
ifndef DISABLE_HOMEBREW
|
||||
# macOS with brew-installed openssl requires explicit paths
|
||||
# It can be configured with OPENSSL_PREFIX variable
|
||||
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
|
||||
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
|
||||
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
|
||||
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
|
||||
endif
|
||||
# macOS with brew-installed openssl requires explicit paths
|
||||
# It can be configured with OPENSSL_PREFIX variable
|
||||
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
|
||||
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
|
||||
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
|
||||
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
|
||||
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
|
||||
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
|
||||
endif
|
||||
|
||||
# Use -C option so that when PostgreSQL "make install" installs the
|
||||
@@ -81,14 +79,11 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
||||
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
|
||||
exit 1; }
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
|
||||
|
||||
VERSION=$*; \
|
||||
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
|
||||
CFLAGS='$(PG_CFLAGS)' \
|
||||
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
|
||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
|
||||
$(PG_CONFIGURE_OPTS) \
|
||||
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)
|
||||
|
||||
# nicer alias to run 'configure'
|
||||
# Note: I've been unable to use templates for this part of our configuration.
|
||||
@@ -124,8 +119,6 @@ postgres-%: postgres-configure-% \
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
|
||||
+@echo "Compiling amcheck $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
|
||||
+@echo "Compiling test_decoding $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
|
||||
|
||||
.PHONY: postgres-clean-%
|
||||
postgres-clean-%:
|
||||
|
||||
@@ -1,6 +1,4 @@
[](https://neon.tech)

[](https://neon.tech)

# Neon

@@ -27,12 +27,10 @@ reqwest = { workspace = true, features = ["json"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tokio-util.workspace = true
tokio-stream.workspace = true
tracing.workspace = true
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
tracing-utils.workspace = true
thiserror.workspace = true
url.workspace = true

compute_api.workspace = true
@@ -47,11 +47,10 @@ use chrono::Utc;
|
||||
use clap::Arg;
|
||||
use signal_hook::consts::{SIGQUIT, SIGTERM};
|
||||
use signal_hook::{consts::SIGINT, iterator::Signals};
|
||||
use tracing::{error, info, warn};
|
||||
use tracing::{error, info};
|
||||
use url::Url;
|
||||
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use compute_api::spec::ComputeSpec;
|
||||
|
||||
use compute_tools::compute::{
|
||||
forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
|
||||
@@ -63,41 +62,12 @@ use compute_tools::logger::*;
|
||||
use compute_tools::monitor::launch_monitor;
|
||||
use compute_tools::params::*;
|
||||
use compute_tools::spec::*;
|
||||
use compute_tools::swap::resize_swap;
|
||||
|
||||
// this is an arbitrary build tag. Fine as a default / for testing purposes
|
||||
// in-case of not-set environment var
|
||||
const BUILD_TAG_DEFAULT: &str = "latest";
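A hedged sketch (not this file's literal code) of how such a default is typically consumed; the actual lookup appears partially in init() further down, where BUILD_TAG is read from the environment:

```rust
// Illustrative only: fall back to BUILD_TAG_DEFAULT when the BUILD_TAG
// environment variable is not set, then log the resolved value.
let build_tag = std::env::var("BUILD_TAG")
    .unwrap_or_else(|_| BUILD_TAG_DEFAULT.to_string());
tracing::info!("build_tag: {build_tag}");
```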
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let (build_tag, clap_args) = init()?;
|
||||
|
||||
let (pg_handle, start_pg_result) = {
|
||||
// Enter startup tracing context
|
||||
let _startup_context_guard = startup_context_from_env();
|
||||
|
||||
let cli_args = process_cli(&clap_args)?;
|
||||
|
||||
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
|
||||
|
||||
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
|
||||
|
||||
start_postgres(&clap_args, wait_spec_result)?
|
||||
|
||||
// Startup is finished, exit the startup tracing span
|
||||
};
|
||||
|
||||
// PostgreSQL is now running, if startup was successful. Wait until it exits.
|
||||
let wait_pg_result = wait_postgres(pg_handle)?;
|
||||
|
||||
let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
|
||||
|
||||
maybe_delay_exit(delay_exit);
|
||||
|
||||
deinit_and_exit(wait_pg_result);
|
||||
}
|
||||
|
||||
fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
|
||||
|
||||
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
|
||||
@@ -112,15 +82,9 @@ fn init() -> Result<(String, clap::ArgMatches)> {
|
||||
.to_string();
|
||||
info!("build_tag: {build_tag}");
|
||||
|
||||
Ok((build_tag, cli().get_matches()))
|
||||
}
|
||||
|
||||
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
|
||||
let pgbin_default = "postgres";
|
||||
let pgbin = matches
|
||||
.get_one::<String>("pgbin")
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or(pgbin_default);
|
||||
let matches = cli().get_matches();
|
||||
let pgbin_default = String::from("postgres");
|
||||
let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);
|
||||
|
||||
let ext_remote_storage = matches
|
||||
.get_one::<String>("remote-ext-config")
|
||||
@@ -146,32 +110,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
|
||||
.expect("Postgres connection string is required");
|
||||
let spec_json = matches.get_one::<String>("spec");
|
||||
let spec_path = matches.get_one::<String>("spec-path");
|
||||
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
|
||||
|
||||
Ok(ProcessCliResult {
|
||||
connstr,
|
||||
pgdata,
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
http_port,
|
||||
spec_json,
|
||||
spec_path,
|
||||
resize_swap_on_bind,
|
||||
})
|
||||
}
|
||||
|
||||
struct ProcessCliResult<'clap> {
|
||||
connstr: &'clap str,
|
||||
pgdata: &'clap str,
|
||||
pgbin: &'clap str,
|
||||
ext_remote_storage: Option<&'clap str>,
|
||||
http_port: u16,
|
||||
spec_json: Option<&'clap String>,
|
||||
spec_path: Option<&'clap String>,
|
||||
resize_swap_on_bind: bool,
|
||||
}

fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
// Extract OpenTelemetry context for the startup actions from the
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
// tracing context.
@@ -208,7 +147,7 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
if let Ok(val) = std::env::var("TRACESTATE") {
startup_tracing_carrier.insert("tracestate".to_string(), val);
}
if !startup_tracing_carrier.is_empty() {
let startup_context_guard = if !startup_tracing_carrier.is_empty() {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
let guard = TraceContextPropagator::new()
@@ -218,17 +157,8 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
Some(guard)
} else {
None
}
}
};
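A minimal, self-contained sketch of the approach described in the comment above (not the file's exact code), assuming the opentelemetry 0.20 propagation API that this diff already uses:

```rust
use std::collections::HashMap;

use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;

// Hypothetical helper: read the W3C trace headers from the environment and
// attach the extracted context, so spans created during startup join the
// trace that the control plane started.
fn attach_startup_context() -> Option<opentelemetry::ContextGuard> {
    let mut carrier = HashMap::new();
    if let Ok(val) = std::env::var("TRACEPARENT") {
        carrier.insert("traceparent".to_string(), val);
    }
    if let Ok(val) = std::env::var("TRACESTATE") {
        carrier.insert("tracestate".to_string(), val);
    }
    if carrier.is_empty() {
        return None;
    }
    // A HashMap<String, String> can serve as the propagator's carrier.
    let ctx = TraceContextPropagator::new().extract(&carrier);
    // The guard keeps the context current until dropped, which is how the
    // startup span scope is bounded.
    Some(ctx.attach())
}
```

Dropping the returned guard is what the "exit the startup tracing span" comment in main() refers to.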
fn try_spec_from_cli(
|
||||
matches: &clap::ArgMatches,
|
||||
ProcessCliResult {
|
||||
spec_json,
|
||||
spec_path,
|
||||
..
|
||||
}: &ProcessCliResult,
|
||||
) -> Result<CliSpecParams> {
|
||||
let compute_id = matches.get_one::<String>("compute-id");
|
||||
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
|
||||
|
||||
@@ -269,34 +199,6 @@ fn try_spec_from_cli(
|
||||
}
|
||||
};
|
||||
|
||||
Ok(CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
})
|
||||
}
|
||||
|
||||
struct CliSpecParams {
|
||||
/// If a spec was provided via CLI or file, the [`ComputeSpec`]
|
||||
spec: Option<ComputeSpec>,
|
||||
live_config_allowed: bool,
|
||||
}
|
||||
|
||||
fn wait_spec(
|
||||
build_tag: String,
|
||||
ProcessCliResult {
|
||||
connstr,
|
||||
pgdata,
|
||||
pgbin,
|
||||
ext_remote_storage,
|
||||
resize_swap_on_bind,
|
||||
http_port,
|
||||
..
|
||||
}: ProcessCliResult,
|
||||
CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
}: CliSpecParams,
|
||||
) -> Result<WaitSpecResult> {
|
||||
let mut new_state = ComputeState::new();
|
||||
let spec_set;
|
||||
|
||||
@@ -324,17 +226,19 @@ fn wait_spec(
|
||||
|
||||
// If this is a pooled VM, prewarm before starting HTTP server and becoming
|
||||
// available for binding. Prewarming helps Postgres start quicker later,
|
||||
// because QEMU will already have its memory allocated from the host, and
|
||||
// because QEMU will already have it's memory allocated from the host, and
|
||||
// the necessary binaries will already be cached.
|
||||
if !spec_set {
|
||||
compute.prewarm_postgres()?;
|
||||
}
|
||||
|
||||
// Launch http service first, so that we can serve control-plane requests
|
||||
// while configuration is still in progress.
|
||||
// Launch http service first, so we were able to serve control-plane
|
||||
// requests, while configuration is still in progress.
|
||||
let _http_handle =
|
||||
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
|
||||
|
||||
let extension_server_port: u16 = http_port;
|
||||
|
||||
if !spec_set {
|
||||
// No spec provided, hang waiting for it.
|
||||
info!("no compute spec provided, waiting");
|
||||
@@ -349,45 +253,21 @@ fn wait_spec(
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Record for how long we slept waiting for the spec.
|
||||
let now = Utc::now();
|
||||
state.metrics.wait_for_spec_ms = now
|
||||
.signed_duration_since(state.start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64;
|
||||
|
||||
// Reset start time, so that the total startup time that is calculated later will
|
||||
// not include the time that we waited for the spec.
|
||||
state.start_time = now;
|
||||
}
|
||||
|
||||
Ok(WaitSpecResult {
|
||||
compute,
|
||||
http_port,
|
||||
resize_swap_on_bind,
|
||||
})
|
||||
}
|
||||
|
||||
struct WaitSpecResult {
|
||||
compute: Arc<ComputeNode>,
|
||||
// passed through from ProcessCliResult
|
||||
http_port: u16,
|
||||
resize_swap_on_bind: bool,
|
||||
}
|
||||
|
||||
fn start_postgres(
|
||||
// need to allow unused because `matches` is only used if target_os = "linux"
|
||||
#[allow(unused_variables)] matches: &clap::ArgMatches,
|
||||
WaitSpecResult {
|
||||
compute,
|
||||
http_port,
|
||||
resize_swap_on_bind,
|
||||
}: WaitSpecResult,
|
||||
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
|
||||
// We got all we need, update the state.
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
|
||||
// Record for how long we slept waiting for the spec.
|
||||
state.metrics.wait_for_spec_ms = Utc::now()
|
||||
.signed_duration_since(state.start_time)
|
||||
.to_std()
|
||||
.unwrap()
|
||||
.as_millis() as u64;
|
||||
// Reset start time to the actual start of the configuration, so that
|
||||
// total startup time was properly measured at the end.
|
||||
state.start_time = Utc::now();
|
||||
|
||||
state.status = ComputeStatus::Init;
|
||||
compute.state_changed.notify_all();
|
||||
|
||||
@@ -395,72 +275,33 @@ fn start_postgres(
|
||||
"running compute with features: {:?}",
|
||||
state.pspec.as_ref().unwrap().spec.features
|
||||
);
|
||||
// before we release the mutex, fetch the swap size (if any) for later.
|
||||
let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
|
||||
drop(state);
|
||||
|
||||
// Launch remaining service threads
|
||||
let _monitor_handle = launch_monitor(&compute);
|
||||
let _configurator_handle = launch_configurator(&compute);
|
||||
|
||||
let mut prestartup_failed = false;
|
||||
let mut delay_exit = false;
|
||||
|
||||
// Resize swap to the desired size if the compute spec says so
|
||||
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
|
||||
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
|
||||
// *before* starting postgres.
|
||||
//
|
||||
// In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
|
||||
// carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
|
||||
// OOM-killed during startup because swap wasn't available yet.
|
||||
match resize_swap(size_bytes) {
|
||||
Ok(()) => {
|
||||
let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
|
||||
info!(%size_bytes, %size_gib, "resized swap");
|
||||
}
|
||||
Err(err) => {
|
||||
let err = err.context("failed to resize swap");
|
||||
error!("{err:#}");
|
||||
|
||||
// Mark compute startup as failed; don't try to start postgres, and report this
|
||||
// error to the control plane when it next asks.
|
||||
prestartup_failed = true;
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
state.error = Some(format!("{err:?}"));
|
||||
state.status = ComputeStatus::Failed;
|
||||
compute.state_changed.notify_all();
|
||||
delay_exit = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let extension_server_port: u16 = http_port;
|
||||
|
||||
// Start Postgres
|
||||
let mut pg = None;
|
||||
if !prestartup_failed {
|
||||
pg = match compute.start_compute(extension_server_port) {
|
||||
Ok(pg) => Some(pg),
|
||||
Err(err) => {
|
||||
error!("could not start the compute node: {:#}", err);
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
state.error = Some(format!("{:?}", err));
|
||||
state.status = ComputeStatus::Failed;
|
||||
// Notify others that Postgres failed to start. In case of configuring the
|
||||
// empty compute, it's likely that API handler is still waiting for compute
|
||||
// state change. With this we will notify it that compute is in Failed state,
|
||||
// so control plane will know about it earlier and record proper error instead
|
||||
// of timeout.
|
||||
compute.state_changed.notify_all();
|
||||
drop(state); // unlock
|
||||
delay_exit = true;
|
||||
None
|
||||
}
|
||||
};
|
||||
} else {
|
||||
warn!("skipping postgres startup because pre-startup step failed");
|
||||
}
|
||||
let mut delay_exit = false;
|
||||
let mut exit_code = None;
|
||||
let pg = match compute.start_compute(extension_server_port) {
|
||||
Ok(pg) => Some(pg),
|
||||
Err(err) => {
|
||||
error!("could not start the compute node: {:#}", err);
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
state.error = Some(format!("{:?}", err));
|
||||
state.status = ComputeStatus::Failed;
|
||||
// Notify others that Postgres failed to start. In case of configuring the
|
||||
// empty compute, it's likely that API handler is still waiting for compute
|
||||
// state change. With this we will notify it that compute is in Failed state,
|
||||
// so control plane will know about it earlier and record proper error instead
|
||||
// of timeout.
|
||||
compute.state_changed.notify_all();
|
||||
drop(state); // unlock
|
||||
delay_exit = true;
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Start the vm-monitor if directed to. The vm-monitor only runs on linux
|
||||
// because it requires cgroups.
|
||||
@@ -493,7 +334,7 @@ fn start_postgres(
|
||||
// This token is used internally by the monitor to clean up all threads
|
||||
let token = CancellationToken::new();
|
||||
|
||||
let vm_monitor = rt.as_ref().map(|rt| {
|
||||
let vm_monitor = &rt.as_ref().map(|rt| {
|
||||
rt.spawn(vm_monitor::start(
|
||||
Box::leak(Box::new(vm_monitor::Args {
|
||||
cgroup: cgroup.cloned(),
|
||||
@@ -506,41 +347,12 @@ fn start_postgres(
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
pg,
|
||||
StartPostgresResult {
|
||||
delay_exit,
|
||||
compute,
|
||||
#[cfg(target_os = "linux")]
|
||||
rt,
|
||||
#[cfg(target_os = "linux")]
|
||||
token,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
|
||||
|
||||
struct StartPostgresResult {
|
||||
delay_exit: bool,
|
||||
// passed through from WaitSpecResult
|
||||
compute: Arc<ComputeNode>,
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
rt: Option<tokio::runtime::Runtime>,
|
||||
#[cfg(target_os = "linux")]
|
||||
token: tokio_util::sync::CancellationToken,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
|
||||
}
|
||||
|
||||
fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
|
||||
// Wait for the child Postgres process forever. In this state Ctrl+C will
|
||||
// propagate to Postgres and it will be shut down as well.
|
||||
let mut exit_code = None;
|
||||
if let Some((mut pg, logs_handle)) = pg {
|
||||
// Startup is finished, exit the startup tracing span
|
||||
drop(startup_context_guard);
|
||||
|
||||
let ecode = pg
|
||||
.wait()
|
||||
.expect("failed to start waiting on Postgres process");
|
||||
@@ -555,25 +367,6 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
|
||||
exit_code = ecode.code()
|
||||
}
|
||||
|
||||
Ok(WaitPostgresResult { exit_code })
|
||||
}
|
||||
|
||||
struct WaitPostgresResult {
|
||||
exit_code: Option<i32>,
|
||||
}
|
||||
|
||||
fn cleanup_after_postgres_exit(
|
||||
StartPostgresResult {
|
||||
mut delay_exit,
|
||||
compute,
|
||||
#[cfg(target_os = "linux")]
|
||||
vm_monitor,
|
||||
#[cfg(target_os = "linux")]
|
||||
token,
|
||||
#[cfg(target_os = "linux")]
|
||||
rt,
|
||||
}: StartPostgresResult,
|
||||
) -> Result<bool> {
|
||||
// Terminate the vm_monitor so it releases the file watcher on
|
||||
// /sys/fs/cgroup/neon-postgres.
|
||||
// Note: the vm-monitor only runs on linux because it requires cgroups.
|
||||
@@ -615,19 +408,13 @@ fn cleanup_after_postgres_exit(
|
||||
error!("error while checking for core dumps: {err:?}");
|
||||
}
|
||||
|
||||
Ok(delay_exit)
|
||||
}
|
||||
|
||||
fn maybe_delay_exit(delay_exit: bool) {
|
||||
// If launch failed, keep serving HTTP requests for a while, so the cloud
|
||||
// control plane can get the actual error.
|
||||
if delay_exit {
|
||||
info!("giving control plane 30s to collect the error before shutdown");
|
||||
thread::sleep(Duration::from_secs(30));
|
||||
}
|
||||
}
|
||||
|
||||
fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
|
||||
// Shutdown trace pipeline gracefully, so that it has a chance to send any
|
||||
// pending traces before we exit. Shutting down OTEL tracing provider may
|
||||
// hang for quite some time, see, for example:
|
||||
@@ -735,15 +522,10 @@ fn cli() -> clap::Command {
|
||||
Arg::new("filecache-connstr")
|
||||
.long("filecache-connstr")
|
||||
.default_value(
|
||||
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
|
||||
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable",
|
||||
)
|
||||
.value_name("FILECACHE_CONNSTR"),
|
||||
)
|
||||
.arg(
|
||||
Arg::new("resize-swap-on-bind")
|
||||
.long("resize-swap-on-bind")
|
||||
.action(clap::ArgAction::SetTrue),
|
||||
)
|
||||
}
|
||||
|
||||
/// When compute_ctl is killed, also send a termination signal to sync-safekeepers
|
||||
|
||||
@@ -1,116 +0,0 @@
|
||||
use compute_api::{
|
||||
responses::CatalogObjects,
|
||||
spec::{Database, Role},
|
||||
};
|
||||
use futures::Stream;
|
||||
use postgres::{Client, NoTls};
|
||||
use std::{path::Path, process::Stdio, result::Result, sync::Arc};
|
||||
use tokio::{
|
||||
io::{AsyncBufReadExt, BufReader},
|
||||
process::Command,
|
||||
task,
|
||||
};
|
||||
use tokio_stream::{self as stream, StreamExt};
|
||||
use tokio_util::codec::{BytesCodec, FramedRead};
|
||||
use tracing::warn;
|
||||
|
||||
use crate::{
|
||||
compute::ComputeNode,
|
||||
pg_helpers::{get_existing_dbs, get_existing_roles},
|
||||
};
|
||||
|
||||
pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
|
||||
let connstr = compute.connstr.clone();
|
||||
task::spawn_blocking(move || {
|
||||
let mut client = Client::connect(connstr.as_str(), NoTls)?;
|
||||
let roles: Vec<Role>;
|
||||
{
|
||||
let mut xact = client.transaction()?;
|
||||
roles = get_existing_roles(&mut xact)?;
|
||||
}
|
||||
let databases: Vec<Database> = get_existing_dbs(&mut client)?.values().cloned().collect();
|
||||
|
||||
Ok(CatalogObjects { roles, databases })
|
||||
})
|
||||
.await?
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum SchemaDumpError {
|
||||
#[error("Database does not exist.")]
|
||||
DatabaseDoesNotExist,
|
||||
#[error("Failed to execute pg_dump.")]
|
||||
IO(#[from] std::io::Error),
|
||||
}
|
||||
|
||||
// This function uses the pg_dump utility to dump the schema of the specified database.
// The output is streamed back to the caller and is intended to be served over HTTP.
//
// Before returning the result with the output, it checks that pg_dump produced any output.
// If not, it tries to parse the stderr output to determine whether the database does not
// exist, in which case a special error is returned.
//
// To make sure that the process is killed when the caller drops the stream, we use tokio's kill_on_drop feature.
|
||||
pub async fn get_database_schema(
|
||||
compute: &Arc<ComputeNode>,
|
||||
dbname: &str,
|
||||
) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
|
||||
let pgbin = &compute.pgbin;
|
||||
let basepath = Path::new(pgbin).parent().unwrap();
|
||||
let pgdump = basepath.join("pg_dump");
|
||||
let mut connstr = compute.connstr.clone();
|
||||
connstr.set_path(dbname);
|
||||
let mut cmd = Command::new(pgdump)
|
||||
.arg("--schema-only")
|
||||
.arg(connstr.as_str())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.kill_on_drop(true)
|
||||
.spawn()?;
|
||||
|
||||
let stdout = cmd.stdout.take().ok_or_else(|| {
|
||||
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stdout.")
|
||||
})?;
|
||||
|
||||
let stderr = cmd.stderr.take().ok_or_else(|| {
|
||||
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stderr.")
|
||||
})?;
|
||||
|
||||
let mut stdout_reader = FramedRead::new(stdout, BytesCodec::new());
|
||||
let stderr_reader = BufReader::new(stderr);
|
||||
|
||||
let first_chunk = match stdout_reader.next().await {
|
||||
Some(Ok(bytes)) if !bytes.is_empty() => bytes,
|
||||
Some(Err(e)) => {
|
||||
return Err(SchemaDumpError::IO(e));
|
||||
}
|
||||
_ => {
|
||||
let mut lines = stderr_reader.lines();
|
||||
if let Some(line) = lines.next_line().await? {
|
||||
if line.contains(&format!("FATAL: database \"{}\" does not exist", dbname)) {
|
||||
return Err(SchemaDumpError::DatabaseDoesNotExist);
|
||||
}
|
||||
warn!("pg_dump stderr: {}", line)
|
||||
}
|
||||
tokio::spawn(async move {
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
warn!("pg_dump stderr: {}", line)
|
||||
}
|
||||
});
|
||||
|
||||
return Err(SchemaDumpError::IO(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
"failed to start pg_dump",
|
||||
)));
|
||||
}
|
||||
};
|
||||
let initial_stream = stream::once(Ok(first_chunk.freeze()));
|
||||
// Consume stderr and log warnings
|
||||
tokio::spawn(async move {
|
||||
let mut lines = stderr_reader.lines();
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
warn!("pg_dump stderr: {}", line)
|
||||
}
|
||||
});
|
||||
Ok(initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze()))))
|
||||
}
|
||||
@@ -818,15 +818,9 @@ impl ComputeNode {
|
||||
Client::connect(zenith_admin_connstr.as_str(), NoTls)
|
||||
.context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
|
||||
// Disable forwarding so that users don't get a cloud_admin role
|
||||
|
||||
let mut func = || {
|
||||
client.simple_query("SET neon.forward_ddl = false")?;
|
||||
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
|
||||
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
|
||||
Ok::<_, anyhow::Error>(())
|
||||
};
|
||||
func().context("apply_config setup cloud_admin")?;
|
||||
|
||||
client.simple_query("SET neon.forward_ddl = false")?;
|
||||
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
|
||||
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
|
||||
drop(client);
|
||||
|
||||
// reconnect with a connection string with the expected name
|
||||
@@ -838,29 +832,24 @@ impl ComputeNode {
|
||||
};
|
||||
|
||||
// Disable DDL forwarding because control plane already knows about these roles/databases.
|
||||
client
|
||||
.simple_query("SET neon.forward_ddl = false")
|
||||
.context("apply_config SET neon.forward_ddl = false")?;
|
||||
client.simple_query("SET neon.forward_ddl = false")?;
|
||||
|
||||
// Proceed with post-startup configuration. Note, that order of operations is important.
|
||||
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
|
||||
create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?;
|
||||
cleanup_instance(&mut client).context("apply_config cleanup_instance")?;
|
||||
handle_roles(spec, &mut client).context("apply_config handle_roles")?;
|
||||
handle_databases(spec, &mut client).context("apply_config handle_databases")?;
|
||||
handle_role_deletions(spec, connstr.as_str(), &mut client)
|
||||
.context("apply_config handle_role_deletions")?;
|
||||
create_neon_superuser(spec, &mut client)?;
|
||||
cleanup_instance(&mut client)?;
|
||||
handle_roles(spec, &mut client)?;
|
||||
handle_databases(spec, &mut client)?;
|
||||
handle_role_deletions(spec, connstr.as_str(), &mut client)?;
|
||||
handle_grants(
|
||||
spec,
|
||||
&mut client,
|
||||
connstr.as_str(),
|
||||
self.has_feature(ComputeFeature::AnonExtension),
|
||||
)
|
||||
.context("apply_config handle_grants")?;
|
||||
handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
|
||||
handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
|
||||
create_availability_check_data(&mut client)
|
||||
.context("apply_config create_availability_check_data")?;
|
||||
)?;
|
||||
handle_extensions(spec, &mut client)?;
|
||||
handle_extension_neon(&mut client)?;
|
||||
create_availability_check_data(&mut client)?;
|
||||
|
||||
// 'Close' connection
|
||||
drop(client);
|
||||
@@ -868,7 +857,7 @@ impl ComputeNode {
|
||||
// Run migrations separately to not hold up cold starts
|
||||
thread::spawn(move || {
|
||||
let mut client = Client::connect(connstr.as_str(), NoTls)?;
|
||||
handle_migrations(&mut client).context("apply_config handle_migrations")
|
||||
handle_migrations(&mut client)
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -6,8 +6,8 @@ use std::path::Path;
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::pg_helpers::escape_conf_value;
|
||||
use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize};
|
||||
use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
|
||||
use crate::pg_helpers::PgOptionsSerialize;
|
||||
use compute_api::spec::{ComputeMode, ComputeSpec};
|
||||
|
||||
/// Check that `line` is inside a text file and put it there if it is not.
|
||||
/// Create file if it doesn't exist.
|
||||
@@ -92,27 +92,6 @@ pub fn write_postgres_conf(
|
||||
}
|
||||
}
|
||||
|
||||
if cfg!(target_os = "linux") {
|
||||
// Check /proc/sys/vm/overcommit_memory -- if it equals 2 (i.e. linux memory overcommit is
|
||||
// disabled), then the control plane has enabled swap and we should set
|
||||
// dynamic_shared_memory_type = 'mmap'.
|
||||
//
|
||||
// This is (maybe?) temporary - for more, see https://github.com/neondatabase/cloud/issues/12047.
|
||||
let overcommit_memory_contents = std::fs::read_to_string("/proc/sys/vm/overcommit_memory")
|
||||
// ignore any errors - they may be expected to occur under certain situations (e.g. when
|
||||
// not running in Linux).
|
||||
.unwrap_or_else(|_| String::new());
|
||||
if overcommit_memory_contents.trim() == "2" {
|
||||
let opt = GenericOption {
|
||||
name: "dynamic_shared_memory_type".to_owned(),
|
||||
value: Some("mmap".to_owned()),
|
||||
vartype: "enum".to_owned(),
|
||||
};
|
||||
|
||||
write!(file, "{}", opt.to_pg_setting())?;
|
||||
}
|
||||
}
|
||||
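// Illustration (expected output, not part of this diff): with the GenericOption built
// above, `to_pg_setting()` should append a line like the following to postgresql.conf:
//
//     dynamic_shared_memory_type = 'mmap'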
|
||||
// If there are any extra options in the 'settings' field, append those
|
||||
if spec.cluster.settings.is_some() {
|
||||
writeln!(file, "# Managed by compute_ctl: begin")?;
|
||||
|
||||
@@ -5,21 +5,17 @@ use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
use crate::catalog::SchemaDumpError;
|
||||
use crate::catalog::{get_database_schema, get_dbs_and_roles};
|
||||
use crate::compute::forward_termination_signal;
|
||||
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
|
||||
use compute_api::requests::ConfigurationRequest;
|
||||
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
|
||||
|
||||
use anyhow::Result;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::service::{make_service_fn, service_fn};
|
||||
use hyper::{Body, Method, Request, Response, Server, StatusCode};
|
||||
use tokio::task;
|
||||
use tracing::{error, info, warn};
|
||||
use tracing_utils::http::OtelName;
|
||||
use utils::http::request::must_get_query_param;
|
||||
|
||||
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
|
||||
ComputeStatusResponse {
|
||||
@@ -137,34 +133,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/dbs_and_roles") => {
|
||||
info!("serving /dbs_and_roles GET request",);
|
||||
match get_dbs_and_roles(compute).await {
|
||||
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
|
||||
Err(_) => {
|
||||
render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(&Method::GET, "/database_schema") => {
|
||||
let database = match must_get_query_param(&req, "database") {
|
||||
Err(e) => return e.into_response(),
|
||||
Ok(database) => database,
|
||||
};
|
||||
info!("serving /database_schema GET request with database: {database}",);
|
||||
match get_database_schema(compute, &database).await {
|
||||
Ok(res) => render_plain(Body::wrap_stream(res)),
|
||||
Err(SchemaDumpError::DatabaseDoesNotExist) => {
|
||||
render_json_error("database does not exist", StatusCode::NOT_FOUND)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("can't get schema dump: {}", e);
|
||||
render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// download extension files from remote extension storage on demand
|
||||
(&Method::POST, route) if route.starts_with("/extension_server/") => {
|
||||
info!("serving {:?} POST request", route);
|
||||
@@ -335,25 +303,10 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
|
||||
};
|
||||
Response::builder()
|
||||
.status(status)
|
||||
.header(CONTENT_TYPE, "application/json")
|
||||
.body(Body::from(serde_json::to_string(&error).unwrap()))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn render_json(body: Body) -> Response<Body> {
|
||||
Response::builder()
|
||||
.header(CONTENT_TYPE, "application/json")
|
||||
.body(body)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn render_plain(body: Body) -> Response<Body> {
|
||||
Response::builder()
|
||||
.header(CONTENT_TYPE, "text/plain")
|
||||
.body(body)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
|
||||
@@ -68,51 +68,6 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Info"
|
||||
|
||||
/dbs_and_roles:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get databases and roles in the catalog.
|
||||
description: ""
|
||||
operationId: getDbsAndRoles
|
||||
responses:
|
||||
200:
|
||||
description: Compute schema objects
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/DbsAndRoles"
|
||||
|
||||
/database_schema:
|
||||
get:
|
||||
tags:
|
||||
- Info
|
||||
summary: Get schema dump
|
||||
parameters:
|
||||
- name: database
|
||||
in: query
|
||||
description: Database name to dump.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
example: "postgres"
|
||||
description: Get schema dump in SQL format.
|
||||
operationId: getDatabaseSchema
|
||||
responses:
|
||||
200:
|
||||
description: Schema dump
|
||||
content:
|
||||
text/plain:
|
||||
schema:
|
||||
type: string
|
||||
description: Schema dump in SQL format.
|
||||
404:
|
||||
description: Non existing database.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/GenericError"
|
||||
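# Usage illustration (hypothetical host and port, not defined in this spec): a schema
# dump can be fetched with e.g.
# `curl "http://<compute_ctl_host>:<port>/database_schema?database=postgres"`.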
|
||||
/check_writability:
|
||||
post:
|
||||
tags:
|
||||
@@ -274,73 +229,6 @@ components:
|
||||
num_cpus:
|
||||
type: integer
|
||||
|
||||
DbsAndRoles:
|
||||
type: object
|
||||
description: Databases and Roles
|
||||
required:
|
||||
- roles
|
||||
- databases
|
||||
properties:
|
||||
roles:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/Role"
|
||||
databases:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/Database"
|
||||
|
||||
Database:
|
||||
type: object
|
||||
description: Database
|
||||
required:
|
||||
- name
|
||||
- owner
|
||||
- restrict_conn
|
||||
- invalid
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
owner:
|
||||
type: string
|
||||
options:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/GenericOption"
|
||||
restrict_conn:
|
||||
type: boolean
|
||||
invalid:
|
||||
type: boolean
|
||||
|
||||
Role:
|
||||
type: object
|
||||
description: Role
|
||||
required:
|
||||
- name
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
encrypted_password:
|
||||
type: string
|
||||
options:
|
||||
type: array
|
||||
items:
|
||||
$ref: "#/components/schemas/GenericOption"
|
||||
|
||||
GenericOption:
|
||||
type: object
|
||||
description: Schema Generic option
|
||||
required:
|
||||
- name
|
||||
- vartype
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
vartype:
|
||||
type: string
|
||||
|
||||
ComputeState:
|
||||
type: object
|
||||
required:
|
||||
|
||||
@@ -8,12 +8,10 @@ pub mod configurator;
|
||||
pub mod http;
|
||||
#[macro_use]
|
||||
pub mod logger;
|
||||
pub mod catalog;
|
||||
pub mod compute;
|
||||
pub mod extension_server;
|
||||
pub mod monitor;
|
||||
pub mod params;
|
||||
pub mod pg_helpers;
|
||||
pub mod spec;
|
||||
pub mod swap;
|
||||
pub mod sync_sk;
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
ALTER ROLE neon_superuser BYPASSRLS;
|
||||
@@ -1,18 +0,0 @@
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name text;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
|
||||
END LOOP;
|
||||
|
||||
FOR role_name IN SELECT rolname FROM pg_roles
|
||||
WHERE
|
||||
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
|
||||
END LOOP;
|
||||
END $$;
|
||||
@@ -1,6 +0,0 @@
|
||||
DO $$
|
||||
BEGIN
|
||||
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
|
||||
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
|
||||
END IF;
|
||||
END $$;
|
||||
@@ -1 +0,0 @@
|
||||
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
|
||||
@@ -1,4 +0,0 @@
|
||||
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
|
||||
-- interacted with by neon_superuser without permission issues.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
|
||||
@@ -1,4 +0,0 @@
|
||||
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
|
||||
-- interacted with by neon_superuser without permission issues.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
|
||||
@@ -1,3 +0,0 @@
|
||||
-- SKIP: Moved inline to the handle_grants() functions.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
|
||||
@@ -1,3 +0,0 @@
|
||||
-- SKIP: Moved inline to the handle_grants() functions.
|
||||
|
||||
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
|
||||
@@ -1,13 +0,0 @@
|
||||
-- SKIP: The original goal of this migration was to prevent creating
|
||||
-- subscriptions, but this migration was insufficient.
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name TEXT;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
|
||||
END LOOP;
|
||||
END $$;
|
||||
@@ -44,7 +44,7 @@ pub fn escape_conf_value(s: &str) -> String {
|
||||
format!("'{}'", res)
|
||||
}
|
||||
|
||||
pub trait GenericOptionExt {
|
||||
trait GenericOptionExt {
|
||||
fn to_pg_option(&self) -> String;
|
||||
fn to_pg_setting(&self) -> String;
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::fs::File;
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use anyhow::{anyhow, bail, Result};
|
||||
use postgres::config::Config;
|
||||
use postgres::{Client, NoTls};
|
||||
use reqwest::StatusCode;
|
||||
@@ -490,7 +490,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
|
||||
"rename_db" => {
|
||||
let new_name = op.new_name.as_ref().unwrap();
|
||||
|
||||
if existing_dbs.contains_key(&op.name) {
|
||||
if existing_dbs.get(&op.name).is_some() {
|
||||
let query: String = format!(
|
||||
"ALTER DATABASE {} RENAME TO {}",
|
||||
op.name.pg_quote(),
|
||||
@@ -698,8 +698,7 @@ pub fn handle_grants(
|
||||
|
||||
// it is important to run this after all grants
|
||||
if enable_anon_extension {
|
||||
handle_extension_anon(spec, &db.owner, &mut db_client, false)
|
||||
.context("handle_grants handle_extension_anon")?;
|
||||
handle_extension_anon(spec, &db.owner, &mut db_client, false)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -774,66 +773,76 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
|
||||
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
|
||||
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
// Add new migrations in numerical order.
|
||||
let migrations = [
|
||||
include_str!("./migrations/0000-neon_superuser_bypass_rls.sql"),
|
||||
include_str!("./migrations/0001-alter_roles.sql"),
|
||||
include_str!("./migrations/0002-grant_pg_create_subscription_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0003-grant_pg_monitor_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0004-grant_all_on_tables_to_neon_superuser.sql"),
|
||||
include_str!("./migrations/0005-grant_all_on_sequences_to_neon_superuser.sql"),
|
||||
include_str!(
|
||||
"./migrations/0006-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
|
||||
),
|
||||
include_str!(
|
||||
"./migrations/0007-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
|
||||
),
|
||||
include_str!("./migrations/0008-revoke_replication_for_previously_allowed_roles.sql"),
|
||||
"ALTER ROLE neon_superuser BYPASSRLS",
|
||||
r#"
|
||||
DO $$
|
||||
DECLARE
|
||||
role_name text;
|
||||
BEGIN
|
||||
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
|
||||
END LOOP;
|
||||
|
||||
FOR role_name IN SELECT rolname FROM pg_roles
|
||||
WHERE
|
||||
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
|
||||
LOOP
|
||||
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
|
||||
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
|
||||
END LOOP;
|
||||
END $$;
|
||||
"#,
|
||||
r#"
|
||||
DO $$
|
||||
BEGIN
|
||||
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
|
||||
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
|
||||
END IF;
|
||||
END
|
||||
$$;"#,
|
||||
"GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION",
|
||||
// Don't remove: these are SQL statements that we originally applied as migrations but that turned out to be executed elsewhere.
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
// Add new migrations below.
|
||||
];
|
||||
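// Note (based on the runner code below): neon_migration.migration_id stores how many
// entries of this array have already been applied, so migrations are addressed purely
// by their index. That is why the empty placeholders above must stay in place: removing
// one would shift the index of every later migration.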
|
||||
let mut func = || {
|
||||
let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
|
||||
client.simple_query(query)?;
|
||||
let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
|
||||
client.simple_query(query)?;
|
||||
query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
|
||||
client.simple_query(query)?;
|
||||
query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
|
||||
client.simple_query(query)?;
|
||||
query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
|
||||
client.simple_query(query)?;
|
||||
Ok::<_, anyhow::Error>(())
|
||||
};
|
||||
func().context("handle_migrations prepare")?;
|
||||
query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
|
||||
client.simple_query(query)?;
|
||||
|
||||
let query = "SELECT id FROM neon_migration.migration_id";
|
||||
let row = client
|
||||
.query_one(query, &[])
|
||||
.context("handle_migrations get migration_id")?;
|
||||
query = "SELECT id FROM neon_migration.migration_id";
|
||||
let row = client.query_one(query, &[])?;
|
||||
let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
|
||||
let starting_migration_id = current_migration;
|
||||
|
||||
let query = "BEGIN";
|
||||
client
|
||||
.simple_query(query)
|
||||
.context("handle_migrations begin")?;
|
||||
query = "BEGIN";
|
||||
client.simple_query(query)?;
|
||||
|
||||
while current_migration < migrations.len() {
|
||||
let migration = &migrations[current_migration];
|
||||
if migration.starts_with("-- SKIP") {
|
||||
info!("Skipping migration id={}", current_migration);
|
||||
if migration.is_empty() {
|
||||
info!("Skip migration id={}", current_migration);
|
||||
} else {
|
||||
info!(
|
||||
"Running migration id={}:\n{}\n",
|
||||
current_migration, migration
|
||||
);
|
||||
client.simple_query(migration).with_context(|| {
|
||||
format!("handle_migrations current_migration={}", current_migration)
|
||||
})?;
|
||||
info!("Running migration:\n{}\n", migration);
|
||||
client.simple_query(migration)?;
|
||||
}
|
||||
current_migration += 1;
|
||||
}
|
||||
@@ -841,14 +850,10 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
|
||||
"UPDATE neon_migration.migration_id SET id={}",
|
||||
migrations.len()
|
||||
);
|
||||
client
|
||||
.simple_query(&setval)
|
||||
.context("handle_migrations update id")?;
|
||||
client.simple_query(&setval)?;
|
||||
|
||||
let query = "COMMIT";
|
||||
client
|
||||
.simple_query(query)
|
||||
.context("handle_migrations commit")?;
|
||||
query = "COMMIT";
|
||||
client.simple_query(query)?;
|
||||
|
||||
info!(
|
||||
"Ran {} migrations",
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use tracing::warn;
|
||||
|
||||
pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
|
||||
|
||||
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
|
||||
// run `/neonvm/bin/resize-swap --once {size_bytes}`
|
||||
//
|
||||
// Passing '--once' causes resize-swap to delete itself after successful completion, which
|
||||
// means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
|
||||
// postgres is running.
|
||||
//
|
||||
// NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
|
||||
let child_result = std::process::Command::new("/usr/bin/sudo")
|
||||
.arg(RESIZE_SWAP_BIN)
|
||||
.arg("--once")
|
||||
.arg(size_bytes.to_string())
|
||||
.spawn();
|
||||
|
||||
child_result
|
||||
.context("spawn() failed")
|
||||
.and_then(|mut child| child.wait().context("wait() failed"))
|
||||
.and_then(|status| match status.success() {
|
||||
true => Ok(()),
|
||||
false => {
|
||||
// The command failed. Maybe it was because the resize-swap file doesn't exist?
|
||||
// The --once flag causes it to delete itself on success so we don't disable swap
|
||||
// while postgres is running; maybe this is fine.
|
||||
match Path::new(RESIZE_SWAP_BIN).try_exists() {
|
||||
Err(_) | Ok(true) => Err(anyhow!("process exited with {status}")),
|
||||
// The path doesn't exist; we're actually ok
|
||||
Ok(false) => {
|
||||
warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
|
||||
Ok(())
|
||||
},
|
||||
}
|
||||
}
|
||||
})
|
||||
// wrap any prior error with the overall context that we couldn't run the command
|
||||
.with_context(|| {
|
||||
format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
|
||||
})
|
||||
}
|
||||
@@ -17,7 +17,6 @@ nix.workspace = true
|
||||
once_cell.workspace = true
|
||||
postgres.workspace = true
|
||||
hex.workspace = true
|
||||
humantime-serde.workspace = true
|
||||
hyper.workspace = true
|
||||
regex.workspace = true
|
||||
reqwest = { workspace = true, features = ["blocking", "json"] }
|
||||
@@ -28,7 +27,6 @@ serde_with.workspace = true
|
||||
tar.workspace = true
|
||||
thiserror.workspace = true
|
||||
toml.workspace = true
|
||||
toml_edit.workspace = true
|
||||
tokio.workspace = true
|
||||
tokio-postgres.workspace = true
|
||||
tokio-util.workspace = true
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "storage_controller"
|
||||
name = "attachment_service"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
@@ -25,13 +25,12 @@ git-version.workspace = true
|
||||
hex.workspace = true
|
||||
hyper.workspace = true
|
||||
humantime.workspace = true
|
||||
itertools.workspace = true
|
||||
lasso.workspace = true
|
||||
once_cell.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
pageserver_client.workspace = true
|
||||
postgres_connection.workspace = true
|
||||
reqwest = { workspace = true, features = ["stream"] }
|
||||
reqwest.workspace = true
|
||||
routerify.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
@@ -40,15 +39,13 @@ tokio.workspace = true
|
||||
tokio-util.workspace = true
|
||||
tracing.workspace = true
|
||||
measured.workspace = true
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
|
||||
diesel = { version = "2.1.4", features = ["serde_json", "postgres", "r2d2"] }
|
||||
diesel_migrations = { version = "2.1.0" }
|
||||
r2d2 = { version = "0.8.10" }
|
||||
|
||||
utils = { path = "../libs/utils/" }
|
||||
metrics = { path = "../libs/metrics/" }
|
||||
control_plane = { path = "../control_plane" }
|
||||
workspace_hack = { version = "0.1", path = "../workspace_hack" }
|
||||
utils = { path = "../../libs/utils/" }
|
||||
metrics = { path = "../../libs/metrics/" }
|
||||
control_plane = { path = ".." }
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
control_plane/attachment_service/src/compute_hook.rs (new file, 465 lines)
@@ -0,0 +1,465 @@
|
||||
use std::{collections::HashMap, time::Duration};
|
||||
|
||||
use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
|
||||
use control_plane::local_env::LocalEnv;
|
||||
use hyper::{Method, StatusCode};
|
||||
use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShardId};
|
||||
use postgres_connection::parse_host_port;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::{
|
||||
backoff::{self},
|
||||
id::{NodeId, TenantId},
|
||||
};
|
||||
|
||||
use crate::service::Config;
|
||||
|
||||
const SLOWDOWN_DELAY: Duration = Duration::from_secs(5);
|
||||
|
||||
pub(crate) const API_CONCURRENCY: usize = 32;
|
||||
|
||||
struct ShardedComputeHookTenant {
|
||||
stripe_size: ShardStripeSize,
|
||||
shard_count: ShardCount,
|
||||
shards: Vec<(ShardNumber, NodeId)>,
|
||||
}
|
||||
|
||||
enum ComputeHookTenant {
|
||||
Unsharded(NodeId),
|
||||
Sharded(ShardedComputeHookTenant),
|
||||
}
|
||||
|
||||
impl ComputeHookTenant {
|
||||
/// Construct with at least one shard's information
|
||||
fn new(tenant_shard_id: TenantShardId, stripe_size: ShardStripeSize, node_id: NodeId) -> Self {
|
||||
if tenant_shard_id.shard_count.count() > 1 {
|
||||
Self::Sharded(ShardedComputeHookTenant {
|
||||
shards: vec![(tenant_shard_id.shard_number, node_id)],
|
||||
stripe_size,
|
||||
shard_count: tenant_shard_id.shard_count,
|
||||
})
|
||||
} else {
|
||||
Self::Unsharded(node_id)
|
||||
}
|
||||
}
|
||||
|
||||
/// Set one shard's location. If stripe size or shard count have changed, Self is reset
|
||||
/// and drops existing content.
|
||||
fn update(
|
||||
&mut self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
stripe_size: ShardStripeSize,
|
||||
node_id: NodeId,
|
||||
) {
|
||||
match self {
|
||||
Self::Unsharded(existing_node_id) if tenant_shard_id.shard_count.count() == 1 => {
|
||||
*existing_node_id = node_id
|
||||
}
|
||||
Self::Sharded(sharded_tenant)
|
||||
if sharded_tenant.stripe_size == stripe_size
|
||||
&& sharded_tenant.shard_count == tenant_shard_id.shard_count =>
|
||||
{
|
||||
if let Some(existing) = sharded_tenant
|
||||
.shards
|
||||
.iter()
|
||||
.position(|s| s.0 == tenant_shard_id.shard_number)
|
||||
{
|
||||
sharded_tenant.shards.get_mut(existing).unwrap().1 = node_id;
|
||||
} else {
|
||||
sharded_tenant
|
||||
.shards
|
||||
.push((tenant_shard_id.shard_number, node_id));
|
||||
sharded_tenant.shards.sort_by_key(|s| s.0)
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Shard count changed: reset struct.
|
||||
*self = Self::new(tenant_shard_id, stripe_size, node_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct ComputeHookNotifyRequestShard {
|
||||
node_id: NodeId,
|
||||
shard_number: ShardNumber,
|
||||
}
|
||||
|
||||
/// Request body that we send to the control plane to notify it of where a tenant is attached
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
struct ComputeHookNotifyRequest {
|
||||
tenant_id: TenantId,
|
||||
stripe_size: Option<ShardStripeSize>,
|
||||
shards: Vec<ComputeHookNotifyRequestShard>,
|
||||
}
|
||||
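// Wire-format illustration (hypothetical values; the exact field encodings depend on
// the serde impls of TenantId, NodeId, ShardNumber and ShardStripeSize in this tree):
//
//     {
//       "tenant_id": "3fa85f64c0de4c5ea1f05b4638d1b7a2",
//       "stripe_size": 32768,
//       "shards": [
//         { "node_id": 1, "shard_number": 0 },
//         { "node_id": 2, "shard_number": 1 }
//       ]
//     }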
|
||||
/// Error type for attempts to call into the control plane compute notification hook
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
pub(crate) enum NotifyError {
|
||||
// Request was not sent successfully, e.g. a transport error
|
||||
#[error("Sending request: {0}")]
|
||||
Request(#[from] reqwest::Error),
|
||||
// Request could not be serviced right now due to an ongoing operation in the control plane, but should be possible soon.
|
||||
#[error("Control plane tenant busy")]
|
||||
Busy,
|
||||
// Explicit 429 response asking us to retry less frequently
|
||||
#[error("Control plane overloaded")]
|
||||
SlowDown,
|
||||
// A 503 response indicates the control plane can't handle the request right now
|
||||
#[error("Control plane unavailable (status {0})")]
|
||||
Unavailable(StatusCode),
|
||||
// API returned unexpected non-success status. We will retry, but log a warning.
|
||||
#[error("Control plane returned unexpected status {0}")]
|
||||
Unexpected(StatusCode),
|
||||
// We shutdown while sending
|
||||
#[error("Shutting down")]
|
||||
ShuttingDown,
|
||||
// A response indicates we will never succeed, such as 400 or 404
|
||||
#[error("Non-retryable error {0}")]
|
||||
Fatal(StatusCode),
|
||||
}
|
||||
|
||||
impl ComputeHookTenant {
|
||||
fn maybe_reconfigure(&self, tenant_id: TenantId) -> Option<ComputeHookNotifyRequest> {
|
||||
match self {
|
||||
Self::Unsharded(node_id) => Some(ComputeHookNotifyRequest {
|
||||
tenant_id,
|
||||
shards: vec![ComputeHookNotifyRequestShard {
|
||||
shard_number: ShardNumber(0),
|
||||
node_id: *node_id,
|
||||
}],
|
||||
stripe_size: None,
|
||||
}),
|
||||
Self::Sharded(sharded_tenant)
|
||||
if sharded_tenant.shards.len() == sharded_tenant.shard_count.count() as usize =>
|
||||
{
|
||||
Some(ComputeHookNotifyRequest {
|
||||
tenant_id,
|
||||
shards: sharded_tenant
|
||||
.shards
|
||||
.iter()
|
||||
.map(|(shard_number, node_id)| ComputeHookNotifyRequestShard {
|
||||
shard_number: *shard_number,
|
||||
node_id: *node_id,
|
||||
})
|
||||
.collect(),
|
||||
stripe_size: Some(sharded_tenant.stripe_size),
|
||||
})
|
||||
}
|
||||
Self::Sharded(sharded_tenant) => {
|
||||
// Sharded tenant doesn't yet have information for all its shards
|
||||
|
||||
tracing::info!(
|
||||
"ComputeHookTenant::maybe_reconfigure: not enough shards ({}/{})",
|
||||
sharded_tenant.shards.len(),
|
||||
sharded_tenant.shard_count.count()
|
||||
);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The compute hook is a destination for notifications about changes to tenant:pageserver
|
||||
/// mapping. It aggregates updates for the shards in a tenant, and when appropriate reconfigures
|
||||
/// the compute connection string.
|
||||
pub(super) struct ComputeHook {
|
||||
config: Config,
|
||||
state: tokio::sync::Mutex<HashMap<TenantId, ComputeHookTenant>>,
|
||||
authorization_header: Option<String>,
|
||||
}
|
||||
|
||||
impl ComputeHook {
|
||||
pub(super) fn new(config: Config) -> Self {
|
||||
let authorization_header = config
|
||||
.control_plane_jwt_token
|
||||
.clone()
|
||||
.map(|jwt| format!("Bearer {}", jwt));
|
||||
|
||||
Self {
|
||||
state: Default::default(),
|
||||
config,
|
||||
authorization_header,
|
||||
}
|
||||
}
|
||||
|
||||
/// For test environments: use neon_local's LocalEnv to update compute
|
||||
async fn do_notify_local(
|
||||
&self,
|
||||
reconfigure_request: ComputeHookNotifyRequest,
|
||||
) -> anyhow::Result<()> {
|
||||
let env = match LocalEnv::load_config() {
|
||||
Ok(e) => e,
|
||||
Err(e) => {
|
||||
tracing::warn!("Couldn't load neon_local config, skipping compute update ({e})");
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
let cplane =
|
||||
ComputeControlPlane::load(env.clone()).expect("Error loading compute control plane");
|
||||
let ComputeHookNotifyRequest {
|
||||
tenant_id,
|
||||
shards,
|
||||
stripe_size,
|
||||
} = reconfigure_request;
|
||||
|
||||
let compute_pageservers = shards
|
||||
.into_iter()
|
||||
.map(|shard| {
|
||||
let ps_conf = env
|
||||
.get_pageserver_conf(shard.node_id)
|
||||
.expect("Unknown pageserver");
|
||||
let (pg_host, pg_port) = parse_host_port(&ps_conf.listen_pg_addr)
|
||||
.expect("Unable to parse listen_pg_addr");
|
||||
(pg_host, pg_port.unwrap_or(5432))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for (endpoint_name, endpoint) in &cplane.endpoints {
|
||||
if endpoint.tenant_id == tenant_id && endpoint.status() == EndpointStatus::Running {
|
||||
tracing::info!("Reconfiguring endpoint {}", endpoint_name,);
|
||||
endpoint
|
||||
.reconfigure(compute_pageservers.clone(), stripe_size)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn do_notify_iteration(
|
||||
&self,
|
||||
client: &reqwest::Client,
|
||||
url: &String,
|
||||
reconfigure_request: &ComputeHookNotifyRequest,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(), NotifyError> {
|
||||
let req = client.request(Method::PUT, url);
|
||||
let req = if let Some(value) = &self.authorization_header {
|
||||
req.header(reqwest::header::AUTHORIZATION, value)
|
||||
} else {
|
||||
req
|
||||
};
|
||||
|
||||
tracing::info!(
|
||||
"Sending notify request to {} ({:?})",
|
||||
url,
|
||||
reconfigure_request
|
||||
);
|
||||
let send_result = req.json(&reconfigure_request).send().await;
|
||||
let response = match send_result {
|
||||
Ok(r) => r,
|
||||
Err(e) => return Err(e.into()),
|
||||
};
|
||||
|
||||
// Treat all 2xx responses as success
|
||||
if response.status() >= StatusCode::OK && response.status() < StatusCode::MULTIPLE_CHOICES {
|
||||
if response.status() != StatusCode::OK {
|
||||
// Non-200 2xx response: it doesn't make sense to retry, but this is unexpected, so
|
||||
// log a warning.
|
||||
tracing::warn!(
|
||||
"Unexpected 2xx response code {} from control plane",
|
||||
response.status()
|
||||
);
|
||||
}
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Error response codes
|
||||
match response.status() {
|
||||
StatusCode::TOO_MANY_REQUESTS => {
|
||||
// TODO: 429 handling should be global: set some state visible to other requests
|
||||
// so that they will delay before starting, rather than all notifications trying
|
||||
// once before backing off.
|
||||
tokio::time::timeout(SLOWDOWN_DELAY, cancel.cancelled())
|
||||
.await
|
||||
.ok();
|
||||
Err(NotifyError::SlowDown)
|
||||
}
|
||||
StatusCode::LOCKED => {
|
||||
// We consider this fatal, because it's possible that the operation blocking the control plane is
// also the one that is waiting for this reconcile. We should let the reconciler calling
// this hook fail, to give the control plane a chance to un-lock.
|
||||
tracing::info!("Control plane reports tenant is locked, dropping out of notify");
|
||||
Err(NotifyError::Busy)
|
||||
}
|
||||
StatusCode::SERVICE_UNAVAILABLE
|
||||
| StatusCode::GATEWAY_TIMEOUT
|
||||
| StatusCode::BAD_GATEWAY => Err(NotifyError::Unavailable(response.status())),
|
||||
StatusCode::BAD_REQUEST | StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
|
||||
Err(NotifyError::Fatal(response.status()))
|
||||
}
|
||||
_ => Err(NotifyError::Unexpected(response.status())),
|
||||
}
|
||||
}
|
||||
|
||||
async fn do_notify(
|
||||
&self,
|
||||
url: &String,
|
||||
reconfigure_request: ComputeHookNotifyRequest,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(), NotifyError> {
|
||||
let client = reqwest::Client::new();
|
||||
backoff::retry(
|
||||
|| self.do_notify_iteration(&client, url, &reconfigure_request, cancel),
|
||||
|e| {
|
||||
matches!(
|
||||
e,
|
||||
NotifyError::Fatal(_) | NotifyError::Unexpected(_) | NotifyError::Busy
|
||||
)
|
||||
},
|
||||
3,
|
||||
10,
|
||||
"Send compute notification",
|
||||
cancel,
|
||||
)
|
||||
.await
|
||||
.ok_or_else(|| NotifyError::ShuttingDown)
|
||||
.and_then(|x| x)
|
||||
}
|
||||
|
||||
/// Call this to notify the compute (postgres) tier of new pageservers to use
|
||||
/// for a tenant. notify() is called by each shard individually, and this function
|
||||
/// will decide whether an update to the tenant is sent. An update is sent on the
|
||||
/// condition that:
|
||||
/// - We know a pageserver for every shard.
|
||||
/// - All the shards have the same shard_count (i.e. we are not mid-split)
|
||||
///
|
||||
/// Cancellation token enables callers to drop out, e.g. if calling from a Reconciler
|
||||
/// that is cancelled.
|
||||
///
|
||||
/// This function is fallible, including in the case that the control plane is transiently
|
||||
/// unavailable. A limited number of retries are done internally to efficiently hide short unavailability
|
||||
/// periods, but we don't retry forever. The **caller** is responsible for handling failures and
|
||||
/// ensuring that they eventually call again to ensure that the compute is eventually notified of
|
||||
/// the proper pageserver nodes for a tenant.
|
||||
#[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), node_id))]
|
||||
pub(super) async fn notify(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
node_id: NodeId,
|
||||
stripe_size: ShardStripeSize,
|
||||
cancel: &CancellationToken,
|
||||
) -> Result<(), NotifyError> {
|
||||
let mut locked = self.state.lock().await;
|
||||
|
||||
use std::collections::hash_map::Entry;
|
||||
let tenant = match locked.entry(tenant_shard_id.tenant_id) {
|
||||
Entry::Vacant(e) => e.insert(ComputeHookTenant::new(
|
||||
tenant_shard_id,
|
||||
stripe_size,
|
||||
node_id,
|
||||
)),
|
||||
Entry::Occupied(e) => {
|
||||
let tenant = e.into_mut();
|
||||
tenant.update(tenant_shard_id, stripe_size, node_id);
|
||||
tenant
|
||||
}
|
||||
};
|
||||
|
||||
let reconfigure_request = tenant.maybe_reconfigure(tenant_shard_id.tenant_id);
|
||||
let Some(reconfigure_request) = reconfigure_request else {
|
||||
// The tenant doesn't yet have pageservers for all its shards: we won't notify anything
|
||||
// until it does.
|
||||
tracing::info!("Tenant isn't yet ready to emit a notification");
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
if let Some(notify_url) = &self.config.compute_hook_url {
|
||||
self.do_notify(notify_url, reconfigure_request, cancel)
|
||||
.await
|
||||
} else {
|
||||
self.do_notify_local(reconfigure_request)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
// This path is for testing only, so munge the error into our prod-style error type.
|
||||
tracing::error!("Local notification hook failed: {e}");
|
||||
NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use pageserver_api::shard::{ShardCount, ShardNumber};
|
||||
use utils::id::TenantId;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn tenant_updates() -> anyhow::Result<()> {
|
||||
let tenant_id = TenantId::generate();
|
||||
let mut tenant_state = ComputeHookTenant::new(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_count: ShardCount::new(0),
|
||||
shard_number: ShardNumber(0),
|
||||
},
|
||||
ShardStripeSize(12345),
|
||||
NodeId(1),
|
||||
);
|
||||
|
||||
// An unsharded tenant is always ready to emit a notification
|
||||
assert!(tenant_state.maybe_reconfigure(tenant_id).is_some());
|
||||
assert_eq!(
|
||||
tenant_state
|
||||
.maybe_reconfigure(tenant_id)
|
||||
.unwrap()
|
||||
.shards
|
||||
.len(),
|
||||
1
|
||||
);
|
||||
assert!(tenant_state
|
||||
.maybe_reconfigure(tenant_id)
|
||||
.unwrap()
|
||||
.stripe_size
|
||||
.is_none());
|
||||
|
||||
// Writing the first shard of a multi-sharded situation (i.e. in a split)
// resets the tenant state and puts it in a non-notifying state (we need to
// see all shards)
|
||||
tenant_state.update(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_count: ShardCount::new(2),
|
||||
shard_number: ShardNumber(1),
|
||||
},
|
||||
ShardStripeSize(32768),
|
||||
NodeId(1),
|
||||
);
|
||||
assert!(tenant_state.maybe_reconfigure(tenant_id).is_none());
|
||||
|
||||
// Writing the second shard makes it ready to notify
|
||||
tenant_state.update(
|
||||
TenantShardId {
|
||||
tenant_id,
|
||||
shard_count: ShardCount::new(2),
|
||||
shard_number: ShardNumber(0),
|
||||
},
|
||||
ShardStripeSize(32768),
|
||||
NodeId(1),
|
||||
);
|
||||
|
||||
assert!(tenant_state.maybe_reconfigure(tenant_id).is_some());
|
||||
assert_eq!(
|
||||
tenant_state
|
||||
.maybe_reconfigure(tenant_id)
|
||||
.unwrap()
|
||||
.shards
|
||||
.len(),
|
||||
2
|
||||
);
|
||||
assert_eq!(
|
||||
tenant_state
|
||||
.maybe_reconfigure(tenant_id)
|
||||
.unwrap()
|
||||
.stripe_size,
|
||||
Some(ShardStripeSize(32768))
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -184,19 +184,6 @@ impl HeartbeaterTask {
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::info!(
|
||||
"Heartbeat round complete for {} nodes, {} offline",
|
||||
new_state.len(),
|
||||
new_state
|
||||
.values()
|
||||
.filter(|s| match s {
|
||||
PageserverState::Available { .. } => {
|
||||
false
|
||||
}
|
||||
PageserverState::Offline => true,
|
||||
})
|
||||
.count()
|
||||
);
|
||||
|
||||
let mut deltas = Vec::new();
|
||||
let now = Instant::now();
|
||||
@@ -4,12 +4,10 @@ use crate::metrics::{
|
||||
};
|
||||
use crate::reconciler::ReconcileError;
|
||||
use crate::service::{Service, STARTUP_RECONCILE_TIMEOUT};
|
||||
use anyhow::Context;
|
||||
use futures::Future;
|
||||
use hyper::header::CONTENT_TYPE;
|
||||
use hyper::{Body, Request, Response};
|
||||
use hyper::{StatusCode, Uri};
|
||||
use metrics::{BuildInfo, NeonMetrics};
|
||||
use pageserver_api::models::{
|
||||
TenantConfigRequest, TenantCreateRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
|
||||
TenantTimeTravelRequest, TimelineCreateRequest,
|
||||
@@ -46,19 +44,15 @@ use control_plane::storage_controller::{AttachHookRequest, InspectRequest};
|
||||
use routerify::Middleware;
|
||||
|
||||
/// State available to HTTP request handlers
|
||||
#[derive(Clone)]
|
||||
pub struct HttpState {
|
||||
service: Arc<crate::service::Service>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
neon_metrics: NeonMetrics,
|
||||
allowlist_routes: Vec<Uri>,
|
||||
}
|
||||
|
||||
impl HttpState {
|
||||
pub fn new(
|
||||
service: Arc<crate::service::Service>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
build_info: BuildInfo,
|
||||
) -> Self {
|
||||
pub fn new(service: Arc<crate::service::Service>, auth: Option<Arc<SwappableJwtAuth>>) -> Self {
|
||||
let allowlist_routes = ["/status", "/ready", "/metrics"]
|
||||
.iter()
|
||||
.map(|v| v.parse().unwrap())
|
||||
@@ -66,7 +60,6 @@ impl HttpState {
|
||||
Self {
|
||||
service,
|
||||
auth,
|
||||
neon_metrics: NeonMetrics::new(build_info),
|
||||
allowlist_routes,
|
||||
}
|
||||
}
|
||||
@@ -142,6 +135,52 @@ async fn handle_tenant_create(
|
||||
)
|
||||
}
|
||||
|
||||
// For tenant and timeline deletions, which both implement an "initially return 202, then 404 once
|
||||
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream. This avoids
|
||||
// needing to track a "deleting" state for tenants.
|
||||
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
|
||||
where
|
||||
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
|
||||
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
|
||||
{
|
||||
let started_at = Instant::now();
|
||||
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
|
||||
// completed.
|
||||
let mut retry_period = Duration::from_secs(1);
|
||||
// On subsequent retries, wait longer.
|
||||
let max_retry_period = Duration::from_secs(5);
|
||||
// Enable callers with a 30 second request timeout to reliably get a response
|
||||
let max_wait = Duration::from_secs(25);
|
||||
|
||||
loop {
|
||||
let status = f(service.clone()).await?;
|
||||
match status {
|
||||
StatusCode::ACCEPTED => {
|
||||
tracing::info!("Deletion accepted, waiting to try again...");
|
||||
tokio::time::sleep(retry_period).await;
|
||||
retry_period = max_retry_period;
|
||||
}
|
||||
StatusCode::NOT_FOUND => {
|
||||
tracing::info!("Deletion complete");
|
||||
return json_response(StatusCode::OK, ());
|
||||
}
|
||||
_ => {
|
||||
tracing::warn!("Unexpected status {status}");
|
||||
return json_response(status, ());
|
||||
}
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
if now + retry_period > started_at + max_wait {
|
||||
tracing::info!("Deletion timed out waiting for 404");
|
||||
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
|
||||
// the pageserver's swagger definition for this endpoint, and has the same desired
|
||||
// effect of causing the control plane to retry later.
|
||||
return json_response(StatusCode::CONFLICT, ());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_tenant_location_config(
|
||||
service: Arc<Service>,
|
||||
mut req: Request<Body>,
|
||||
@@ -213,12 +252,6 @@ async fn handle_tenant_time_travel_remote_storage(
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
fn map_reqwest_hyper_status(status: reqwest::StatusCode) -> Result<hyper::StatusCode, ApiError> {
|
||||
hyper::StatusCode::from_u16(status.as_u16())
|
||||
.context("invalid status code")
|
||||
.map_err(ApiError::InternalServerError)
|
||||
}
|
||||
|
||||
async fn handle_tenant_secondary_download(
|
||||
service: Arc<Service>,
|
||||
req: Request<Body>,
|
||||
@@ -227,7 +260,7 @@ async fn handle_tenant_secondary_download(
|
||||
let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis);
|
||||
|
||||
let (status, progress) = service.tenant_secondary_download(tenant_id, wait).await?;
|
||||
json_response(map_reqwest_hyper_status(status)?, progress)
|
||||
json_response(status, progress)
|
||||
}
|
||||
|
||||
async fn handle_tenant_delete(
|
||||
@@ -237,17 +270,10 @@ async fn handle_tenant_delete(
|
||||
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
|
||||
check_permissions(&req, Scope::PageServerApi)?;
|
||||
|
||||
let status_code = service
|
||||
.tenant_delete(tenant_id)
|
||||
.await
|
||||
.and_then(map_reqwest_hyper_status)?;
|
||||
|
||||
if status_code == StatusCode::NOT_FOUND {
|
||||
// The pageserver uses 404 for successful deletion, but we use 200
|
||||
json_response(StatusCode::OK, ())
|
||||
} else {
|
||||
json_response(status_code, ())
|
||||
}
|
||||
deletion_wrapper(service, move |service| async move {
|
||||
service.tenant_delete(tenant_id).await
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn handle_tenant_timeline_create(
|
||||
@@ -275,56 +301,8 @@ async fn handle_tenant_timeline_delete(
|
||||
|
||||
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
|
||||
|
||||
// For timeline deletions, which both implement an "initially return 202, then 404 once
|
||||
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream.
|
||||
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
|
||||
where
|
||||
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
|
||||
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
|
||||
{
|
||||
let started_at = Instant::now();
|
||||
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
|
||||
// completed.
|
||||
let mut retry_period = Duration::from_secs(1);
|
||||
// On subsequent retries, wait longer.
|
||||
let max_retry_period = Duration::from_secs(5);
|
||||
// Enable callers with a 30 second request timeout to reliably get a response
|
||||
let max_wait = Duration::from_secs(25);
|
||||
|
||||
loop {
|
||||
let status = f(service.clone()).await?;
|
||||
match status {
|
||||
StatusCode::ACCEPTED => {
|
||||
tracing::info!("Deletion accepted, waiting to try again...");
|
||||
tokio::time::sleep(retry_period).await;
|
||||
retry_period = max_retry_period;
|
||||
}
|
||||
StatusCode::NOT_FOUND => {
|
||||
tracing::info!("Deletion complete");
|
||||
return json_response(StatusCode::OK, ());
|
||||
}
|
||||
_ => {
|
||||
tracing::warn!("Unexpected status {status}");
|
||||
return json_response(status, ());
|
||||
}
|
||||
}
|
||||
|
||||
let now = Instant::now();
|
||||
if now + retry_period > started_at + max_wait {
|
||||
tracing::info!("Deletion timed out waiting for 404");
|
||||
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
|
||||
// the pageserver's swagger definition for this endpoint, and has the same desired
|
||||
// effect of causing the control plane to retry later.
|
||||
return json_response(StatusCode::CONFLICT, ());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
deletion_wrapper(service, move |service| async move {
|
||||
service
|
||||
.tenant_timeline_delete(tenant_id, timeline_id)
|
||||
.await
|
||||
.and_then(map_reqwest_hyper_status)
|
||||
service.tenant_timeline_delete(tenant_id, timeline_id).await
|
||||
})
|
||||
.await
|
||||
}
|
||||
@@ -387,9 +365,11 @@ async fn handle_tenant_timeline_passthrough(
|
||||
}
|
||||
|
||||
// We have a reqwest::Response, but would like an http::Response
|
||||
let mut builder = hyper::Response::builder().status(map_reqwest_hyper_status(resp.status())?);
|
||||
let mut builder = hyper::Response::builder()
|
||||
.status(resp.status())
|
||||
.version(resp.version());
|
||||
for (k, v) in resp.headers() {
|
||||
builder = builder.header(k.as_str(), v.as_bytes());
|
||||
builder = builder.header(k, v);
|
||||
}
|
||||
|
||||
let response = builder
|
||||
@@ -419,15 +399,6 @@ async fn handle_tenant_describe(
|
||||
json_response(StatusCode::OK, service.tenant_describe(tenant_id)?)
|
||||
}
|
||||
|
||||
async fn handle_tenant_list(
|
||||
service: Arc<Service>,
|
||||
req: Request<Body>,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
check_permissions(&req, Scope::Admin)?;
|
||||
|
||||
json_response(StatusCode::OK, service.tenant_list())
|
||||
}
|
||||
|
||||
async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
check_permissions(&req, Scope::Admin)?;
|
||||
|
||||
@@ -441,10 +412,7 @@ async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError
|
||||
check_permissions(&req, Scope::Admin)?;
|
||||
|
||||
let state = get_state(&req);
|
||||
let nodes = state.service.node_list().await?;
|
||||
let api_nodes = nodes.into_iter().map(|n| n.describe()).collect::<Vec<_>>();
|
||||
|
||||
json_response(StatusCode::OK, api_nodes)
|
||||
json_response(StatusCode::OK, state.service.node_list().await?)
|
||||
}
|
||||
|
||||
async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
@@ -536,18 +504,6 @@ async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiErr
|
||||
json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
|
||||
}
|
||||
|
||||
async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
|
||||
check_permissions(&req, Scope::PageServerApi)?;
|
||||
|
||||
let state = get_state(&req);
|
||||
|
||||
json_response(
|
||||
StatusCode::OK,
|
||||
state.service.tenant_import(tenant_id).await?,
|
||||
)
|
||||
}
|
||||
|
||||
async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
check_permissions(&req, Scope::Admin)?;
|
||||
|
||||
@@ -634,17 +590,9 @@ where
|
||||
.await
|
||||
}
|
||||
|
||||
/// Check if the required scope is held in the request's token, or if the request has
|
||||
/// a token with 'admin' scope then always permit it.
|
||||
fn check_permissions(request: &Request<Body>, required_scope: Scope) -> Result<(), ApiError> {
|
||||
check_permission_with(request, |claims| {
|
||||
match crate::auth::check_permission(claims, required_scope) {
|
||||
Err(e) => match crate::auth::check_permission(claims, Scope::Admin) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(_) => Err(e),
|
||||
},
|
||||
Ok(()) => Ok(()),
|
||||
}
|
||||
crate::auth::check_permission(claims, required_scope)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -704,11 +652,10 @@ fn epilogue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn measured_metrics_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
pub async fn measured_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
pub const TEXT_FORMAT: &str = "text/plain; version=0.0.4";
|
||||
|
||||
let state = get_state(&req);
|
||||
let payload = crate::metrics::METRICS_REGISTRY.encode(&state.neon_metrics);
|
||||
let payload = crate::metrics::METRICS_REGISTRY.encode();
|
||||
let response = Response::builder()
|
||||
.status(200)
|
||||
.header(CONTENT_TYPE, TEXT_FORMAT)
|
||||
@@ -737,7 +684,6 @@ where
|
||||
pub fn make_router(
|
||||
service: Arc<Service>,
|
||||
auth: Option<Arc<SwappableJwtAuth>>,
|
||||
build_info: BuildInfo,
|
||||
) -> RouterBuilder<hyper::Body, ApiError> {
|
||||
let mut router = endpoint::make_router()
|
||||
.middleware(prologue_metrics_middleware())
|
||||
@@ -754,7 +700,7 @@ pub fn make_router(
|
||||
}
|
||||
|
||||
router
|
||||
.data(Arc::new(HttpState::new(service, auth, build_info)))
|
||||
.data(Arc::new(HttpState::new(service, auth)))
|
||||
.get("/metrics", |r| {
|
||||
named_request_span(r, measured_metrics_handler, RequestName("metrics"))
|
||||
})
|
||||
@@ -785,13 +731,6 @@ pub fn make_router(
|
||||
.post("/debug/v1/node/:node_id/drop", |r| {
|
||||
named_request_span(r, handle_node_drop, RequestName("debug_v1_node_drop"))
|
||||
})
|
||||
.post("/debug/v1/tenant/:tenant_id/import", |r| {
|
||||
named_request_span(
|
||||
r,
|
||||
handle_tenant_import,
|
||||
RequestName("debug_v1_tenant_import"),
|
||||
)
|
||||
})
|
||||
.get("/debug/v1/tenant", |r| {
|
||||
named_request_span(r, handle_tenants_dump, RequestName("debug_v1_tenant"))
|
||||
})
|
||||
@@ -854,9 +793,6 @@ pub fn make_router(
|
||||
RequestName("control_v1_tenant_describe"),
|
||||
)
|
||||
})
|
||||
.get("/control/v1/tenant", |r| {
|
||||
tenant_service_handler(r, handle_tenant_list, RequestName("control_v1_tenant_list"))
|
||||
})
|
||||
.put("/control/v1/tenant/:tenant_id/policy", |r| {
|
||||
named_request_span(
|
||||
r,
|
||||
@@ -915,7 +851,7 @@ pub fn make_router(
|
||||
RequestName("v1_tenant_timeline"),
|
||||
)
|
||||
})
|
||||
// Tenant detail GET passthrough to shard zero:
|
||||
// Tenant detail GET passthrough to shard zero
|
||||
.get("/v1/tenant/:tenant_id", |r| {
|
||||
tenant_service_handler(
|
||||
r,
|
||||
@@ -923,14 +859,13 @@ pub fn make_router(
|
||||
RequestName("v1_tenant_passthrough"),
|
||||
)
|
||||
})
|
||||
// The `*` in the URL is a wildcard: any tenant/timeline GET APIs on the pageserver
|
||||
// are implicitly exposed here. This must be last in the list to avoid
|
||||
// taking precedence over other GET methods we might implement by hand.
|
||||
.get("/v1/tenant/:tenant_id/*", |r| {
|
||||
// Timeline GET passthrough to shard zero. Note that the `*` in the URL is a wildcard: any future
|
||||
// timeline GET APIs will be implicitly included.
|
||||
.get("/v1/tenant/:tenant_id/timeline*", |r| {
|
||||
tenant_service_handler(
|
||||
r,
|
||||
handle_tenant_timeline_passthrough,
|
||||
RequestName("v1_tenant_passthrough"),
|
||||
RequestName("v1_tenant_timeline_passthrough"),
|
||||
)
|
||||
})
|
||||
}
|
||||
control_plane/attachment_service/src/id_lock_map.rs (new file, 54 lines)
@@ -0,0 +1,54 @@
use std::{collections::HashMap, sync::Arc};

/// A map of locks covering some arbitrary identifiers. Useful if you have a collection of objects but don't
/// want to embed a lock in each one, or if your locking granularity is different to your object granularity.
/// For example, used in the storage controller where the objects are tenant shards, but sometimes locking
/// is needed at a tenant-wide granularity.
pub(crate) struct IdLockMap<T>
where
    T: Eq + PartialEq + std::hash::Hash,
{
    /// A synchronous lock for getting/setting the async locks that our callers will wait on.
    entities: std::sync::Mutex<std::collections::HashMap<T, Arc<tokio::sync::RwLock<()>>>>,
}

impl<T> IdLockMap<T>
where
    T: Eq + PartialEq + std::hash::Hash,
{
    pub(crate) fn shared(
        &self,
        key: T,
    ) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockReadGuard<()>> {
        let mut locked = self.entities.lock().unwrap();
        let entry = locked.entry(key).or_default();
        entry.clone().read_owned()
    }

    pub(crate) fn exclusive(
        &self,
        key: T,
    ) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockWriteGuard<()>> {
        let mut locked = self.entities.lock().unwrap();
        let entry = locked.entry(key).or_default();
        entry.clone().write_owned()
    }

    /// Rather than building a lock guard that re-takes the [`Self::entities`] lock, we just do
    /// periodic housekeeping to avoid the map growing indefinitely
    pub(crate) fn housekeeping(&self) {
        let mut locked = self.entities.lock().unwrap();
        locked.retain(|_k, lock| lock.try_write().is_err())
    }
}

impl<T> Default for IdLockMap<T>
where
    T: Eq + PartialEq + std::hash::Hash,
{
    fn default() -> Self {
        Self {
            entities: std::sync::Mutex::new(HashMap::new()),
        }
    }
}
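For orientation, a minimal usage sketch of the IdLockMap defined above. The key type (u32) and the surrounding function are illustrative assumptions, not part of this change: take the exclusive lock for an id-wide operation, the shared lock for work that may proceed concurrently on the same id, and call housekeeping() periodically so unused entries are dropped.

// Usage sketch only; the u32 key type and this caller are assumptions, not from this diff.
async fn example(locks: &IdLockMap<u32>) {
    {
        // Exclusive access for an id-wide operation (e.g. a tenant-wide delete).
        let _write_guard = locks.exclusive(42).await;
        // ... mutate state for id 42 while holding the write lock ...
    }

    // Shared access for operations that may run concurrently against the same id.
    let _read_guard = locks.shared(42).await;

    // Call periodically: entries whose lock is not currently held are removed.
    locks.housekeeping();
}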
@@ -14,7 +14,7 @@ mod reconciler;
mod scheduler;
mod schema;
pub mod service;
mod tenant_shard;
mod tenant_state;

#[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone, Serialize)]
struct Sequence(u64);
@@ -1,22 +1,18 @@
|
||||
use anyhow::{anyhow, Context};
|
||||
use attachment_service::http::make_router;
|
||||
use attachment_service::metrics::preinitialize_metrics;
|
||||
use attachment_service::persistence::Persistence;
|
||||
use attachment_service::service::{Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT};
|
||||
use camino::Utf8PathBuf;
|
||||
use clap::Parser;
|
||||
use diesel::Connection;
|
||||
use metrics::launch_timestamp::LaunchTimestamp;
|
||||
use metrics::BuildInfo;
|
||||
use std::sync::Arc;
|
||||
use storage_controller::http::make_router;
|
||||
use storage_controller::metrics::preinitialize_metrics;
|
||||
use storage_controller::persistence::Persistence;
|
||||
use storage_controller::service::{
|
||||
Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT,
|
||||
};
|
||||
use tokio::signal::unix::SignalKind;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::auth::{JwtAuth, SwappableJwtAuth};
|
||||
use utils::logging::{self, LogFormat};
|
||||
|
||||
use utils::sentry_init::init_sentry;
|
||||
use utils::{project_build_tag, project_git_version, tcp_listener};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
@@ -54,7 +50,7 @@ struct Cli {
|
||||
#[arg(short, long)]
|
||||
path: Option<Utf8PathBuf>,
|
||||
|
||||
/// URL to connect to postgres, like postgresql://localhost:1234/storage_controller
|
||||
/// URL to connect to postgres, like postgresql://localhost:1234/attachment_service
|
||||
#[arg(long)]
|
||||
database_url: Option<String>,
|
||||
|
||||
@@ -65,18 +61,6 @@ struct Cli {
|
||||
/// Grace period before marking unresponsive pageserver offline
|
||||
#[arg(long)]
|
||||
max_unavailable_interval: Option<humantime::Duration>,
|
||||
|
||||
/// Size threshold for automatically splitting shards (disabled by default)
|
||||
#[arg(long)]
|
||||
split_threshold: Option<u64>,
|
||||
|
||||
/// Maximum number of reconcilers that may run in parallel
|
||||
#[arg(long)]
|
||||
reconciler_concurrency: Option<usize>,
|
||||
|
||||
/// How long to wait for the initial database connection to be available.
|
||||
#[arg(long, default_value = "5s")]
|
||||
db_connect_timeout: humantime::Duration,
|
||||
}
|
||||
|
||||
enum StrictMode {
|
||||
@@ -174,8 +158,6 @@ fn main() -> anyhow::Result<()> {
|
||||
std::process::exit(1);
|
||||
}));
|
||||
|
||||
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
|
||||
|
||||
tokio::runtime::Builder::new_current_thread()
|
||||
// We use spawn_blocking for database operations, so require approximately
|
||||
// as many blocking threads as we will open database connections.
|
||||
@@ -207,11 +189,6 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
args.listen
|
||||
);
|
||||
|
||||
let build_info = BuildInfo {
|
||||
revision: GIT_VERSION,
|
||||
build_tag: BUILD_TAG,
|
||||
};
|
||||
|
||||
let strict_mode = if args.dev {
|
||||
StrictMode::Dev
|
||||
} else {
|
||||
@@ -256,15 +233,9 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
.max_unavailable_interval
|
||||
.map(humantime::Duration::into)
|
||||
.unwrap_or(MAX_UNAVAILABLE_INTERVAL_DEFAULT),
|
||||
reconciler_concurrency: args
|
||||
.reconciler_concurrency
|
||||
.unwrap_or(RECONCILER_CONCURRENCY_DEFAULT),
|
||||
split_threshold: args.split_threshold,
|
||||
};
|
||||
|
||||
// After loading secrets & config, but before starting anything else, apply database migrations
|
||||
Persistence::await_connection(&secrets.database_url, args.db_connect_timeout.into()).await?;
|
||||
|
||||
migration_run(&secrets.database_url)
|
||||
.await
|
||||
.context("Running database migrations")?;
|
||||
@@ -279,7 +250,7 @@ async fn async_main() -> anyhow::Result<()> {
|
||||
let auth = secrets
|
||||
.public_key
|
||||
.map(|jwt_auth| Arc::new(SwappableJwtAuth::new(jwt_auth)));
|
||||
let router = make_router(service.clone(), auth, build_info)
|
||||
let router = make_router(service.clone(), auth)
|
||||
.build()
|
||||
.map_err(|err| anyhow!(err))?;
|
||||
let router_service = utils::http::RouterService::new(router).unwrap();
|
||||
@@ -8,8 +8,10 @@
|
||||
//! The rest of the code defines label group types and deals with converting outer types to labels.
|
||||
//!
|
||||
use bytes::Bytes;
|
||||
use measured::{label::LabelValue, metric::histogram, FixedCardinalityLabel, MetricGroup};
|
||||
use metrics::NeonMetrics;
|
||||
use measured::{
|
||||
label::{LabelValue, StaticLabelSet},
|
||||
FixedCardinalityLabel, MetricGroup,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
use std::sync::Mutex;
|
||||
|
||||
@@ -24,15 +26,13 @@ pub fn preinitialize_metrics() {
|
||||
|
||||
pub(crate) struct StorageControllerMetrics {
|
||||
pub(crate) metrics_group: StorageControllerMetricGroup,
|
||||
encoder: Mutex<measured::text::BufferedTextEncoder>,
|
||||
encoder: Mutex<measured::text::TextEncoder>,
|
||||
}
|
||||
|
||||
#[derive(measured::MetricGroup)]
|
||||
#[metric(new())]
|
||||
pub(crate) struct StorageControllerMetricGroup {
|
||||
/// Count of how many times we spawn a reconcile task
|
||||
pub(crate) storage_controller_reconcile_spawn: measured::Counter,
|
||||
|
||||
/// Reconciler tasks completed, broken down by success/failure/cancelled
|
||||
pub(crate) storage_controller_reconcile_complete:
|
||||
measured::CounterVec<ReconcileCompleteLabelGroupSet>,
|
||||
@@ -43,9 +43,7 @@ pub(crate) struct StorageControllerMetricGroup {
|
||||
/// HTTP request status counters for handled requests
|
||||
pub(crate) storage_controller_http_request_status:
|
||||
measured::CounterVec<HttpRequestStatusLabelGroupSet>,
|
||||
|
||||
/// HTTP request handler latency across all status codes
|
||||
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
|
||||
pub(crate) storage_controller_http_request_latency:
|
||||
measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>,
|
||||
|
||||
@@ -57,7 +55,6 @@ pub(crate) struct StorageControllerMetricGroup {
|
||||
/// Latency of HTTP requests to the pageserver, broken down by pageserver
|
||||
/// node id, request name and method. This include both successful and unsuccessful
|
||||
/// requests.
|
||||
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
|
||||
pub(crate) storage_controller_pageserver_request_latency:
|
||||
measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
|
||||
|
||||
@@ -69,7 +66,6 @@ pub(crate) struct StorageControllerMetricGroup {
|
||||
/// Latency of pass-through HTTP requests to the pageserver, broken down by pageserver
|
||||
/// node id, request name and method. This include both successful and unsuccessful
|
||||
/// requests.
|
||||
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
|
||||
pub(crate) storage_controller_passthrough_request_latency:
|
||||
measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
|
||||
|
||||
@@ -78,34 +74,76 @@ pub(crate) struct StorageControllerMetricGroup {
|
||||
measured::CounterVec<DatabaseQueryErrorLabelGroupSet>,
|
||||
|
||||
/// Latency of database queries, broken down by operation.
|
||||
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
|
||||
pub(crate) storage_controller_database_query_latency:
|
||||
measured::HistogramVec<DatabaseQueryLatencyLabelGroupSet, 5>,
|
||||
}
|
||||
|
||||
impl StorageControllerMetrics {
|
||||
pub(crate) fn encode(&self, neon_metrics: &NeonMetrics) -> Bytes {
|
||||
pub(crate) fn encode(&self) -> Bytes {
|
||||
let mut encoder = self.encoder.lock().unwrap();
|
||||
neon_metrics
|
||||
.collect_group_into(&mut *encoder)
|
||||
.unwrap_or_else(|infallible| match infallible {});
|
||||
self.metrics_group
|
||||
.collect_group_into(&mut *encoder)
|
||||
.unwrap_or_else(|infallible| match infallible {});
|
||||
self.metrics_group.collect_into(&mut *encoder);
|
||||
encoder.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for StorageControllerMetrics {
|
||||
fn default() -> Self {
|
||||
let mut metrics_group = StorageControllerMetricGroup::new();
|
||||
metrics_group
|
||||
.storage_controller_reconcile_complete
|
||||
.init_all_dense();
|
||||
|
||||
Self {
|
||||
metrics_group,
|
||||
encoder: Mutex::new(measured::text::BufferedTextEncoder::new()),
|
||||
metrics_group: StorageControllerMetricGroup::new(),
|
||||
encoder: Mutex::new(measured::text::TextEncoder::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl StorageControllerMetricGroup {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
storage_controller_reconcile_spawn: measured::Counter::new(),
|
||||
storage_controller_reconcile_complete: measured::CounterVec::new(
|
||||
ReconcileCompleteLabelGroupSet {
|
||||
status: StaticLabelSet::new(),
|
||||
},
|
||||
),
|
||||
storage_controller_schedule_optimization: measured::Counter::new(),
|
||||
storage_controller_http_request_status: measured::CounterVec::new(
|
||||
HttpRequestStatusLabelGroupSet {
|
||||
path: lasso::ThreadedRodeo::new(),
|
||||
method: StaticLabelSet::new(),
|
||||
status: StaticLabelSet::new(),
|
||||
},
|
||||
),
|
||||
storage_controller_http_request_latency: measured::HistogramVec::new(
|
||||
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
|
||||
),
|
||||
storage_controller_pageserver_request_error: measured::CounterVec::new(
|
||||
PageserverRequestLabelGroupSet {
|
||||
pageserver_id: lasso::ThreadedRodeo::new(),
|
||||
path: lasso::ThreadedRodeo::new(),
|
||||
method: StaticLabelSet::new(),
|
||||
},
|
||||
),
|
||||
storage_controller_pageserver_request_latency: measured::HistogramVec::new(
|
||||
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
|
||||
),
|
||||
storage_controller_passthrough_request_error: measured::CounterVec::new(
|
||||
PageserverRequestLabelGroupSet {
|
||||
pageserver_id: lasso::ThreadedRodeo::new(),
|
||||
path: lasso::ThreadedRodeo::new(),
|
||||
method: StaticLabelSet::new(),
|
||||
},
|
||||
),
|
||||
storage_controller_passthrough_request_latency: measured::HistogramVec::new(
|
||||
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
|
||||
),
|
||||
storage_controller_database_query_error: measured::CounterVec::new(
|
||||
DatabaseQueryErrorLabelGroupSet {
|
||||
operation: StaticLabelSet::new(),
|
||||
error_type: StaticLabelSet::new(),
|
||||
},
|
||||
),
|
||||
storage_controller_database_query_latency: measured::HistogramVec::new(
|
||||
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -119,7 +157,7 @@ pub(crate) struct ReconcileCompleteLabelGroup {
|
||||
#[derive(measured::LabelGroup)]
|
||||
#[label(set = HttpRequestStatusLabelGroupSet)]
|
||||
pub(crate) struct HttpRequestStatusLabelGroup<'a> {
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo)]
|
||||
pub(crate) path: &'a str,
|
||||
pub(crate) method: Method,
|
||||
pub(crate) status: StatusCode,
|
||||
@@ -128,21 +166,40 @@ pub(crate) struct HttpRequestStatusLabelGroup<'a> {
|
||||
#[derive(measured::LabelGroup)]
|
||||
#[label(set = HttpRequestLatencyLabelGroupSet)]
|
||||
pub(crate) struct HttpRequestLatencyLabelGroup<'a> {
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo)]
|
||||
pub(crate) path: &'a str,
|
||||
pub(crate) method: Method,
|
||||
}
|
||||
|
||||
impl Default for HttpRequestLatencyLabelGroupSet {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
path: lasso::ThreadedRodeo::new(),
|
||||
method: StaticLabelSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(measured::LabelGroup, Clone)]
|
||||
#[label(set = PageserverRequestLabelGroupSet)]
|
||||
pub(crate) struct PageserverRequestLabelGroup<'a> {
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo)]
|
||||
pub(crate) pageserver_id: &'a str,
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
|
||||
#[label(dynamic_with = lasso::ThreadedRodeo)]
|
||||
pub(crate) path: &'a str,
|
||||
pub(crate) method: Method,
|
||||
}
|
||||
|
||||
impl Default for PageserverRequestLabelGroupSet {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
pageserver_id: lasso::ThreadedRodeo::new(),
|
||||
path: lasso::ThreadedRodeo::new(),
|
||||
method: StaticLabelSet::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(measured::LabelGroup)]
|
||||
#[label(set = DatabaseQueryErrorLabelGroupSet)]
|
||||
pub(crate) struct DatabaseQueryErrorLabelGroup {
|
||||
@@ -156,7 +213,7 @@ pub(crate) struct DatabaseQueryLatencyLabelGroup {
|
||||
pub(crate) operation: DatabaseOperation,
|
||||
}
|
||||
|
||||
#[derive(FixedCardinalityLabel, Clone, Copy)]
|
||||
#[derive(FixedCardinalityLabel)]
|
||||
pub(crate) enum ReconcileOutcome {
|
||||
#[label(rename = "ok")]
|
||||
Success,
|
||||
@@ -164,7 +221,7 @@ pub(crate) enum ReconcileOutcome {
|
||||
Cancel,
|
||||
}
|
||||
|
||||
#[derive(FixedCardinalityLabel, Copy, Clone)]
|
||||
#[derive(FixedCardinalityLabel, Clone)]
|
||||
pub(crate) enum Method {
|
||||
Get,
|
||||
Put,
|
||||
@@ -189,12 +246,11 @@ impl From<hyper::Method> for Method {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct StatusCode(pub(crate) hyper::http::StatusCode);
|
||||
|
||||
impl LabelValue for StatusCode {
|
||||
fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {
|
||||
v.write_int(self.0.as_u16() as i64)
|
||||
v.write_int(self.0.as_u16() as u64)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -212,7 +268,7 @@ impl FixedCardinalityLabel for StatusCode {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(FixedCardinalityLabel, Clone, Copy)]
|
||||
#[derive(FixedCardinalityLabel)]
|
||||
pub(crate) enum DatabaseErrorLabel {
|
||||
Query,
|
||||
Connection,
|
||||
@@ -1,14 +1,13 @@
|
||||
use std::{str::FromStr, time::Duration};
|
||||
|
||||
use hyper::StatusCode;
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy,
|
||||
TenantLocateResponseShard,
|
||||
NodeAvailability, NodeRegisterRequest, NodeSchedulingPolicy, TenantLocateResponseShard,
|
||||
},
|
||||
shard::TenantShardId,
|
||||
};
|
||||
use pageserver_client::mgmt_api;
|
||||
use reqwest::StatusCode;
|
||||
use serde::Serialize;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::{backoff, id::NodeId};
|
||||
@@ -257,19 +256,6 @@ impl Node {
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Generate the simplified API-friendly description of a node's state
|
||||
pub(crate) fn describe(&self) -> NodeDescribeResponse {
|
||||
NodeDescribeResponse {
|
||||
id: self.id,
|
||||
availability: self.availability.into(),
|
||||
scheduling: self.scheduling,
|
||||
listen_http_addr: self.listen_http_addr.clone(),
|
||||
listen_http_port: self.listen_http_port,
|
||||
listen_pg_addr: self.listen_pg_addr.clone(),
|
||||
listen_pg_port: self.listen_pg_port,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Node {
|
||||
@@ -1,14 +1,13 @@
|
||||
use pageserver_api::{
|
||||
models::{
|
||||
LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
|
||||
TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse,
|
||||
TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, TopTenantShardsResponse,
|
||||
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
|
||||
},
|
||||
shard::TenantShardId,
|
||||
};
|
||||
use pageserver_client::mgmt_api::{Client, Result};
|
||||
use reqwest::StatusCode;
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::id::{NodeId, TimelineId};
|
||||
|
||||
/// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
|
||||
/// controller to collect metrics in a non-intrusive manner.
|
||||
@@ -89,18 +88,6 @@ impl PageserverClient {
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn tenant_scan_remote_storage(
|
||||
&self,
|
||||
tenant_id: TenantId,
|
||||
) -> Result<TenantScanRemoteStorageResponse> {
|
||||
measured_request!(
|
||||
"tenant_scan_remote_storage",
|
||||
crate::metrics::Method::Get,
|
||||
&self.node_id_label,
|
||||
self.inner.tenant_scan_remote_storage(tenant_id).await
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn tenant_secondary_download(
|
||||
&self,
|
||||
tenant_id: TenantShardId,
|
||||
@@ -114,27 +101,6 @@ impl PageserverClient {
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn tenant_secondary_status(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
) -> Result<SecondaryProgress> {
|
||||
measured_request!(
|
||||
"tenant_secondary_status",
|
||||
crate::metrics::Method::Get,
|
||||
&self.node_id_label,
|
||||
self.inner.tenant_secondary_status(tenant_shard_id).await
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {
|
||||
measured_request!(
|
||||
"tenant_heatmap_upload",
|
||||
crate::metrics::Method::Post,
|
||||
&self.node_id_label,
|
||||
self.inner.tenant_heatmap_upload(tenant_id).await
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn location_config(
|
||||
&self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
@@ -234,16 +200,4 @@ impl PageserverClient {
|
||||
self.inner.get_utilization().await
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) async fn top_tenant_shards(
|
||||
&self,
|
||||
request: TopTenantShardsRequest,
|
||||
) -> Result<TopTenantShardsResponse> {
|
||||
measured_request!(
|
||||
"top_tenants",
|
||||
crate::metrics::Method::Post,
|
||||
&self.node_id_label,
|
||||
self.inner.top_tenant_shards(request).await
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,6 @@ pub(crate) mod split_state;
|
||||
use std::collections::HashMap;
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use self::split_state::SplitState;
|
||||
use camino::Utf8Path;
|
||||
@@ -80,7 +79,7 @@ pub(crate) enum DatabaseError {
|
||||
Logical(String),
|
||||
}
|
||||
|
||||
#[derive(measured::FixedCardinalityLabel, Copy, Clone)]
|
||||
#[derive(measured::FixedCardinalityLabel, Clone)]
|
||||
pub(crate) enum DatabaseOperation {
|
||||
InsertNode,
|
||||
UpdateNode,
|
||||
@@ -145,41 +144,18 @@ impl Persistence {
|
||||
}
|
||||
}
|
||||
|
||||
/// A helper for use during startup, where we would like to tolerate concurrent restarts of the
|
||||
/// database and the storage controller, therefore the database might not be available right away
|
||||
pub async fn await_connection(
|
||||
database_url: &str,
|
||||
timeout: Duration,
|
||||
) -> Result<(), diesel::ConnectionError> {
|
||||
let started_at = Instant::now();
|
||||
loop {
|
||||
match PgConnection::establish(database_url) {
|
||||
Ok(_) => {
|
||||
tracing::info!("Connected to database.");
|
||||
return Ok(());
|
||||
}
|
||||
Err(e) => {
|
||||
if started_at.elapsed() > timeout {
|
||||
return Err(e);
|
||||
} else {
|
||||
tracing::info!("Database not yet available, waiting... ({e})");
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wraps `with_conn` in order to collect latency and error metrics
|
||||
async fn with_measured_conn<F, R>(&self, op: DatabaseOperation, func: F) -> DatabaseResult<R>
|
||||
where
|
||||
F: Fn(&mut PgConnection) -> DatabaseResult<R> + Send + 'static,
|
||||
F: FnOnce(&mut PgConnection) -> DatabaseResult<R> + Send + 'static,
|
||||
R: Send + 'static,
|
||||
{
|
||||
let latency = &METRICS_REGISTRY
|
||||
.metrics_group
|
||||
.storage_controller_database_query_latency;
|
||||
let _timer = latency.start_timer(DatabaseQueryLatencyLabelGroup { operation: op });
|
||||
let _timer = latency.start_timer(DatabaseQueryLatencyLabelGroup {
|
||||
operation: op.clone(),
|
||||
});
|
||||
|
||||
let res = self.with_conn(func).await;
|
||||
|
||||
@@ -199,48 +175,13 @@ impl Persistence {
|
||||
/// Call the provided function in a tokio blocking thread, with a Diesel database connection.
|
||||
async fn with_conn<F, R>(&self, func: F) -> DatabaseResult<R>
|
||||
where
|
||||
F: Fn(&mut PgConnection) -> DatabaseResult<R> + Send + 'static,
|
||||
F: FnOnce(&mut PgConnection) -> DatabaseResult<R> + Send + 'static,
|
||||
R: Send + 'static,
|
||||
{
|
||||
// A generous allowance for how many times we may retry serializable transactions
|
||||
// before giving up. This is not expected to be hit: it is a defensive measure in case we
|
||||
// somehow engineer a situation where duelling transactions might otherwise live-lock.
|
||||
const MAX_RETRIES: usize = 128;
|
||||
|
||||
let mut conn = self.connection_pool.get()?;
|
||||
tokio::task::spawn_blocking(move || -> DatabaseResult<R> {
|
||||
let mut retry_count = 0;
|
||||
loop {
|
||||
match conn.build_transaction().serializable().run(|c| func(c)) {
|
||||
Ok(r) => break Ok(r),
|
||||
Err(
|
||||
err @ DatabaseError::Query(diesel::result::Error::DatabaseError(
|
||||
diesel::result::DatabaseErrorKind::SerializationFailure,
|
||||
_,
|
||||
)),
|
||||
) => {
|
||||
retry_count += 1;
|
||||
if retry_count > MAX_RETRIES {
|
||||
tracing::error!(
|
||||
"Exceeded max retries on SerializationFailure errors: {err:?}"
|
||||
);
|
||||
break Err(err);
|
||||
} else {
|
||||
// Retry on serialization errors: these are expected, because even though our
|
||||
// transactions don't fight for the same rows, they will occasionally collide
|
||||
// on index pages (e.g. increment_generation for unrelated shards can collide)
|
||||
tracing::debug!(
|
||||
"Retrying transaction on serialization failure {err:?}"
|
||||
);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Err(e) => break Err(e),
|
||||
}
|
||||
}
|
||||
})
|
||||
.await
|
||||
.expect("Task panic")
|
||||
tokio::task::spawn_blocking(move || -> DatabaseResult<R> { func(&mut conn) })
|
||||
.await
|
||||
.expect("Task panic")
|
||||
}
|
||||
|
||||
/// When a node is first registered, persist it before using it for anything
|
||||
@@ -393,11 +334,14 @@ impl Persistence {
|
||||
self.with_measured_conn(
|
||||
DatabaseOperation::InsertTenantShards,
|
||||
move |conn| -> DatabaseResult<()> {
|
||||
for tenant in &shards {
|
||||
diesel::insert_into(tenant_shards)
|
||||
.values(tenant)
|
||||
.execute(conn)?;
|
||||
}
|
||||
conn.transaction(|conn| -> QueryResult<()> {
|
||||
for tenant in &shards {
|
||||
diesel::insert_into(tenant_shards)
|
||||
.values(tenant)
|
||||
.execute(conn)?;
|
||||
}
|
||||
Ok(())
|
||||
})?;
|
||||
Ok(())
|
||||
},
|
||||
)
|
||||
@@ -565,11 +509,8 @@ impl Persistence {
|
||||
let update = ShardUpdate {
|
||||
generation: input_generation.map(|g| g.into().unwrap() as i32),
|
||||
placement_policy: input_placement_policy
|
||||
.as_ref()
|
||||
.map(|p| serde_json::to_string(&p).unwrap()),
|
||||
config: input_config
|
||||
.as_ref()
|
||||
.map(|c| serde_json::to_string(&c).unwrap()),
|
||||
config: input_config.map(|c| serde_json::to_string(&c).unwrap()),
|
||||
scheduling_policy: input_scheduling_policy
|
||||
.map(|p| serde_json::to_string(&p).unwrap()),
|
||||
};
|
||||
@@ -616,51 +557,55 @@ impl Persistence {
|
||||
) -> DatabaseResult<()> {
|
||||
use crate::schema::tenant_shards::dsl::*;
|
||||
self.with_measured_conn(DatabaseOperation::BeginShardSplit, move |conn| -> DatabaseResult<()> {
|
||||
// Mark parent shards as splitting
|
||||
conn.transaction(|conn| -> DatabaseResult<()> {
|
||||
// Mark parent shards as splitting
|
||||
|
||||
let updated = diesel::update(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.eq(old_shard_count.literal() as i32))
|
||||
.set((splitting.eq(1),))
|
||||
.execute(conn)?;
|
||||
if u8::try_from(updated)
|
||||
.map_err(|_| DatabaseError::Logical(
|
||||
format!("Overflow existing shard count {} while splitting", updated))
|
||||
)? != old_shard_count.count() {
|
||||
// Perhaps a deletion or another split raced with this attempt to split, mutating
|
||||
// the parent shards that we intend to split. In this case the split request should fail.
|
||||
return Err(DatabaseError::Logical(
|
||||
format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
|
||||
));
|
||||
}
|
||||
|
||||
// FIXME: spurious clone to sidestep closure move rules
|
||||
let parent_to_children = parent_to_children.clone();
|
||||
|
||||
// Insert child shards
|
||||
for (parent_shard_id, children) in parent_to_children {
|
||||
let mut parent = crate::schema::tenant_shards::table
|
||||
.filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
|
||||
.filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
|
||||
.filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
|
||||
.load::<TenantShardPersistence>(conn)?;
|
||||
let parent = if parent.len() != 1 {
|
||||
return Err(DatabaseError::Logical(format!(
|
||||
"Parent shard {parent_shard_id} not found"
|
||||
)));
|
||||
} else {
|
||||
parent.pop().unwrap()
|
||||
};
|
||||
for mut shard in children {
|
||||
// Carry the parent's generation into the child
|
||||
shard.generation = parent.generation;
|
||||
|
||||
debug_assert!(shard.splitting == SplitState::Splitting);
|
||||
diesel::insert_into(tenant_shards)
|
||||
.values(shard)
|
||||
.execute(conn)?;
|
||||
let updated = diesel::update(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.eq(old_shard_count.literal() as i32))
|
||||
.set((splitting.eq(1),))
|
||||
.execute(conn)?;
|
||||
if u8::try_from(updated)
|
||||
.map_err(|_| DatabaseError::Logical(
|
||||
format!("Overflow existing shard count {} while splitting", updated))
|
||||
)? != old_shard_count.count() {
|
||||
// Perhaps a deletion or another split raced with this attempt to split, mutating
|
||||
// the parent shards that we intend to split. In this case the split request should fail.
|
||||
return Err(DatabaseError::Logical(
|
||||
format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: spurious clone to sidestep closure move rules
|
||||
let parent_to_children = parent_to_children.clone();
|
||||
|
||||
// Insert child shards
|
||||
for (parent_shard_id, children) in parent_to_children {
|
||||
let mut parent = crate::schema::tenant_shards::table
|
||||
.filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
|
||||
.filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
|
||||
.filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
|
||||
.load::<TenantShardPersistence>(conn)?;
|
||||
let parent = if parent.len() != 1 {
|
||||
return Err(DatabaseError::Logical(format!(
|
||||
"Parent shard {parent_shard_id} not found"
|
||||
)));
|
||||
} else {
|
||||
parent.pop().unwrap()
|
||||
};
|
||||
for mut shard in children {
|
||||
// Carry the parent's generation into the child
|
||||
shard.generation = parent.generation;
|
||||
|
||||
debug_assert!(shard.splitting == SplitState::Splitting);
|
||||
diesel::insert_into(tenant_shards)
|
||||
.values(shard)
|
||||
.execute(conn)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
@@ -678,18 +623,22 @@ impl Persistence {
|
||||
self.with_measured_conn(
|
||||
DatabaseOperation::CompleteShardSplit,
|
||||
move |conn| -> DatabaseResult<()> {
|
||||
// Drop parent shards
|
||||
diesel::delete(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.eq(old_shard_count.literal() as i32))
|
||||
.execute(conn)?;
|
||||
conn.transaction(|conn| -> QueryResult<()> {
|
||||
// Drop parent shards
|
||||
diesel::delete(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.eq(old_shard_count.literal() as i32))
|
||||
.execute(conn)?;
|
||||
|
||||
// Clear sharding flag
|
||||
let updated = diesel::update(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.set((splitting.eq(0),))
|
||||
.execute(conn)?;
|
||||
debug_assert!(updated > 0);
|
||||
// Clear sharding flag
|
||||
let updated = diesel::update(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.set((splitting.eq(0),))
|
||||
.execute(conn)?;
|
||||
debug_assert!(updated > 0);
|
||||
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
},
|
||||
@@ -708,41 +657,46 @@ impl Persistence {
|
||||
self.with_measured_conn(
|
||||
DatabaseOperation::AbortShardSplit,
|
||||
move |conn| -> DatabaseResult<AbortShardSplitStatus> {
|
||||
// Clear the splitting state on parent shards
|
||||
let updated = diesel::update(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.ne(new_shard_count.literal() as i32))
|
||||
.set((splitting.eq(0),))
|
||||
.execute(conn)?;
|
||||
let aborted =
|
||||
conn.transaction(|conn| -> DatabaseResult<AbortShardSplitStatus> {
|
||||
// Clear the splitting state on parent shards
|
||||
let updated = diesel::update(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.ne(new_shard_count.literal() as i32))
|
||||
.set((splitting.eq(0),))
|
||||
.execute(conn)?;
|
||||
|
||||
// Parent shards are already gone: we cannot abort.
|
||||
if updated == 0 {
|
||||
return Ok(AbortShardSplitStatus::Complete);
|
||||
}
|
||||
// Parent shards are already gone: we cannot abort.
|
||||
if updated == 0 {
|
||||
return Ok(AbortShardSplitStatus::Complete);
|
||||
}
|
||||
|
||||
// Sanity check: if parent shards were present, their cardinality should
|
||||
// be less than the number of child shards.
|
||||
if updated >= new_shard_count.count() as usize {
|
||||
return Err(DatabaseError::Logical(format!(
|
||||
"Unexpected parent shard count {updated} while aborting split to \
|
||||
// Sanity check: if parent shards were present, their cardinality should
|
||||
// be less than the number of child shards.
|
||||
if updated >= new_shard_count.count() as usize {
|
||||
return Err(DatabaseError::Logical(format!(
|
||||
"Unexpected parent shard count {updated} while aborting split to \
|
||||
count {new_shard_count:?} on tenant {split_tenant_id}"
|
||||
)));
|
||||
}
|
||||
)));
|
||||
}
|
||||
|
||||
// Erase child shards
|
||||
diesel::delete(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.eq(new_shard_count.literal() as i32))
|
||||
.execute(conn)?;
|
||||
// Erase child shards
|
||||
diesel::delete(tenant_shards)
|
||||
.filter(tenant_id.eq(split_tenant_id.to_string()))
|
||||
.filter(shard_count.eq(new_shard_count.literal() as i32))
|
||||
.execute(conn)?;
|
||||
|
||||
Ok(AbortShardSplitStatus::Aborted)
|
||||
Ok(AbortShardSplitStatus::Aborted)
|
||||
})?;
|
||||
|
||||
Ok(aborted)
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
/// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably
|
||||
/// Parts of [`crate::tenant_state::TenantState`] that are stored durably
|
||||
#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)]
|
||||
#[diesel(table_name = crate::schema::tenant_shards)]
|
||||
pub(crate) struct TenantShardPersistence {
|
||||
@@ -1,12 +1,12 @@
|
||||
use crate::pageserver_client::PageserverClient;
|
||||
use crate::persistence::Persistence;
|
||||
use crate::service;
|
||||
use hyper::StatusCode;
|
||||
use pageserver_api::models::{
|
||||
LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig,
|
||||
};
|
||||
use pageserver_api::shard::{ShardIdentity, TenantShardId};
|
||||
use pageserver_client::mgmt_api;
|
||||
use reqwest::StatusCode;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
@@ -18,14 +18,14 @@ use utils::sync::gate::GateGuard;
|
||||
|
||||
use crate::compute_hook::{ComputeHook, NotifyError};
|
||||
use crate::node::Node;
|
||||
use crate::tenant_shard::{IntentState, ObservedState, ObservedStateLocation};
|
||||
use crate::tenant_state::{IntentState, ObservedState, ObservedStateLocation};
|
||||
|
||||
const DEFAULT_HEATMAP_PERIOD: &str = "60s";
|
||||
|
||||
/// Object with the lifetime of the background reconcile task that is created
|
||||
/// for tenants which have a difference between their intent and observed states.
|
||||
pub(super) struct Reconciler {
|
||||
/// See [`crate::tenant_shard::TenantShard`] for the meanings of these fields: they are a snapshot
|
||||
/// See [`crate::tenant_state::TenantState`] for the meanings of these fields: they are a snapshot
|
||||
/// of a tenant's state from when we spawned a reconcile task.
|
||||
pub(super) tenant_shard_id: TenantShardId,
|
||||
pub(crate) shard: ShardIdentity,
|
||||
@@ -48,15 +48,11 @@ pub(super) struct Reconciler {
|
||||
|
||||
/// To avoid stalling if the cloud control plane is unavailable, we may proceed
|
||||
/// past failures in [`ComputeHook::notify`], but we _must_ remember that we failed
|
||||
/// so that we can set [`crate::tenant_shard::TenantShard::pending_compute_notification`] to ensure a later retry.
|
||||
/// so that we can set [`crate::tenant_state::TenantState::pending_compute_notification`] to ensure a later retry.
|
||||
pub(crate) compute_notify_failure: bool,
|
||||
|
||||
/// Reconciler is responsible for keeping alive semaphore units that limit concurrency on how many
|
||||
/// we will spawn.
|
||||
pub(crate) _resource_units: ReconcileUnits,
|
||||
|
||||
/// A means to abort background reconciliation: it is essential to
|
||||
/// call this when something changes in the original TenantShard that
|
||||
/// call this when something changes in the original TenantState that
|
||||
/// will make this reconciliation impossible or unnecessary, for
|
||||
/// example when a pageserver node goes offline, or the PlacementPolicy for
|
||||
/// the tenant is changed.
|
||||
@@ -70,20 +66,7 @@ pub(super) struct Reconciler {
|
||||
pub(crate) persistence: Arc<Persistence>,
|
||||
}
|
||||
|
||||
/// RAII resource units granted to a Reconciler, which it should keep alive until it finishes doing I/O
|
||||
pub(crate) struct ReconcileUnits {
|
||||
_sem_units: tokio::sync::OwnedSemaphorePermit,
|
||||
}
|
||||
|
||||
impl ReconcileUnits {
|
||||
pub(crate) fn new(sem_units: tokio::sync::OwnedSemaphorePermit) -> Self {
|
||||
Self {
|
||||
_sem_units: sem_units,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This is a snapshot of [`crate::tenant_shard::IntentState`], but it does not do any
|
||||
/// This is a snapshot of [`crate::tenant_state::IntentState`], but it does not do any
|
||||
/// reference counting for Scheduler. The IntentState is what the scheduler works with,
|
||||
/// and the TargetState is just the instruction for a particular Reconciler run.
|
||||
#[derive(Debug)]
|
||||
@@ -767,10 +750,7 @@ impl Reconciler {
|
||||
// It is up to the caller whether they want to drop out on this error, but they don't have to:
|
||||
// in general we should avoid letting unavailability of the cloud control plane stop us from
|
||||
// making progress.
|
||||
if !matches!(e, NotifyError::ShuttingDown) {
|
||||
tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
|
||||
}
|
||||
|
||||
tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
|
||||
// Set this flag so that in our ReconcileResult we will set the flag on the shard that it
|
||||
// needs to retry at some point.
|
||||
self.compute_notify_failure = true;
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::{node::Node, tenant_shard::TenantShard};
|
||||
use crate::{node::Node, tenant_state::TenantState};
|
||||
use pageserver_api::controller_api::UtilizationScore;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
@@ -27,10 +27,8 @@ pub enum MaySchedule {
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct SchedulerNode {
|
||||
/// How many shards are currently scheduled on this node, via their [`crate::tenant_shard::IntentState`].
|
||||
/// How many shards are currently scheduled on this node, via their [`crate::tenant_state::IntentState`].
|
||||
shard_count: usize,
|
||||
/// How many shards are currently attached on this node, via their [`crate::tenant_shard::IntentState`].
|
||||
attached_shard_count: usize,
|
||||
|
||||
/// Whether this node is currently elegible to have new shards scheduled (this is derived
|
||||
/// from a node's availability state and scheduling policy).
|
||||
@@ -44,9 +42,7 @@ impl PartialEq for SchedulerNode {
|
||||
(MaySchedule::Yes(_), MaySchedule::Yes(_)) | (MaySchedule::No, MaySchedule::No)
|
||||
);
|
||||
|
||||
may_schedule_matches
|
||||
&& self.shard_count == other.shard_count
|
||||
&& self.attached_shard_count == other.attached_shard_count
|
||||
may_schedule_matches && self.shard_count == other.shard_count
|
||||
}
|
||||
}
|
||||
|
||||
@@ -88,21 +84,7 @@ impl std::ops::Add for AffinityScore {
|
||||
}
|
||||
}
|
||||
|
||||
/// Hint for whether this is a sincere attempt to schedule, or a speculative
|
||||
/// check for where we _would_ schedule (done during optimization)
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum ScheduleMode {
|
||||
Normal,
|
||||
Speculative,
|
||||
}
|
||||
|
||||
impl Default for ScheduleMode {
|
||||
fn default() -> Self {
|
||||
Self::Normal
|
||||
}
|
||||
}
|
||||
|
||||
// For carrying state between multiple calls to [`TenantShard::schedule`], e.g. when calling
|
||||
// For carrying state between multiple calls to [`TenantState::schedule`], e.g. when calling
|
||||
// it for many shards in the same tenant.
|
||||
#[derive(Debug, Default)]
|
||||
pub(crate) struct ScheduleContext {
|
||||
@@ -111,8 +93,6 @@ pub(crate) struct ScheduleContext {
|
||||
|
||||
/// Specifically how many _attached_ locations are on each node
|
||||
pub(crate) attached_nodes: HashMap<NodeId, usize>,
|
||||
|
||||
pub(crate) mode: ScheduleMode,
|
||||
}
|
||||
|
||||
impl ScheduleContext {
|
||||
@@ -142,15 +122,6 @@ impl ScheduleContext {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum RefCountUpdate {
|
||||
PromoteSecondary,
|
||||
Attach,
|
||||
Detach,
|
||||
DemoteAttached,
|
||||
AddSecondary,
|
||||
RemoveSecondary,
|
||||
}
|
||||
|
||||
impl Scheduler {
|
||||
pub(crate) fn new<'a>(nodes: impl Iterator<Item = &'a Node>) -> Self {
|
||||
let mut scheduler_nodes = HashMap::new();
|
||||
@@ -159,7 +130,6 @@ impl Scheduler {
|
||||
node.get_id(),
|
||||
SchedulerNode {
|
||||
shard_count: 0,
|
||||
attached_shard_count: 0,
|
||||
may_schedule: node.may_schedule(),
|
||||
},
|
||||
);
|
||||
@@ -177,7 +147,7 @@ impl Scheduler {
|
||||
pub(crate) fn consistency_check<'a>(
|
||||
&self,
|
||||
nodes: impl Iterator<Item = &'a Node>,
|
||||
shards: impl Iterator<Item = &'a TenantShard>,
|
||||
shards: impl Iterator<Item = &'a TenantState>,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut expect_nodes: HashMap<NodeId, SchedulerNode> = HashMap::new();
|
||||
for node in nodes {
|
||||
@@ -185,7 +155,6 @@ impl Scheduler {
|
||||
node.get_id(),
|
||||
SchedulerNode {
|
||||
shard_count: 0,
|
||||
attached_shard_count: 0,
|
||||
may_schedule: node.may_schedule(),
|
||||
},
|
||||
);
|
||||
@@ -194,10 +163,7 @@ impl Scheduler {
|
||||
for shard in shards {
|
||||
if let Some(node_id) = shard.intent.get_attached() {
|
||||
match expect_nodes.get_mut(node_id) {
|
||||
Some(node) => {
|
||||
node.shard_count += 1;
|
||||
node.attached_shard_count += 1;
|
||||
}
|
||||
Some(node) => node.shard_count += 1,
|
||||
None => anyhow::bail!(
|
||||
"Tenant {} references nonexistent node {}",
|
||||
shard.tenant_shard_id,
|
||||
@@ -245,42 +211,31 @@ impl Scheduler {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update the reference counts of a node. These reference counts are used to guide scheduling
|
||||
/// decisions, not for memory management: they represent the number of tenant shard whose IntentState
|
||||
/// targets this node and the number of tenants shars whose IntentState is attached to this
|
||||
/// node.
|
||||
/// Increment the reference count of a node. This reference count is used to guide scheduling
|
||||
/// decisions, not for memory management: it represents one tenant shard whose IntentState targets
|
||||
/// this node.
|
||||
///
|
||||
/// It is an error to call this for a node that is not known to the scheduler (i.e. passed into
|
||||
/// [`Self::new`] or [`Self::node_upsert`])
|
||||
pub(crate) fn update_node_ref_counts(&mut self, node_id: NodeId, update: RefCountUpdate) {
|
||||
pub(crate) fn node_inc_ref(&mut self, node_id: NodeId) {
|
||||
let Some(node) = self.nodes.get_mut(&node_id) else {
|
||||
tracing::error!("Scheduler missing node {node_id}");
|
||||
debug_assert!(false);
|
||||
return;
|
||||
};
|
||||
|
||||
node.shard_count += 1;
|
||||
}
|
||||
|
||||
/// Decrement a node's reference count. Inverse of [`Self::node_inc_ref`].
|
||||
pub(crate) fn node_dec_ref(&mut self, node_id: NodeId) {
|
||||
let Some(node) = self.nodes.get_mut(&node_id) else {
|
||||
debug_assert!(false);
|
||||
tracing::error!("Scheduler missing node {node_id}");
|
||||
return;
|
||||
};
|
||||
|
||||
match update {
|
||||
RefCountUpdate::PromoteSecondary => {
|
||||
node.attached_shard_count += 1;
|
||||
}
|
||||
RefCountUpdate::Attach => {
|
||||
node.shard_count += 1;
|
||||
node.attached_shard_count += 1;
|
||||
}
|
||||
RefCountUpdate::Detach => {
|
||||
node.shard_count -= 1;
|
||||
node.attached_shard_count -= 1;
|
||||
}
|
||||
RefCountUpdate::DemoteAttached => {
|
||||
node.attached_shard_count -= 1;
|
||||
}
|
||||
RefCountUpdate::AddSecondary => {
|
||||
node.shard_count += 1;
|
||||
}
|
||||
RefCountUpdate::RemoveSecondary => {
|
||||
node.shard_count -= 1;
|
||||
}
|
||||
}
|
||||
node.shard_count -= 1;
|
||||
}
|
||||
|
||||
pub(crate) fn node_upsert(&mut self, node: &Node) {
|
||||
@@ -292,7 +247,6 @@ impl Scheduler {
|
||||
Vacant(entry) => {
|
||||
entry.insert(SchedulerNode {
|
||||
shard_count: 0,
|
||||
attached_shard_count: 0,
|
||||
may_schedule: node.may_schedule(),
|
||||
});
|
||||
}
|
||||
@@ -375,34 +329,27 @@ impl Scheduler {
|
||||
scores.sort_by_key(|i| (i.1, i.2, i.0));
|
||||
|
||||
if scores.is_empty() {
|
||||
// After applying constraints, no pageservers were left.
|
||||
if !matches!(context.mode, ScheduleMode::Speculative) {
|
||||
// If this was not a speculative attempt, log details to understand why we couldn't
|
||||
// schedule: this may help an engineer understand if some nodes are marked offline
|
||||
// in a way that's preventing progress.
|
||||
// After applying constraints, no pageservers were left. We log some detail about
|
||||
// the state of nodes to help understand why this happened. This is not logged as an error because
|
||||
// it is legitimately possible for enough nodes to be Offline to prevent scheduling a shard.
|
||||
tracing::info!("Scheduling failure, while excluding {hard_exclude:?}, node states:");
|
||||
for (node_id, node) in &self.nodes {
|
||||
tracing::info!(
|
||||
"Scheduling failure, while excluding {hard_exclude:?}, node states:"
|
||||
"Node {node_id}: may_schedule={} shards={}",
|
||||
node.may_schedule != MaySchedule::No,
|
||||
node.shard_count
|
||||
);
|
||||
for (node_id, node) in &self.nodes {
|
||||
tracing::info!(
|
||||
"Node {node_id}: may_schedule={} shards={}",
|
||||
node.may_schedule != MaySchedule::No,
|
||||
node.shard_count
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return Err(ScheduleError::ImpossibleConstraint);
|
||||
}
|
||||
|
||||
// Lowest score wins
|
||||
let node_id = scores.first().unwrap().0;
|
||||
|
||||
if !matches!(context.mode, ScheduleMode::Speculative) {
|
||||
tracing::info!(
|
||||
tracing::info!(
|
||||
"scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?})",
|
||||
scores.iter().map(|i| i.0 .0).collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
// Note that we do not update shard count here to reflect the scheduling: that
|
||||
// is IntentState's job when the scheduled location is used.
|
||||
@@ -415,11 +362,6 @@ impl Scheduler {
|
||||
pub(crate) fn get_node_shard_count(&self, node_id: NodeId) -> usize {
|
||||
self.nodes.get(&node_id).unwrap().shard_count
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn get_node_attached_shard_count(&self, node_id: NodeId) -> usize {
|
||||
self.nodes.get(&node_id).unwrap().attached_shard_count
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -456,7 +398,7 @@ pub(crate) mod test_utils {
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
use crate::tenant_shard::IntentState;
|
||||
use crate::tenant_state::IntentState;
|
||||
#[test]
|
||||
fn scheduler_basic() -> anyhow::Result<()> {
|
||||
let nodes = test_utils::make_test_nodes(2);
|
||||
@@ -472,28 +414,18 @@ mod tests {
|
||||
let scheduled = scheduler.schedule_shard(&[], &context)?;
|
||||
t2_intent.set_attached(&mut scheduler, Some(scheduled));
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 1);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(1)), 1);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 1);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 1);
|
||||
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
|
||||
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
|
||||
|
||||
let scheduled = scheduler.schedule_shard(&t1_intent.all_pageservers(), &context)?;
|
||||
t1_intent.push_secondary(&mut scheduler, scheduled);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 1);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(1)), 1);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 2);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 1);
|
||||
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
|
||||
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 2);
|
||||
|
||||
t1_intent.clear(&mut scheduler);
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 0);
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 1);
|
||||
|
||||
let total_attached = scheduler.get_node_attached_shard_count(NodeId(1))
|
||||
+ scheduler.get_node_attached_shard_count(NodeId(2));
|
||||
assert_eq!(total_attached, 1);
|
||||
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
|
||||
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
|
||||
|
||||
if cfg!(debug_assertions) {
|
||||
// Dropping an IntentState without clearing it causes a panic in debug mode,
|
||||
@@ -504,12 +436,8 @@ mod tests {
|
||||
assert!(result.is_err());
|
||||
} else {
|
||||
t2_intent.clear(&mut scheduler);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 0);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(1)), 0);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 0);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 0);
|
||||
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
|
||||
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 0);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
File diff suppressed because it is too large
@@ -7,8 +7,7 @@ use std::{
use crate::{
metrics::{self, ReconcileCompleteLabelGroup, ReconcileOutcome},
persistence::TenantShardPersistence,
reconciler::ReconcileUnits,
scheduler::{AffinityScore, MaySchedule, RefCountUpdate, ScheduleContext},
scheduler::{AffinityScore, MaySchedule, ScheduleContext},
};
use pageserver_api::controller_api::{PlacementPolicy, ShardSchedulingPolicy};
use pageserver_api::{
@@ -23,7 +22,7 @@ use utils::{
generation::Generation,
id::NodeId,
seqwait::{SeqWait, SeqWaitError},
sync::gate::GateGuard,
sync::gate::Gate,
};

use crate::{
@@ -38,18 +37,12 @@ use crate::{
};

/// Serialization helper
fn read_last_error<S, T>(v: &std::sync::Mutex<Option<T>>, serializer: S) -> Result<S::Ok, S::Error>
fn read_mutex_content<S, T>(v: &std::sync::Mutex<T>, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::ser::Serializer,
T: std::fmt::Display,
T: Clone + std::fmt::Display,
{
serializer.collect_str(
&v.lock()
.unwrap()
.as_ref()
.map(|e| format!("{e}"))
.unwrap_or("".to_string()),
)
serializer.collect_str(&v.lock().unwrap())
}
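For context on the serialization helper above: both variants follow serde's `serialize_with` pattern for a mutex-guarded field. A self-contained sketch of the `Option`-based variant, with hypothetical field names and serde/serde_json assumed as dependencies:

use std::sync::Mutex;

use serde::{Serialize, Serializer};

/// Render a mutex-guarded optional error as a string; "" when no error is set.
fn read_last_error<S, T>(v: &Mutex<Option<T>>, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
    T: std::fmt::Display,
{
    let rendered = v
        .lock()
        .unwrap()
        .as_ref()
        .map(|e| e.to_string())
        .unwrap_or_default();
    serializer.collect_str(&rendered)
}

#[derive(Serialize)]
struct DebugView {
    #[serde(serialize_with = "read_last_error")]
    last_error: Mutex<Option<String>>,
}

fn main() {
    let view = DebugView {
        last_error: Mutex::new(Some("node offline".to_string())),
    };
    // Prints {"last_error":"node offline"}
    println!("{}", serde_json::to_string(&view).unwrap());
}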
/// In-memory state for a particular tenant shard.
|
||||
@@ -57,7 +50,7 @@ where
|
||||
/// This struct implement Serialize for debugging purposes, but is _not_ persisted
|
||||
/// itself: see [`crate::persistence`] for the subset of tenant shard state that is persisted.
|
||||
#[derive(Serialize)]
|
||||
pub(crate) struct TenantShard {
|
||||
pub(crate) struct TenantState {
|
||||
pub(crate) tenant_shard_id: TenantShardId,
|
||||
|
||||
pub(crate) shard: ShardIdentity,
|
||||
@@ -102,10 +95,6 @@ pub(crate) struct TenantShard {
|
||||
/// reconciliation, and timeline creation.
|
||||
pub(crate) splitting: SplitState,
|
||||
|
||||
/// If a tenant was enqueued for later reconcile due to hitting concurrency limit, this flag
|
||||
/// is set. This flag is cleared when the tenant is popped off the delay queue.
|
||||
pub(crate) delayed_reconcile: bool,
|
||||
|
||||
/// Optionally wait for reconciliation to complete up to a particular
|
||||
/// sequence number.
|
||||
#[serde(skip)]
|
||||
@@ -117,19 +106,15 @@ pub(crate) struct TenantShard {
|
||||
#[serde(skip)]
|
||||
pub(crate) error_waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
|
||||
|
||||
/// The most recent error from a reconcile on this tenant. This is a nested Arc
|
||||
/// because:
|
||||
/// - ReconcileWaiters need to Arc-clone the overall object to read it later
|
||||
/// - ReconcileWaitError needs to use an `Arc<ReconcileError>` because we can construct
|
||||
/// many waiters for one shard, and the underlying error types are not Clone.
|
||||
/// The most recent error from a reconcile on this tenant
|
||||
/// TODO: generalize to an array of recent events
|
||||
/// TOOD: use a ArcSwap instead of mutex for faster reads?
|
||||
#[serde(serialize_with = "read_last_error")]
|
||||
pub(crate) last_error: std::sync::Arc<std::sync::Mutex<Option<Arc<ReconcileError>>>>,
|
||||
#[serde(serialize_with = "read_mutex_content")]
|
||||
pub(crate) last_error: std::sync::Arc<std::sync::Mutex<String>>,
|
||||
|
||||
/// If we have a pending compute notification that for some reason we weren't able to send,
|
||||
/// set this to true. If this is set, calls to [`Self::get_reconcile_needed`] will return Yes
|
||||
/// and trigger a Reconciler run. This is the mechanism by which compute notifications are included in the scope
|
||||
/// set this to true. If this is set, calls to [`Self::maybe_reconcile`] will run a task to retry
|
||||
/// sending it. This is the mechanism by which compute notifications are included in the scope
|
||||
/// of state that we publish externally in an eventually consistent way.
|
||||
pub(crate) pending_compute_notification: bool,
|
||||
|
||||
@@ -153,7 +138,7 @@ impl IntentState {
}
pub(crate) fn single(scheduler: &mut Scheduler, node_id: Option<NodeId>) -> Self {
if let Some(node_id) = node_id {
scheduler.update_node_ref_counts(node_id, RefCountUpdate::Attach);
scheduler.node_inc_ref(node_id);
}
Self {
attached: node_id,
@@ -164,10 +149,10 @@ impl IntentState {
pub(crate) fn set_attached(&mut self, scheduler: &mut Scheduler, new_attached: Option<NodeId>) {
if self.attached != new_attached {
if let Some(old_attached) = self.attached.take() {
scheduler.update_node_ref_counts(old_attached, RefCountUpdate::Detach);
scheduler.node_dec_ref(old_attached);
}
if let Some(new_attached) = &new_attached {
scheduler.update_node_ref_counts(*new_attached, RefCountUpdate::Attach);
scheduler.node_inc_ref(*new_attached);
}
self.attached = new_attached;
}
@@ -177,27 +162,22 @@ impl IntentState {
/// secondary to attached while maintaining the scheduler's reference counts.
pub(crate) fn promote_attached(
&mut self,
scheduler: &mut Scheduler,
_scheduler: &mut Scheduler,
promote_secondary: NodeId,
) {
// If we call this with a node that isn't in secondary, it would cause incorrect
// scheduler reference counting, since we assume the node is already referenced as a secondary.
debug_assert!(self.secondary.contains(&promote_secondary));

// TODO: when scheduler starts tracking attached + secondary counts separately, we will
// need to call into it here.
self.secondary.retain(|n| n != &promote_secondary);

let demoted = self.attached;
self.attached = Some(promote_secondary);

scheduler.update_node_ref_counts(promote_secondary, RefCountUpdate::PromoteSecondary);
if let Some(demoted) = demoted {
scheduler.update_node_ref_counts(demoted, RefCountUpdate::DemoteAttached);
}
}

pub(crate) fn push_secondary(&mut self, scheduler: &mut Scheduler, new_secondary: NodeId) {
debug_assert!(!self.secondary.contains(&new_secondary));
scheduler.update_node_ref_counts(new_secondary, RefCountUpdate::AddSecondary);
scheduler.node_inc_ref(new_secondary);
self.secondary.push(new_secondary);
}
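The `RefCountUpdate` variants passed to `update_node_ref_counts` above let one scheduler entry point keep per-node totals and attached counts in step across these transitions. One plausible bookkeeping, consistent with the test assertions earlier, sketched with standalone types rather than the real Scheduler:

#[derive(Debug, Clone, Copy)]
enum RefCountUpdate {
    Attach,
    Detach,
    AddSecondary,
    RemoveSecondary,
    PromoteSecondary, // secondary becomes attached on the same node
    DemoteAttached,   // attached becomes secondary on the same node
}

#[derive(Default, Debug)]
struct NodeCounters {
    shard_count: usize,          // any location (attached or secondary) on this node
    attached_shard_count: usize, // attached locations only
}

impl NodeCounters {
    fn apply(&mut self, update: RefCountUpdate) {
        use RefCountUpdate::*;
        match update {
            Attach => {
                self.shard_count += 1;
                self.attached_shard_count += 1;
            }
            Detach => {
                self.shard_count -= 1;
                self.attached_shard_count -= 1;
            }
            AddSecondary => self.shard_count += 1,
            RemoveSecondary => self.shard_count -= 1,
            // Promotion/demotion changes the role of an existing location,
            // so the total stays put and only the attached count moves.
            PromoteSecondary => self.attached_shard_count += 1,
            DemoteAttached => self.attached_shard_count -= 1,
        }
    }
}

fn main() {
    let mut n = NodeCounters::default();
    n.apply(RefCountUpdate::AddSecondary);
    n.apply(RefCountUpdate::PromoteSecondary);
    assert_eq!((n.shard_count, n.attached_shard_count), (1, 1));
}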
@@ -205,27 +185,27 @@ impl IntentState {
|
||||
pub(crate) fn remove_secondary(&mut self, scheduler: &mut Scheduler, node_id: NodeId) {
|
||||
let index = self.secondary.iter().position(|n| *n == node_id);
|
||||
if let Some(index) = index {
|
||||
scheduler.update_node_ref_counts(node_id, RefCountUpdate::RemoveSecondary);
|
||||
scheduler.node_dec_ref(node_id);
|
||||
self.secondary.remove(index);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn clear_secondary(&mut self, scheduler: &mut Scheduler) {
|
||||
for secondary in self.secondary.drain(..) {
|
||||
scheduler.update_node_ref_counts(secondary, RefCountUpdate::RemoveSecondary);
|
||||
scheduler.node_dec_ref(secondary);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove the last secondary node from the list of secondaries
|
||||
pub(crate) fn pop_secondary(&mut self, scheduler: &mut Scheduler) {
|
||||
if let Some(node_id) = self.secondary.pop() {
|
||||
scheduler.update_node_ref_counts(node_id, RefCountUpdate::RemoveSecondary);
|
||||
scheduler.node_dec_ref(node_id);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn clear(&mut self, scheduler: &mut Scheduler) {
|
||||
if let Some(old_attached) = self.attached.take() {
|
||||
scheduler.update_node_ref_counts(old_attached, RefCountUpdate::Detach);
|
||||
scheduler.node_dec_ref(old_attached);
|
||||
}
|
||||
|
||||
self.clear_secondary(scheduler);
|
||||
@@ -256,11 +236,12 @@ impl IntentState {
/// forget the location on the offline node.
///
/// Returns true if a change was made
pub(crate) fn demote_attached(&mut self, scheduler: &mut Scheduler, node_id: NodeId) -> bool {
pub(crate) fn demote_attached(&mut self, node_id: NodeId) -> bool {
if self.attached == Some(node_id) {
// TODO: when scheduler starts tracking attached + secondary counts separately, we will
// need to call into it here.
self.attached = None;
self.secondary.push(node_id);
scheduler.update_node_ref_counts(node_id, RefCountUpdate::DemoteAttached);
true
} else {
false
@@ -307,18 +288,18 @@ pub(crate) struct ReconcilerWaiter {

seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
error_seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
error: std::sync::Arc<std::sync::Mutex<Option<Arc<ReconcileError>>>>,
error: std::sync::Arc<std::sync::Mutex<String>>,
seq: Sequence,
}

#[derive(thiserror::Error, Debug)]
pub(crate) enum ReconcileWaitError {
pub enum ReconcileWaitError {
#[error("Timeout waiting for shard {0}")]
Timeout(TenantShardId),
#[error("shutting down")]
Shutdown,
#[error("Reconcile error on shard {0}: {1}")]
Failed(TenantShardId, Arc<ReconcileError>),
Failed(TenantShardId, String),
}
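Both shapes of `ReconcileWaitError::Failed` above work with the same error message because thiserror's `{1}` placeholder only needs `Display`, which a `String` and an `Arc`-wrapped error type both provide. A minimal sketch (thiserror assumed as a dependency, a plain integer standing in for the shard id):

use std::sync::Arc;

use thiserror::Error;

#[derive(Error, Debug)]
#[error("reconcile failed: {0}")]
struct ReconcileError(String);

#[derive(Error, Debug)]
enum WaitError {
    #[error("Timeout waiting for shard {0}")]
    Timeout(u32),
    // `{1}` only needs Display, so the payload could equally be a String.
    #[error("Reconcile error on shard {0}: {1}")]
    Failed(u32, Arc<ReconcileError>),
}

fn main() {
    let e = WaitError::Failed(7, Arc::new(ReconcileError("node offline".into())));
    // Prints: Reconcile error on shard 7: reconcile failed: node offline
    println!("{e}");
}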
#[derive(Eq, PartialEq, Debug)]
@@ -329,28 +310,18 @@ pub(crate) struct ReplaceSecondary {

#[derive(Eq, PartialEq, Debug)]
pub(crate) struct MigrateAttachment {
pub(crate) old_attached_node_id: NodeId,
pub(crate) new_attached_node_id: NodeId,
old_attached_node_id: NodeId,
new_attached_node_id: NodeId,
}

#[derive(Eq, PartialEq, Debug)]
pub(crate) enum ScheduleOptimizationAction {
pub(crate) enum ScheduleOptimization {
// Replace one of our secondary locations with a different node
ReplaceSecondary(ReplaceSecondary),
// Migrate attachment to an existing secondary location
MigrateAttachment(MigrateAttachment),
}

#[derive(Eq, PartialEq, Debug)]
pub(crate) struct ScheduleOptimization {
// What was the reconcile sequence when we generated this optimization? The optimization
// should only be applied if the shard's sequence is still at this value, in case other changes
// happened between planning the optimization and applying it.
sequence: Sequence,

pub(crate) action: ScheduleOptimizationAction,
}
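The point of the added `sequence` field is that an optimization is a plan made against one version of the shard's intent, and it must be discarded if the shard has since moved on. A standalone sketch of that guard, with hypothetical simplified types rather than the real TenantShard:

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Sequence(u64);

#[derive(Debug)]
enum Action {
    MigrateAttachment { old: u64, new: u64 },
}

#[derive(Debug)]
struct Optimization {
    sequence: Sequence, // the shard's sequence when the plan was made
    action: Action,
}

struct Shard {
    sequence: Sequence,
}

impl Shard {
    /// Returns true only if the plan is still current; stale plans are ignored.
    fn apply_optimization(&mut self, opt: Optimization) -> bool {
        if opt.sequence != self.sequence {
            return false;
        }
        match opt.action {
            Action::MigrateAttachment { old, new } => {
                println!("migrate attachment {old} -> {new}");
            }
        }
        true
    }
}

fn main() {
    let mut shard = Shard { sequence: Sequence(2) };
    let stale = Optimization {
        sequence: Sequence(1),
        action: Action::MigrateAttachment { old: 1, new: 2 },
    };
    // Planned against an older sequence: dropped without being applied.
    assert!(!shard.apply_optimization(stale));
}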
impl ReconcilerWaiter {
|
||||
pub(crate) async fn wait_timeout(&self, timeout: Duration) -> Result<(), ReconcileWaitError> {
|
||||
tokio::select! {
|
||||
@@ -366,8 +337,7 @@ impl ReconcilerWaiter {
|
||||
SeqWaitError::Timeout => unreachable!()
|
||||
})?;
|
||||
|
||||
return Err(ReconcileWaitError::Failed(self.tenant_shard_id,
|
||||
self.error.lock().unwrap().clone().expect("If error_seq_wait was advanced error was set").clone()))
|
||||
return Err(ReconcileWaitError::Failed(self.tenant_shard_id, self.error.lock().unwrap().clone()))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -383,19 +353,8 @@ pub(crate) struct ReconcilerHandle {
cancel: CancellationToken,
}

pub(crate) enum ReconcileNeeded {
/// shard either doesn't need reconciliation, or is forbidden from spawning a reconciler
/// in its current state (e.g. shard split in progress, or ShardSchedulingPolicy forbids it)
No,
/// shard has a reconciler running, and its intent hasn't changed since that one was
/// spawned: wait for the existing reconciler rather than spawning a new one.
WaitExisting(ReconcilerWaiter),
/// shard needs reconciliation: call into [`TenantShard::spawn_reconciler`]
Yes,
}
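A caller of this three-way result typically branches on it to decide whether to do nothing, reuse the running reconciler, or spawn a new one. A simplified sketch, with a String standing in for ReconcilerWaiter:

enum ReconcileNeeded {
    No,
    WaitExisting(String), // stands in for ReconcilerWaiter
    Yes,
}

fn handle(needed: ReconcileNeeded) -> Option<String> {
    match needed {
        // Nothing to do, or reconciliation is currently forbidden for this shard.
        ReconcileNeeded::No => None,
        // A reconciler for the same intent is already running: reuse its waiter.
        ReconcileNeeded::WaitExisting(waiter) => Some(waiter),
        // Work to do: spawn a reconciler (subject to concurrency limits) and wait on it.
        ReconcileNeeded::Yes => Some("spawned".to_string()),
    }
}

fn main() {
    assert!(handle(ReconcileNeeded::No).is_none());
    assert_eq!(handle(ReconcileNeeded::Yes).as_deref(), Some("spawned"));
}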
/// When a reconcile task completes, it sends this result object
|
||||
/// to be applied to the primary TenantShard.
|
||||
/// to be applied to the primary TenantState.
|
||||
pub(crate) struct ReconcileResult {
|
||||
pub(crate) sequence: Sequence,
|
||||
/// On errors, `observed` should be treated as an incompleted description
|
||||
@@ -408,7 +367,7 @@ pub(crate) struct ReconcileResult {
|
||||
pub(crate) generation: Option<Generation>,
|
||||
pub(crate) observed: ObservedState,
|
||||
|
||||
/// Set [`TenantShard::pending_compute_notification`] from this flag
|
||||
/// Set [`TenantState::pending_compute_notification`] from this flag
|
||||
pub(crate) pending_compute_notification: bool,
|
||||
}
|
||||
|
||||
@@ -420,7 +379,7 @@ impl ObservedState {
|
||||
}
|
||||
}
|
||||
|
||||
impl TenantShard {
|
||||
impl TenantState {
|
||||
pub(crate) fn new(
|
||||
tenant_shard_id: TenantShardId,
|
||||
shard: ShardIdentity,
|
||||
@@ -437,7 +396,6 @@ impl TenantShard {
|
||||
reconciler: None,
|
||||
splitting: SplitState::Idle,
|
||||
sequence: Sequence(1),
|
||||
delayed_reconcile: false,
|
||||
waiter: Arc::new(SeqWait::new(Sequence(0))),
|
||||
error_waiter: Arc::new(SeqWait::new(Sequence(0))),
|
||||
last_error: Arc::default(),
|
||||
@@ -597,7 +555,7 @@ impl TenantShard {
|
||||
Secondary => {
|
||||
if let Some(node_id) = self.intent.get_attached() {
|
||||
// Populate secondary by demoting the attached node
|
||||
self.intent.demote_attached(scheduler, *node_id);
|
||||
self.intent.demote_attached(*node_id);
|
||||
modified = true;
|
||||
} else if self.intent.secondary.is_empty() {
|
||||
// Populate secondary by scheduling a fresh node
|
||||
@@ -689,13 +647,10 @@ impl TenantShard {
|
||||
"Identified optimization: migrate attachment {attached}->{preferred_node} (secondaries {:?})",
|
||||
self.intent.get_secondary()
|
||||
);
|
||||
return Some(ScheduleOptimization {
|
||||
sequence: self.sequence,
|
||||
action: ScheduleOptimizationAction::MigrateAttachment(MigrateAttachment {
|
||||
old_attached_node_id: attached,
|
||||
new_attached_node_id: *preferred_node,
|
||||
}),
|
||||
});
|
||||
return Some(ScheduleOptimization::MigrateAttachment(MigrateAttachment {
|
||||
old_attached_node_id: attached,
|
||||
new_attached_node_id: *preferred_node,
|
||||
}));
|
||||
}
|
||||
} else {
|
||||
tracing::debug!(
|
||||
@@ -753,45 +708,36 @@ impl TenantShard {
|
||||
"Identified optimization: replace secondary {secondary}->{candidate_node} (current secondaries {:?})",
|
||||
self.intent.get_secondary()
|
||||
);
|
||||
return Some(ScheduleOptimization {
|
||||
sequence: self.sequence,
|
||||
action: ScheduleOptimizationAction::ReplaceSecondary(ReplaceSecondary {
|
||||
old_node_id: *secondary,
|
||||
new_node_id: candidate_node,
|
||||
}),
|
||||
});
|
||||
return Some(ScheduleOptimization::ReplaceSecondary(ReplaceSecondary {
|
||||
old_node_id: *secondary,
|
||||
new_node_id: candidate_node,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Return true if the optimization was really applied: it will not be applied if the optimization's
|
||||
/// sequence is behind this tenant shard's
|
||||
pub(crate) fn apply_optimization(
|
||||
&mut self,
|
||||
scheduler: &mut Scheduler,
|
||||
optimization: ScheduleOptimization,
|
||||
) -> bool {
|
||||
if optimization.sequence != self.sequence {
|
||||
return false;
|
||||
}
|
||||
|
||||
) {
|
||||
metrics::METRICS_REGISTRY
|
||||
.metrics_group
|
||||
.storage_controller_schedule_optimization
|
||||
.inc();
|
||||
|
||||
match optimization.action {
|
||||
ScheduleOptimizationAction::MigrateAttachment(MigrateAttachment {
|
||||
match optimization {
|
||||
ScheduleOptimization::MigrateAttachment(MigrateAttachment {
|
||||
old_attached_node_id,
|
||||
new_attached_node_id,
|
||||
}) => {
|
||||
self.intent.demote_attached(scheduler, old_attached_node_id);
|
||||
self.intent.demote_attached(old_attached_node_id);
|
||||
self.intent
|
||||
.promote_attached(scheduler, new_attached_node_id);
|
||||
}
|
||||
ScheduleOptimizationAction::ReplaceSecondary(ReplaceSecondary {
|
||||
ScheduleOptimization::ReplaceSecondary(ReplaceSecondary {
|
||||
old_node_id,
|
||||
new_node_id,
|
||||
}) => {
|
||||
@@ -799,8 +745,6 @@ impl TenantShard {
|
||||
self.intent.push_secondary(scheduler, new_node_id);
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Query whether the tenant's observed state for attached node matches its intent state, and if so,
|
||||
@@ -887,10 +831,16 @@ impl TenantShard {
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
|
||||
pub(crate) fn get_reconcile_needed(
|
||||
pub(crate) fn maybe_reconcile(
|
||||
&mut self,
|
||||
result_tx: &tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
|
||||
pageservers: &Arc<HashMap<NodeId, Node>>,
|
||||
) -> ReconcileNeeded {
|
||||
compute_hook: &Arc<ComputeHook>,
|
||||
service_config: &service::Config,
|
||||
persistence: &Arc<Persistence>,
|
||||
gate: &Gate,
|
||||
cancel: &CancellationToken,
|
||||
) -> Option<ReconcilerWaiter> {
|
||||
// If there are any ambiguous observed states, and the nodes they refer to are available,
|
||||
// we should reconcile to clean them up.
|
||||
let mut dirty_observed = false;
|
||||
@@ -912,8 +862,8 @@ impl TenantShard {
|
||||
active_nodes_dirty || dirty_observed || self.pending_compute_notification;
|
||||
|
||||
if !do_reconcile {
|
||||
tracing::debug!("Not dirty, no reconciliation needed.");
|
||||
return ReconcileNeeded::No;
|
||||
tracing::info!("Not dirty, no reconciliation needed.");
|
||||
return None;
|
||||
}
|
||||
|
||||
// If we are currently splitting, then never start a reconciler task: the splitting logic
|
||||
@@ -921,7 +871,7 @@ impl TenantShard {
|
||||
// up top, so that we only log this message if we would otherwise have done a reconciliation.
|
||||
if !matches!(self.splitting, SplitState::Idle) {
|
||||
tracing::info!("Refusing to reconcile, splitting in progress");
|
||||
return ReconcileNeeded::No;
|
||||
return None;
|
||||
}
|
||||
|
||||
// Reconcile already in flight for the current sequence?
|
||||
@@ -931,7 +881,7 @@ impl TenantShard {
|
||||
"Reconciliation already in progress for sequence {:?}",
|
||||
self.sequence,
|
||||
);
|
||||
return ReconcileNeeded::WaitExisting(ReconcilerWaiter {
|
||||
return Some(ReconcilerWaiter {
|
||||
tenant_shard_id: self.tenant_shard_id,
|
||||
seq_wait: self.waiter.clone(),
|
||||
error_seq_wait: self.error_waiter.clone(),
|
||||
@@ -950,67 +900,10 @@ impl TenantShard {
|
||||
// We only reach this point if there is work to do and we're going to skip
|
||||
// doing it: warn it obvious why this tenant isn't doing what it ought to.
|
||||
tracing::warn!("Skipping reconcile for policy {:?}", self.scheduling_policy);
|
||||
return ReconcileNeeded::No;
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
ReconcileNeeded::Yes
|
||||
}
|
||||
|
||||
/// Ensure the sequence number is set to a value where waiting for this value will make us wait
/// for the next reconcile: i.e. it is ahead of all completed or running reconcilers.
///
/// Constructing a ReconcilerWaiter with the resulting sequence number gives the property
/// that the waiter will not complete until some future Reconciler is constructed and run.
fn ensure_sequence_ahead(&mut self) {
// Find the highest sequence for which a Reconciler has previously run or is currently
// running
let max_seen = std::cmp::max(
self.reconciler
.as_ref()
.map(|r| r.sequence)
.unwrap_or(Sequence(0)),
std::cmp::max(self.waiter.load(), self.error_waiter.load()),
);

if self.sequence <= max_seen {
self.sequence = max_seen.next();
}
}

/// Create a waiter that will wait for some future Reconciler that hasn't been spawned yet.
///
/// This is appropriate when you can't spawn a reconciler (e.g. due to resource limits), but
/// you would like to wait on the next reconciler that gets spawned in the background.
pub(crate) fn future_reconcile_waiter(&mut self) -> ReconcilerWaiter {
self.ensure_sequence_ahead();

ReconcilerWaiter {
tenant_shard_id: self.tenant_shard_id,
seq_wait: self.waiter.clone(),
error_seq_wait: self.error_waiter.clone(),
error: self.last_error.clone(),
seq: self.sequence,
}
}
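The arithmetic in `ensure_sequence_ahead` boils down to: take the highest sequence any reconciler has reached, and move strictly past it. A standalone sketch with a simplified Sequence type and hypothetical fields in place of the real waiters:

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Sequence(u64);

impl Sequence {
    fn next(self) -> Sequence {
        Sequence(self.0 + 1)
    }
}

struct Shard {
    sequence: Sequence,
    running_reconciler: Option<Sequence>, // sequence of the in-flight reconciler, if any
    completed: Sequence,                  // highest sequence waiters have been released at
}

impl Shard {
    /// Move our sequence strictly ahead of anything that has run or is running.
    fn ensure_sequence_ahead(&mut self) {
        let max_seen = self
            .running_reconciler
            .unwrap_or(Sequence(0))
            .max(self.completed);
        if self.sequence <= max_seen {
            self.sequence = max_seen.next();
        }
    }
}

fn main() {
    let mut s = Shard {
        sequence: Sequence(3),
        running_reconciler: Some(Sequence(3)),
        completed: Sequence(2),
    };
    s.ensure_sequence_ahead();
    // Now ahead of the in-flight reconciler, so a waiter at this sequence
    // can only be satisfied by a future reconcile.
    assert_eq!(s.sequence, Sequence(4));
}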
#[allow(clippy::too_many_arguments)]
|
||||
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
|
||||
pub(crate) fn spawn_reconciler(
|
||||
&mut self,
|
||||
result_tx: &tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
|
||||
pageservers: &Arc<HashMap<NodeId, Node>>,
|
||||
compute_hook: &Arc<ComputeHook>,
|
||||
service_config: &service::Config,
|
||||
persistence: &Arc<Persistence>,
|
||||
units: ReconcileUnits,
|
||||
gate_guard: GateGuard,
|
||||
cancel: &CancellationToken,
|
||||
) -> Option<ReconcilerWaiter> {
|
||||
// Reconcile in flight for a stale sequence? Our sequence's task will wait for it before
|
||||
// doing our sequence's work.
|
||||
let old_handle = self.reconciler.take();
|
||||
|
||||
// Build list of nodes from which the reconciler should detach
|
||||
let mut detach = Vec::new();
|
||||
for node_id in self.observed.locations.keys() {
|
||||
@@ -1026,9 +919,18 @@ impl TenantShard {
|
||||
}
|
||||
}
|
||||
|
||||
// Reconcile in flight for a stale sequence? Our sequence's task will wait for it before
|
||||
// doing our sequence's work.
|
||||
let old_handle = self.reconciler.take();
|
||||
|
||||
let Ok(gate_guard) = gate.enter() else {
|
||||
// Shutting down, don't start a reconciler
|
||||
return None;
|
||||
};
|
||||
|
||||
// Advance the sequence before spawning a reconciler, so that sequence waiters
|
||||
// can distinguish between before+after the reconcile completes.
|
||||
self.ensure_sequence_ahead();
|
||||
self.sequence = self.sequence.next();
|
||||
|
||||
let reconciler_cancel = cancel.child_token();
|
||||
let reconciler_intent = TargetState::from_intent(pageservers, &self.intent);
|
||||
@@ -1043,7 +945,6 @@ impl TenantShard {
|
||||
compute_hook: compute_hook.clone(),
|
||||
service_config: service_config.clone(),
|
||||
_gate_guard: gate_guard,
|
||||
_resource_units: units,
|
||||
cancel: reconciler_cancel.clone(),
|
||||
persistence: persistence.clone(),
|
||||
compute_notify_failure: false,
|
||||
@@ -1110,18 +1011,16 @@ impl TenantShard {
|
||||
status: outcome_label,
|
||||
});
|
||||
|
||||
// Constructing result implicitly drops Reconciler, freeing any ReconcileUnits before the Service might
|
||||
// try and schedule more work in response to our result.
|
||||
let result = ReconcileResult {
|
||||
sequence: reconcile_seq,
|
||||
result,
|
||||
tenant_shard_id: reconciler.tenant_shard_id,
|
||||
generation: reconciler.generation,
|
||||
observed: reconciler.observed,
|
||||
pending_compute_notification: reconciler.compute_notify_failure,
|
||||
};
|
||||
|
||||
result_tx.send(result).ok();
|
||||
result_tx
|
||||
.send(ReconcileResult {
|
||||
sequence: reconcile_seq,
|
||||
result,
|
||||
tenant_shard_id: reconciler.tenant_shard_id,
|
||||
generation: reconciler.generation,
|
||||
observed: reconciler.observed,
|
||||
pending_compute_notification: reconciler.compute_notify_failure,
|
||||
})
|
||||
.ok();
|
||||
}
|
||||
.instrument(reconciler_span),
|
||||
);
|
||||
@@ -1190,13 +1089,6 @@ impl TenantShard {
|
||||
&self.scheduling_policy
|
||||
}
|
||||
|
||||
pub(crate) fn set_last_error(&mut self, sequence: Sequence, error: ReconcileError) {
|
||||
// Ordering: always set last_error before advancing sequence, so that sequence
|
||||
// waiters are guaranteed to see a Some value when they see an error.
|
||||
*(self.last_error.lock().unwrap()) = Some(Arc::new(error));
|
||||
self.error_waiter.advance(sequence);
|
||||
}
|
||||
|
||||
pub(crate) fn from_persistent(
|
||||
tsp: TenantShardPersistence,
|
||||
intent: IntentState,
|
||||
@@ -1219,7 +1111,6 @@ impl TenantShard {
|
||||
error_waiter: Arc::new(SeqWait::new(Sequence::initial())),
|
||||
last_error: Arc::default(),
|
||||
pending_compute_notification: false,
|
||||
delayed_reconcile: false,
|
||||
scheduling_policy: serde_json::from_str(&tsp.scheduling_policy).unwrap(),
|
||||
})
|
||||
}
|
||||
@@ -1252,7 +1143,7 @@ pub(crate) mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantShard {
|
||||
fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantState {
|
||||
let tenant_id = TenantId::generate();
|
||||
let shard_number = ShardNumber(0);
|
||||
let shard_count = ShardCount::new(1);
|
||||
@@ -1262,7 +1153,7 @@ pub(crate) mod tests {
|
||||
shard_number,
|
||||
shard_count,
|
||||
};
|
||||
TenantShard::new(
|
||||
TenantState::new(
|
||||
tenant_shard_id,
|
||||
ShardIdentity::new(
|
||||
shard_number,
|
||||
@@ -1274,7 +1165,7 @@ pub(crate) mod tests {
|
||||
)
|
||||
}
|
||||
|
||||
fn make_test_tenant(policy: PlacementPolicy, shard_count: ShardCount) -> Vec<TenantShard> {
|
||||
fn make_test_tenant(policy: PlacementPolicy, shard_count: ShardCount) -> Vec<TenantState> {
|
||||
let tenant_id = TenantId::generate();
|
||||
|
||||
(0..shard_count.count())
|
||||
@@ -1286,7 +1177,7 @@ pub(crate) mod tests {
|
||||
shard_number,
|
||||
shard_count,
|
||||
};
|
||||
TenantShard::new(
|
||||
TenantState::new(
|
||||
tenant_shard_id,
|
||||
ShardIdentity::new(
|
||||
shard_number,
|
||||
@@ -1311,26 +1202,24 @@ pub(crate) mod tests {
|
||||
let mut scheduler = Scheduler::new(nodes.values());
|
||||
let mut context = ScheduleContext::default();
|
||||
|
||||
let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1));
|
||||
tenant_shard
|
||||
let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1));
|
||||
tenant_state
|
||||
.schedule(&mut scheduler, &mut context)
|
||||
.expect("we have enough nodes, scheduling should work");
|
||||
|
||||
// Expect to initially be schedule on to different nodes
|
||||
assert_eq!(tenant_shard.intent.secondary.len(), 1);
|
||||
assert!(tenant_shard.intent.attached.is_some());
|
||||
assert_eq!(tenant_state.intent.secondary.len(), 1);
|
||||
assert!(tenant_state.intent.attached.is_some());
|
||||
|
||||
let attached_node_id = tenant_shard.intent.attached.unwrap();
|
||||
let secondary_node_id = *tenant_shard.intent.secondary.iter().last().unwrap();
|
||||
let attached_node_id = tenant_state.intent.attached.unwrap();
|
||||
let secondary_node_id = *tenant_state.intent.secondary.iter().last().unwrap();
|
||||
assert_ne!(attached_node_id, secondary_node_id);
|
||||
|
||||
// Notifying the attached node is offline should demote it to a secondary
|
||||
let changed = tenant_shard
|
||||
.intent
|
||||
.demote_attached(&mut scheduler, attached_node_id);
|
||||
let changed = tenant_state.intent.demote_attached(attached_node_id);
|
||||
assert!(changed);
|
||||
assert!(tenant_shard.intent.attached.is_none());
|
||||
assert_eq!(tenant_shard.intent.secondary.len(), 2);
|
||||
assert!(tenant_state.intent.attached.is_none());
|
||||
assert_eq!(tenant_state.intent.secondary.len(), 2);
|
||||
|
||||
// Update the scheduler state to indicate the node is offline
|
||||
nodes
|
||||
@@ -1340,18 +1229,18 @@ pub(crate) mod tests {
|
||||
scheduler.node_upsert(nodes.get(&attached_node_id).unwrap());
|
||||
|
||||
// Scheduling the node should promote the still-available secondary node to attached
|
||||
tenant_shard
|
||||
tenant_state
|
||||
.schedule(&mut scheduler, &mut context)
|
||||
.expect("active nodes are available");
|
||||
assert_eq!(tenant_shard.intent.attached.unwrap(), secondary_node_id);
|
||||
assert_eq!(tenant_state.intent.attached.unwrap(), secondary_node_id);
|
||||
|
||||
// The original attached node should have been retained as a secondary
|
||||
assert_eq!(
|
||||
*tenant_shard.intent.secondary.iter().last().unwrap(),
|
||||
*tenant_state.intent.secondary.iter().last().unwrap(),
|
||||
attached_node_id
|
||||
);
|
||||
|
||||
tenant_shard.intent.clear(&mut scheduler);
|
||||
tenant_state.intent.clear(&mut scheduler);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1361,48 +1250,48 @@ pub(crate) mod tests {
|
||||
let nodes = make_test_nodes(3);
|
||||
let mut scheduler = Scheduler::new(nodes.values());
|
||||
|
||||
let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1));
|
||||
let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1));
|
||||
|
||||
tenant_shard.observed.locations.insert(
|
||||
tenant_state.observed.locations.insert(
|
||||
NodeId(3),
|
||||
ObservedStateLocation {
|
||||
conf: Some(LocationConfig {
|
||||
mode: LocationConfigMode::AttachedMulti,
|
||||
generation: Some(2),
|
||||
secondary_conf: None,
|
||||
shard_number: tenant_shard.shard.number.0,
|
||||
shard_count: tenant_shard.shard.count.literal(),
|
||||
shard_stripe_size: tenant_shard.shard.stripe_size.0,
|
||||
shard_number: tenant_state.shard.number.0,
|
||||
shard_count: tenant_state.shard.count.literal(),
|
||||
shard_stripe_size: tenant_state.shard.stripe_size.0,
|
||||
tenant_conf: TenantConfig::default(),
|
||||
}),
|
||||
},
|
||||
);
|
||||
|
||||
tenant_shard.observed.locations.insert(
|
||||
tenant_state.observed.locations.insert(
|
||||
NodeId(2),
|
||||
ObservedStateLocation {
|
||||
conf: Some(LocationConfig {
|
||||
mode: LocationConfigMode::AttachedStale,
|
||||
generation: Some(1),
|
||||
secondary_conf: None,
|
||||
shard_number: tenant_shard.shard.number.0,
|
||||
shard_count: tenant_shard.shard.count.literal(),
|
||||
shard_stripe_size: tenant_shard.shard.stripe_size.0,
|
||||
shard_number: tenant_state.shard.number.0,
|
||||
shard_count: tenant_state.shard.count.literal(),
|
||||
shard_stripe_size: tenant_state.shard.stripe_size.0,
|
||||
tenant_conf: TenantConfig::default(),
|
||||
}),
|
||||
},
|
||||
);
|
||||
|
||||
tenant_shard.intent_from_observed(&mut scheduler);
|
||||
tenant_state.intent_from_observed(&mut scheduler);
|
||||
|
||||
// The highest generationed attached location gets used as attached
|
||||
assert_eq!(tenant_shard.intent.attached, Some(NodeId(3)));
|
||||
assert_eq!(tenant_state.intent.attached, Some(NodeId(3)));
|
||||
// Other locations get used as secondary
|
||||
assert_eq!(tenant_shard.intent.secondary, vec![NodeId(2)]);
|
||||
assert_eq!(tenant_state.intent.secondary, vec![NodeId(2)]);
|
||||
|
||||
scheduler.consistency_check(nodes.values(), [&tenant_shard].into_iter())?;
|
||||
scheduler.consistency_check(nodes.values(), [&tenant_state].into_iter())?;
|
||||
|
||||
tenant_shard.intent.clear(&mut scheduler);
|
||||
tenant_state.intent.clear(&mut scheduler);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1411,23 +1300,23 @@ pub(crate) mod tests {
|
||||
let nodes = make_test_nodes(3);
|
||||
let mut scheduler = Scheduler::new(nodes.values());
|
||||
|
||||
let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1));
|
||||
let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1));
|
||||
|
||||
// In pause mode, schedule() shouldn't do anything
|
||||
tenant_shard.scheduling_policy = ShardSchedulingPolicy::Pause;
|
||||
assert!(tenant_shard
|
||||
tenant_state.scheduling_policy = ShardSchedulingPolicy::Pause;
|
||||
assert!(tenant_state
|
||||
.schedule(&mut scheduler, &mut ScheduleContext::default())
|
||||
.is_ok());
|
||||
assert!(tenant_shard.intent.all_pageservers().is_empty());
|
||||
assert!(tenant_state.intent.all_pageservers().is_empty());
|
||||
|
||||
// In active mode, schedule() works
|
||||
tenant_shard.scheduling_policy = ShardSchedulingPolicy::Active;
|
||||
assert!(tenant_shard
|
||||
tenant_state.scheduling_policy = ShardSchedulingPolicy::Active;
|
||||
assert!(tenant_state
|
||||
.schedule(&mut scheduler, &mut ScheduleContext::default())
|
||||
.is_ok());
|
||||
assert!(!tenant_shard.intent.all_pageservers().is_empty());
|
||||
assert!(!tenant_state.intent.all_pageservers().is_empty());
|
||||
|
||||
tenant_shard.intent.clear(&mut scheduler);
|
||||
tenant_state.intent.clear(&mut scheduler);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -1458,13 +1347,10 @@ pub(crate) mod tests {
|
||||
// would be no other shards from the same tenant, and request to do so.
|
||||
assert_eq!(
|
||||
optimization_a,
|
||||
Some(ScheduleOptimization {
|
||||
sequence: shard_a.sequence,
|
||||
action: ScheduleOptimizationAction::MigrateAttachment(MigrateAttachment {
|
||||
old_attached_node_id: NodeId(1),
|
||||
new_attached_node_id: NodeId(2)
|
||||
})
|
||||
})
|
||||
Some(ScheduleOptimization::MigrateAttachment(MigrateAttachment {
|
||||
old_attached_node_id: NodeId(1),
|
||||
new_attached_node_id: NodeId(2)
|
||||
}))
|
||||
);
|
||||
|
||||
// Note that these optimizing two shards in the same tenant with the same ScheduleContext is
|
||||
@@ -1475,13 +1361,10 @@ pub(crate) mod tests {
|
||||
let optimization_b = shard_b.optimize_attachment(&nodes, &schedule_context);
|
||||
assert_eq!(
|
||||
optimization_b,
|
||||
Some(ScheduleOptimization {
|
||||
sequence: shard_b.sequence,
|
||||
action: ScheduleOptimizationAction::MigrateAttachment(MigrateAttachment {
|
||||
old_attached_node_id: NodeId(1),
|
||||
new_attached_node_id: NodeId(3)
|
||||
})
|
||||
})
|
||||
Some(ScheduleOptimization::MigrateAttachment(MigrateAttachment {
|
||||
old_attached_node_id: NodeId(1),
|
||||
new_attached_node_id: NodeId(3)
|
||||
}))
|
||||
);
|
||||
|
||||
// Applying these optimizations should result in the end state proposed
|
||||
@@ -1525,13 +1408,10 @@ pub(crate) mod tests {
|
||||
// same tenant should generate an optimization to move one away
|
||||
assert_eq!(
|
||||
optimization_a,
|
||||
Some(ScheduleOptimization {
|
||||
sequence: shard_a.sequence,
|
||||
action: ScheduleOptimizationAction::ReplaceSecondary(ReplaceSecondary {
|
||||
old_node_id: NodeId(3),
|
||||
new_node_id: NodeId(4)
|
||||
})
|
||||
})
|
||||
Some(ScheduleOptimization::ReplaceSecondary(ReplaceSecondary {
|
||||
old_node_id: NodeId(3),
|
||||
new_node_id: NodeId(4)
|
||||
}))
|
||||
);
|
||||
|
||||
shard_a.apply_optimization(&mut scheduler, optimization_a.unwrap());
|
||||
@@ -1549,7 +1429,7 @@ pub(crate) mod tests {
|
||||
fn optimize_til_idle(
|
||||
nodes: &HashMap<NodeId, Node>,
|
||||
scheduler: &mut Scheduler,
|
||||
shards: &mut [TenantShard],
|
||||
shards: &mut [TenantState],
|
||||
) {
|
||||
let mut loop_n = 0;
|
||||
loop {
|
||||
@@ -1610,14 +1490,7 @@ pub(crate) mod tests {
|
||||
|
||||
// We should see equal number of locations on the two nodes.
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 4);
|
||||
// Scheduling does not consider the number of attachments picking the initial
|
||||
// pageserver to attach to (hence the assertion that all primaries are on the
|
||||
// same node)
|
||||
// TODO: Tweak the scheduling to evenly distribute attachments for new shards.
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(1)), 4);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 4);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 0);
|
||||
|
||||
// Add another two nodes: we should see the shards spread out when their optimize
|
||||
// methods are called
|
||||
@@ -1626,16 +1499,9 @@ pub(crate) mod tests {
|
||||
optimize_til_idle(&nodes, &mut scheduler, &mut shards);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 2);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(1)), 1);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 2);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 1);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(3)), 2);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(3)), 1);
|
||||
|
||||
assert_eq!(scheduler.get_node_shard_count(NodeId(4)), 2);
|
||||
assert_eq!(scheduler.get_node_attached_shard_count(NodeId(4)), 1);
|
||||
|
||||
for shard in shards.iter_mut() {
|
||||
shard.intent.clear(&mut scheduler);
|
||||
@@ -86,10 +86,7 @@ where
.stdout(process_log_file)
.stderr(same_file_for_stderr)
.args(args);

let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars(
fill_rust_env_vars(background_command),
));
let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command));
filled_cmd.envs(envs);

let pid_file_to_check = match &initial_pid_file {
@@ -271,15 +268,6 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
cmd
}

fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command {
for (var, val) in std::env::vars() {
if var.starts_with("NEON_PAGESERVER_") {
cmd = cmd.env(var, val);
}
}
cmd
}

/// Add a `pre_exec` to the cmd that, inbetween fork() and exec(),
/// 1. Claims a pidfile with a fcntl lock on it and
/// 2. Sets up the pidfile's file descriptor so that it (and the lock)
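The `fill_env_vars_prefixed_neon` helper shown above is a plain prefix filter over the parent environment, copied onto the child command. A self-contained sketch of the same pattern (assumes a Unix-like system where the `env` binary is available):

use std::process::Command;

/// Copy any environment variables with the given prefix from this process into `cmd`.
fn forward_prefixed_env(cmd: &mut Command, prefix: &str) {
    for (var, val) in std::env::vars() {
        if var.starts_with(prefix) {
            cmd.env(var, val);
        }
    }
}

fn main() {
    let mut cmd = Command::new("env");
    // Start the child from an empty environment so only explicitly forwarded
    // variables are visible to it.
    cmd.env_clear();
    forward_prefixed_env(&mut cmd, "NEON_PAGESERVER_");
    let output = cmd.output().expect("failed to run `env`");
    print!("{}", String::from_utf8_lossy(&output.stdout));
}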
@@ -9,23 +9,22 @@ use anyhow::{anyhow, bail, Context, Result};
|
||||
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
|
||||
use compute_api::spec::ComputeMode;
|
||||
use control_plane::endpoint::ComputeControlPlane;
|
||||
use control_plane::local_env::{
|
||||
InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf, NeonLocalInitPageserverConf,
|
||||
SafekeeperConf,
|
||||
};
|
||||
use control_plane::pageserver::PageServerNode;
|
||||
use control_plane::local_env::{InitForceMode, LocalEnv};
|
||||
use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
|
||||
use control_plane::safekeeper::SafekeeperNode;
|
||||
use control_plane::storage_controller::StorageController;
|
||||
use control_plane::{broker, local_env};
|
||||
use pageserver_api::config::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
|
||||
use pageserver_api::controller_api::{
|
||||
NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy, PlacementPolicy,
|
||||
};
|
||||
use pageserver_api::controller_api::PlacementPolicy;
|
||||
use pageserver_api::models::{
|
||||
ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
|
||||
use pageserver_api::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
|
||||
};
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::parse_host_port;
|
||||
use safekeeper_api::{
|
||||
@@ -55,6 +54,44 @@ const DEFAULT_PG_VERSION: &str = "15";
|
||||
|
||||
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
|
||||
|
||||
fn default_conf(num_pageservers: u16) -> String {
|
||||
let mut template = format!(
|
||||
r#"
|
||||
# Default built-in configuration, defined in main.rs
|
||||
control_plane_api = '{DEFAULT_PAGESERVER_CONTROL_PLANE_API}'
|
||||
|
||||
[broker]
|
||||
listen_addr = '{DEFAULT_BROKER_ADDR}'
|
||||
|
||||
[[safekeepers]]
|
||||
id = {DEFAULT_SAFEKEEPER_ID}
|
||||
pg_port = {DEFAULT_SAFEKEEPER_PG_PORT}
|
||||
http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT}
|
||||
|
||||
"#,
|
||||
);
|
||||
|
||||
for i in 0..num_pageservers {
|
||||
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
|
||||
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
|
||||
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
|
||||
|
||||
template += &format!(
|
||||
r#"
|
||||
[[pageservers]]
|
||||
id = {pageserver_id}
|
||||
listen_pg_addr = '127.0.0.1:{pg_port}'
|
||||
listen_http_addr = '127.0.0.1:{http_port}'
|
||||
pg_auth_type = '{trust_auth}'
|
||||
http_auth_type = '{trust_auth}'
|
||||
"#,
|
||||
trust_auth = AuthType::Trust,
|
||||
)
|
||||
}
|
||||
|
||||
template
|
||||
}
|
||||
|
||||
///
|
||||
/// Timelines tree element used as a value in the HashMap.
|
||||
///
|
||||
@@ -98,7 +135,7 @@ fn main() -> Result<()> {
|
||||
let subcommand_result = match sub_name {
|
||||
"tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
|
||||
"timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
|
||||
"start" => rt.block_on(handle_start_all(&env)),
|
||||
"start" => rt.block_on(handle_start_all(sub_args, &env)),
|
||||
"stop" => rt.block_on(handle_stop_all(sub_args, &env)),
|
||||
"pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
|
||||
"storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
|
||||
@@ -117,7 +154,7 @@ fn main() -> Result<()> {
|
||||
};
|
||||
|
||||
match subcommand_result {
|
||||
Ok(Some(updated_env)) => updated_env.persist_config()?,
|
||||
Ok(Some(updated_env)) => updated_env.persist_config(&updated_env.base_data_dir)?,
|
||||
Ok(None) => (),
|
||||
Err(e) => {
|
||||
eprintln!("command failed: {e:?}");
|
||||
@@ -306,65 +343,48 @@ fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId
|
||||
}
|
||||
|
||||
fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
||||
let num_pageservers = init_match.get_one::<u16>("num-pageservers");
|
||||
|
||||
let force = init_match.get_one("force").expect("we set a default value");
|
||||
|
||||
// Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`.
|
||||
let init_conf: NeonLocalInitConf = if let Some(config_path) =
|
||||
init_match.get_one::<PathBuf>("config")
|
||||
{
|
||||
// User (likely the Python test suite) provided a description of the environment.
|
||||
if num_pageservers.is_some() {
|
||||
bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
|
||||
}
|
||||
let num_pageservers = init_match
|
||||
.get_one::<u16>("num-pageservers")
|
||||
.expect("num-pageservers arg has a default");
|
||||
// Create config file
|
||||
let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
|
||||
// load and parse the file
|
||||
let contents = std::fs::read_to_string(config_path).with_context(|| {
|
||||
std::fs::read_to_string(config_path).with_context(|| {
|
||||
format!(
|
||||
"Could not read configuration file '{}'",
|
||||
config_path.display()
|
||||
)
|
||||
})?;
|
||||
toml_edit::de::from_str(&contents)?
|
||||
})?
|
||||
} else {
|
||||
// User (likely interactive) did not provide a description of the environment, give them the default
|
||||
NeonLocalInitConf {
|
||||
control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
|
||||
broker: NeonBroker {
|
||||
listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
|
||||
},
|
||||
safekeepers: vec![SafekeeperConf {
|
||||
id: DEFAULT_SAFEKEEPER_ID,
|
||||
pg_port: DEFAULT_SAFEKEEPER_PG_PORT,
|
||||
http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,
|
||||
..Default::default()
|
||||
}],
|
||||
pageservers: (0..num_pageservers.copied().unwrap_or(1))
|
||||
.map(|i| {
|
||||
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
|
||||
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
|
||||
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
|
||||
NeonLocalInitPageserverConf {
|
||||
id: pageserver_id,
|
||||
listen_pg_addr: format!("127.0.0.1:{pg_port}"),
|
||||
listen_http_addr: format!("127.0.0.1:{http_port}"),
|
||||
pg_auth_type: AuthType::Trust,
|
||||
http_auth_type: AuthType::Trust,
|
||||
other: Default::default(),
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
pg_distrib_dir: None,
|
||||
neon_distrib_dir: None,
|
||||
default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
|
||||
storage_controller: None,
|
||||
control_plane_compute_hook_api: None,
|
||||
}
|
||||
// Built-in default config
|
||||
default_conf(*num_pageservers)
|
||||
};
|
||||
|
||||
LocalEnv::init(init_conf, force)
|
||||
.context("materialize initial neon_local environment on disk")?;
|
||||
Ok(LocalEnv::load_config().expect("freshly written config should be loadable"))
|
||||
let pg_version = init_match
|
||||
.get_one::<u32>("pg-version")
|
||||
.copied()
|
||||
.context("Failed to parse postgres version from the argument string")?;
|
||||
|
||||
let mut env =
|
||||
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
|
||||
let force = init_match.get_one("force").expect("we set a default value");
|
||||
env.init(pg_version, force)
|
||||
.context("Failed to initialize neon repository")?;
|
||||
|
||||
// Create remote storage location for default LocalFs remote storage
|
||||
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
|
||||
|
||||
// Initialize pageserver, create initial tenant and timeline.
|
||||
for ps_conf in &env.pageservers {
|
||||
PageServerNode::from_env(&env, ps_conf)
|
||||
.initialize(&pageserver_config_overrides(init_match))
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("pageserver init failed: {e:?}");
|
||||
exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
Ok(env)
|
||||
}
|
||||
|
||||
/// The default pageserver is the one where CLI tenant/timeline operations are sent by default.
|
||||
@@ -379,6 +399,15 @@ fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
|
||||
PageServerNode::from_env(env, ps_conf)
|
||||
}
|
||||
|
||||
fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
|
||||
init_match
|
||||
.get_many::<String>("pageserver-config-override")
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(String::as_str)
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn handle_tenant(
|
||||
tenant_match: &ArgMatches,
|
||||
env: &mut local_env::LocalEnv,
|
||||
@@ -390,54 +419,6 @@ async fn handle_tenant(
|
||||
println!("{} {:?}", t.id, t.state);
|
||||
}
|
||||
}
|
||||
Some(("import", import_match)) => {
|
||||
let tenant_id = parse_tenant_id(import_match)?.unwrap_or_else(TenantId::generate);
|
||||
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
let create_response = storage_controller.tenant_import(tenant_id).await?;
|
||||
|
||||
let shard_zero = create_response
|
||||
.shards
|
||||
.first()
|
||||
.expect("Import response omitted shards");
|
||||
|
||||
let attached_pageserver_id = shard_zero.node_id;
|
||||
let pageserver =
|
||||
PageServerNode::from_env(env, env.get_pageserver_conf(attached_pageserver_id)?);
|
||||
|
||||
println!(
|
||||
"Imported tenant {tenant_id}, attached to pageserver {attached_pageserver_id}"
|
||||
);
|
||||
|
||||
let timelines = pageserver
|
||||
.http_client
|
||||
.list_timelines(shard_zero.shard_id)
|
||||
.await?;
|
||||
|
||||
// Pick a 'main' timeline that has no ancestors, the rest will get arbitrary names
|
||||
let main_timeline = timelines
|
||||
.iter()
|
||||
.find(|t| t.ancestor_timeline_id.is_none())
|
||||
.expect("No timelines found")
|
||||
.timeline_id;
|
||||
|
||||
let mut branch_i = 0;
|
||||
for timeline in timelines.iter() {
|
||||
let branch_name = if timeline.timeline_id == main_timeline {
|
||||
"main".to_string()
|
||||
} else {
|
||||
branch_i += 1;
|
||||
format!("branch_{branch_i}")
|
||||
};
|
||||
|
||||
println!(
|
||||
"Importing timeline {tenant_id}/{} as branch {branch_name}",
|
||||
timeline.timeline_id
|
||||
);
|
||||
|
||||
env.register_branch_mapping(branch_name, tenant_id, timeline.timeline_id)?;
|
||||
}
|
||||
}
|
||||
Some(("create", create_match)) => {
|
||||
let tenant_conf: HashMap<_, _> = create_match
|
||||
.get_many::<String>("config")
|
||||
@@ -810,8 +791,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
|
||||
.copied()
|
||||
.unwrap_or(false);
|
||||
|
||||
let allow_multiple = sub_args.get_flag("allow-multiple");
|
||||
|
||||
let mode = match (lsn, hot_standby) {
|
||||
(Some(lsn), false) => ComputeMode::Static(lsn),
|
||||
(None, true) => ComputeMode::Replica,
|
||||
@@ -829,9 +808,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if !allow_multiple {
|
||||
cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
|
||||
}
|
||||
cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
|
||||
|
||||
cplane.new_endpoint(
|
||||
&endpoint_id,
|
||||
@@ -860,8 +837,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
|
||||
|
||||
let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");
|
||||
|
||||
let allow_multiple = sub_args.get_flag("allow-multiple");
|
||||
|
||||
// If --safekeepers argument is given, use only the listed safekeeper nodes.
|
||||
let safekeepers =
|
||||
if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
|
||||
@@ -887,13 +862,11 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
if !allow_multiple {
|
||||
cplane.check_conflicting_endpoints(
|
||||
endpoint.mode,
|
||||
endpoint.tenant_id,
|
||||
endpoint.timeline_id,
|
||||
)?;
|
||||
}
|
||||
cplane.check_conflicting_endpoints(
|
||||
endpoint.mode,
|
||||
endpoint.tenant_id,
|
||||
endpoint.timeline_id,
|
||||
)?;
|
||||
|
||||
let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
|
||||
let conf = env.get_pageserver_conf(pageserver_id).unwrap();
|
||||
@@ -1049,7 +1022,10 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
|
||||
async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
|
||||
match sub_match.subcommand() {
|
||||
Some(("start", subcommand_args)) => {
|
||||
if let Err(e) = get_pageserver(env, subcommand_args)?.start().await {
|
||||
if let Err(e) = get_pageserver(env, subcommand_args)?
|
||||
.start(&pageserver_config_overrides(subcommand_args))
|
||||
.await
|
||||
{
|
||||
eprintln!("pageserver start failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
@@ -1075,12 +1051,30 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if let Err(e) = pageserver.start().await {
|
||||
if let Err(e) = pageserver
|
||||
.start(&pageserver_config_overrides(subcommand_args))
|
||||
.await
|
||||
{
|
||||
eprintln!("pageserver start failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
Some(("set-state", subcommand_args)) => {
|
||||
let pageserver = get_pageserver(env, subcommand_args)?;
|
||||
let scheduling = subcommand_args.get_one("scheduling");
|
||||
let availability = subcommand_args.get_one("availability");
|
||||
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
storage_controller
|
||||
.node_configure(NodeConfigureRequest {
|
||||
node_id: pageserver.conf.id,
|
||||
scheduling: scheduling.cloned(),
|
||||
availability: availability.cloned(),
|
||||
})
|
||||
.await?;
|
||||
}
|
||||
|
||||
Some(("status", subcommand_args)) => {
|
||||
match get_pageserver(env, subcommand_args)?.check_status().await {
|
||||
Ok(_) => println!("Page server is up and running"),
|
||||
@@ -1202,7 +1196,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn handle_start_all(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
// Endpoints are not started automatically
|
||||
|
||||
broker::start_broker_process(env).await?;
|
||||
@@ -1219,7 +1213,10 @@ async fn handle_start_all(env: &local_env::LocalEnv) -> anyhow::Result<()> {
|
||||
|
||||
for ps_conf in &env.pageservers {
|
||||
let pageserver = PageServerNode::from_env(env, ps_conf);
|
||||
if let Err(e) = pageserver.start().await {
|
||||
if let Err(e) = pageserver
|
||||
.start(&pageserver_config_overrides(sub_match))
|
||||
.await
|
||||
{
|
||||
eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
|
||||
try_stop_all(env, true).await;
|
||||
exit(1);
|
||||
@@ -1251,7 +1248,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
|
||||
match ComputeControlPlane::load(env.clone()) {
|
||||
Ok(cplane) => {
|
||||
for (_k, node) in cplane.endpoints {
|
||||
if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) {
|
||||
if let Err(e) = node.stop(if immediate { "immediate" } else { "fast " }, false) {
|
||||
eprintln!("postgres stop failed: {e:#}");
|
||||
}
|
||||
}
|
||||
@@ -1360,6 +1357,13 @@ fn cli() -> Command {
|
||||
.required(false)
|
||||
.value_name("stop-mode");
|
||||
|
||||
let pageserver_config_args = Arg::new("pageserver-config-override")
|
||||
.long("pageserver-config-override")
|
||||
.num_args(1)
|
||||
.action(ArgAction::Append)
|
||||
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
|
||||
.required(false);
|
||||
|
||||
let remote_ext_config_args = Arg::new("remote-ext-config")
|
||||
.long("remote-ext-config")
|
||||
.num_args(1)
|
||||
@@ -1393,7 +1397,9 @@ fn cli() -> Command {
|
||||
let num_pageservers_arg = Arg::new("num-pageservers")
|
||||
.value_parser(value_parser!(u16))
|
||||
.long("num-pageservers")
|
||||
.help("How many pageservers to create (default 1)");
|
||||
.help("How many pageservers to create (default 1)")
|
||||
.required(false)
|
||||
.default_value("1");
|
||||
|
||||
let update_catalog = Arg::new("update-catalog")
|
||||
.value_parser(value_parser!(bool))
|
||||
@@ -1407,25 +1413,20 @@ fn cli() -> Command {
|
||||
.help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`")
|
||||
.required(false);
|
||||
|
||||
let allow_multiple = Arg::new("allow-multiple")
|
||||
.help("Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests.")
|
||||
.long("allow-multiple")
|
||||
.action(ArgAction::SetTrue)
|
||||
.required(false);
|
||||
|
||||
Command::new("Neon CLI")
|
||||
.arg_required_else_help(true)
|
||||
.version(GIT_VERSION)
|
||||
.subcommand(
|
||||
Command::new("init")
|
||||
.about("Initialize a new Neon repository, preparing configs for services to start with")
|
||||
.arg(pageserver_config_args.clone())
|
||||
.arg(num_pageservers_arg.clone())
|
||||
.arg(
|
||||
Arg::new("config")
|
||||
.long("config")
|
||||
.required(false)
|
||||
.value_parser(value_parser!(PathBuf))
|
||||
.value_name("config")
|
||||
.value_name("config"),
|
||||
)
|
||||
.arg(pg_version_arg.clone())
|
||||
.arg(force_arg)
|
||||
@@ -1433,7 +1434,6 @@ fn cli() -> Command {
|
||||
.subcommand(
|
||||
Command::new("timeline")
|
||||
.about("Manage timelines")
|
||||
.arg_required_else_help(true)
|
||||
.subcommand(Command::new("list")
|
||||
.about("List all timelines, available to this pageserver")
|
||||
.arg(tenant_id_arg.clone()))
|
||||
@@ -1496,8 +1496,6 @@ fn cli() -> Command {
|
||||
.subcommand(Command::new("config")
|
||||
.arg(tenant_id_arg.clone())
|
||||
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false)))
|
||||
.subcommand(Command::new("import").arg(tenant_id_arg.clone().required(true))
|
||||
.about("Import a tenant that is present in remote storage, and create branches for its timelines"))
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("pageserver")
|
||||
@@ -1507,6 +1505,7 @@ fn cli() -> Command {
|
||||
.subcommand(Command::new("status"))
|
||||
.subcommand(Command::new("start")
|
||||
.about("Start local pageserver")
|
||||
.arg(pageserver_config_args.clone())
|
||||
)
|
||||
.subcommand(Command::new("stop")
|
||||
.about("Stop local pageserver")
|
||||
@@ -1514,14 +1513,21 @@ fn cli() -> Command {
|
||||
)
|
||||
.subcommand(Command::new("restart")
|
||||
.about("Restart local pageserver")
|
||||
.arg(pageserver_config_args.clone())
|
||||
)
|
||||
.subcommand(Command::new("set-state")
|
||||
.arg(Arg::new("availability").value_parser(value_parser!(NodeAvailability)).long("availability").action(ArgAction::Set).help("Availability state: offline,active"))
|
||||
.arg(Arg::new("scheduling").value_parser(value_parser!(NodeSchedulingPolicy)).long("scheduling").action(ArgAction::Set).help("Scheduling state: draining,pause,filling,active"))
|
||||
.about("Set scheduling or availability state of pageserver node")
|
||||
.arg(pageserver_config_args.clone())
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("storage_controller")
|
||||
.arg_required_else_help(true)
|
||||
.about("Manage storage_controller")
|
||||
.subcommand(Command::new("start").about("Start storage controller"))
|
||||
.subcommand(Command::new("stop").about("Stop storage controller")
|
||||
.subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
|
||||
.subcommand(Command::new("stop").about("Stop local pageserver")
|
||||
.arg(stop_mode_arg.clone()))
|
||||
)
|
||||
.subcommand(
|
||||
@@ -1567,7 +1573,6 @@ fn cli() -> Command {
|
||||
.arg(pg_version_arg.clone())
|
||||
.arg(hot_standby_arg.clone())
|
||||
.arg(update_catalog)
|
||||
.arg(allow_multiple.clone())
|
||||
)
|
||||
.subcommand(Command::new("start")
|
||||
.about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
|
||||
@@ -1576,7 +1581,6 @@ fn cli() -> Command {
|
||||
.arg(safekeepers_arg)
|
||||
.arg(remote_ext_config_args)
|
||||
.arg(create_test_user)
|
||||
.arg(allow_multiple.clone())
|
||||
)
|
||||
.subcommand(Command::new("reconfigure")
|
||||
.about("Reconfigure the endpoint")
|
||||
@@ -1628,6 +1632,7 @@ fn cli() -> Command {
|
||||
.subcommand(
|
||||
Command::new("start")
|
||||
.about("Start page server and safekeepers")
|
||||
.arg(pageserver_config_args)
|
||||
)
|
||||
.subcommand(
|
||||
Command::new("stop")
|
||||
|
||||
@@ -554,7 +554,6 @@ impl Endpoint {
format_version: 1.0,
operation_uuid: None,
features: self.features.clone(),
swap_size_bytes: None,
cluster: Cluster {
cluster_id: None, // project ID: not used
name: None, // project name: not used
@@ -3,7 +3,7 @@
|
||||
//! Now it also provides init method which acts like a stub for proper installation
|
||||
//! script which will use local paths.
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use anyhow::{bail, ensure, Context};
|
||||
|
||||
use clap::ValueEnum;
|
||||
use postgres_backend::AuthType;
|
||||
@@ -17,14 +17,11 @@ use std::net::Ipv4Addr;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use std::time::Duration;
|
||||
use utils::{
|
||||
auth::{encode_from_key_file, Claims},
|
||||
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
|
||||
};
|
||||
|
||||
use crate::pageserver::PageServerNode;
|
||||
use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
|
||||
use crate::safekeeper::SafekeeperNode;
|
||||
|
||||
pub const DEFAULT_PG_VERSION: u32 = 15;
|
||||
@@ -36,7 +33,7 @@ pub const DEFAULT_PG_VERSION: u32 = 15;
|
||||
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
|
||||
// an example.
|
||||
//
|
||||
#[derive(PartialEq, Eq, Clone, Debug)]
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
pub struct LocalEnv {
|
||||
// Base directory for all the nodes (the pageserver, safekeepers and
|
||||
// compute endpoints).
|
||||
@@ -44,99 +41,55 @@ pub struct LocalEnv {
|
||||
// This is not stored in the config file. Rather, this is the path where the
|
||||
// config file itself is. It is read from the NEON_REPO_DIR env variable or
|
||||
// '.neon' if not given.
|
||||
#[serde(skip)]
|
||||
pub base_data_dir: PathBuf,
|
||||
|
||||
// Path to postgres distribution. It's expected that "bin", "include",
|
||||
// "lib", "share" from postgres distribution are there. If at some point
|
||||
// in time we will be able to run against vanilla postgres we may split that
|
||||
// to four separate paths and match OS-specific installation layout.
|
||||
#[serde(default)]
|
||||
pub pg_distrib_dir: PathBuf,
|
||||
|
||||
// Path to pageserver binary.
|
||||
#[serde(default)]
|
||||
pub neon_distrib_dir: PathBuf,
|
||||
|
||||
// Default tenant ID to use with the 'neon_local' command line utility, when
|
||||
// --tenant_id is not explicitly specified.
|
||||
#[serde(default)]
|
||||
pub default_tenant_id: Option<TenantId>,
|
||||
|
||||
// used to issue tokens during e.g. pg start
|
||||
#[serde(default)]
|
||||
pub private_key_path: PathBuf,
|
||||
|
||||
pub broker: NeonBroker,
|
||||
|
||||
// Configuration for the storage controller (1 per neon_local environment)
|
||||
pub storage_controller: NeonStorageControllerConf,
|
||||
|
||||
/// This Vec must always contain at least one pageserver
/// Populated by [`Self::load_config`] from the individual `pageserver.toml`s.
/// NB: not used anymore except for informing users that they need to change their `.neon/config`.
pub pageservers: Vec<PageServerConf>,

#[serde(default)]
pub safekeepers: Vec<SafekeeperConf>,

// Control plane upcall API for pageserver: if None, we will not run storage_controller. If set, this will
// be propagated into each pageserver's configuration.
#[serde(default)]
pub control_plane_api: Option<Url>,

// Control plane upcall API for storage controller. If set, this will be propagated into the
// storage controller's configuration.
#[serde(default)]
pub control_plane_compute_hook_api: Option<Url>,

/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
#[serde(default)]
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
}
|
||||
|
||||
/// On-disk state stored in `.neon/config`.
|
||||
#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
|
||||
#[serde(default, deny_unknown_fields)]
|
||||
pub struct OnDiskConfig {
|
||||
pub pg_distrib_dir: PathBuf,
|
||||
pub neon_distrib_dir: PathBuf,
|
||||
pub default_tenant_id: Option<TenantId>,
|
||||
pub private_key_path: PathBuf,
|
||||
pub broker: NeonBroker,
|
||||
pub storage_controller: NeonStorageControllerConf,
|
||||
#[serde(
|
||||
skip_serializing,
|
||||
deserialize_with = "fail_if_pageservers_field_specified"
|
||||
)]
|
||||
pub pageservers: Vec<PageServerConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Url>,
|
||||
pub control_plane_compute_hook_api: Option<Url>,
|
||||
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
|
||||
}
|
||||
|
||||
fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
where
D: serde::Deserializer<'de>,
{
Err(serde::de::Error::custom(
"The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \
Please remove the `pageservers` from your .neon/config.",
))
}
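// A minimal standalone sketch of the same technique used by `fail_if_pageservers_field_specified`:
// a custom `deserialize_with` function that turns a removed config field into a helpful error.
// The struct, field, and function names below are hypothetical, for illustration only.
use serde::{Deserialize, Deserializer};

#[derive(Deserialize)]
struct ConfSketch {
    #[serde(default, deserialize_with = "reject_old_field")]
    old_field: Option<String>,
}

fn reject_old_field<'de, D: Deserializer<'de>>(_: D) -> Result<Option<String>, D::Error> {
    Err(serde::de::Error::custom(
        "`old_field` was removed; please delete it from the config",
    ))
}

fn main() {
    // Absent field: `default` applies and parsing succeeds.
    assert!(toml::from_str::<ConfSketch>("").is_ok());
    // Present field: the custom deserializer rejects it with the message above.
    assert!(toml::from_str::<ConfSketch>("old_field = 'x'").is_err());
}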
|
||||
|
||||
/// The description of the neon_local env to be initialized by `neon_local init --config`.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct NeonLocalInitConf {
|
||||
// TODO: do we need this? Seems unused
|
||||
pub pg_distrib_dir: Option<PathBuf>,
|
||||
// TODO: do we need this? Seems unused
|
||||
pub neon_distrib_dir: Option<PathBuf>,
|
||||
pub default_tenant_id: TenantId,
|
||||
pub broker: NeonBroker,
|
||||
pub storage_controller: Option<NeonStorageControllerConf>,
|
||||
pub pageservers: Vec<NeonLocalInitPageserverConf>,
|
||||
pub safekeepers: Vec<SafekeeperConf>,
|
||||
pub control_plane_api: Option<Option<Url>>,
|
||||
pub control_plane_compute_hook_api: Option<Option<Url>>,
|
||||
}
|
||||
|
||||
/// Broker config for cluster internal communication.
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default)]
|
||||
@@ -145,33 +98,6 @@ pub struct NeonBroker {
|
||||
pub listen_addr: SocketAddr,
|
||||
}
|
||||
|
||||
/// Configuration for the storage controller (1 per neon_local environment).
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct NeonStorageControllerConf {
/// Heartbeat timeout before marking a node offline
#[serde(with = "humantime_serde")]
pub max_unavailable: Duration,

/// Threshold for auto-splitting a tenant into shards
pub split_threshold: Option<u64>,
}

impl NeonStorageControllerConf {
// Use a shorter pageserver unavailability interval than the default to speed up tests.
const DEFAULT_MAX_UNAVAILABLE_INTERVAL: std::time::Duration =
std::time::Duration::from_secs(10);
}

impl Default for NeonStorageControllerConf {
fn default() -> Self {
Self {
max_unavailable: Self::DEFAULT_MAX_UNAVAILABLE_INTERVAL,
split_threshold: None,
}
}
}
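// A small sketch of what the `#[serde(with = "humantime_serde")]` attribute above buys:
// durations can be written as human-readable strings in the config file.
// The struct and field names below are hypothetical, for illustration only.
use serde::Deserialize;
use std::time::Duration;

#[derive(Deserialize)]
struct ControllerConfSketch {
    #[serde(with = "humantime_serde")]
    max_unavailable: Duration,
}

fn main() {
    // "10s" in the toml deserializes into a ten-second Duration,
    // mirroring DEFAULT_MAX_UNAVAILABLE_INTERVAL above.
    let conf: ControllerConfSketch = toml::from_str("max_unavailable = '10s'").unwrap();
    assert_eq!(conf.max_unavailable, Duration::from_secs(10));
}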
|
||||
|
||||
// Dummy Default impl to satisfy Deserialize derive.
|
||||
impl Default for NeonBroker {
|
||||
fn default() -> Self {
|
||||
@@ -187,18 +113,22 @@ impl NeonBroker {
|
||||
}
|
||||
}
|
||||
|
||||
// neon_local needs to know this subset of pageserver configuration.
|
||||
// For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.
|
||||
// It can get stale if `pageserver.toml` is changed.
|
||||
// TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
#[serde(default, deny_unknown_fields)]
|
||||
pub struct PageServerConf {
|
||||
// node id
|
||||
pub id: NodeId,
|
||||
|
||||
// Pageserver connection settings
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
|
||||
// auth type used for the PG and HTTP ports
|
||||
pub pg_auth_type: AuthType,
|
||||
pub http_auth_type: AuthType,
|
||||
|
||||
pub(crate) virtual_file_io_engine: Option<String>,
|
||||
pub(crate) get_vectored_impl: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for PageServerConf {
|
||||
@@ -209,40 +139,8 @@ impl Default for PageServerConf {
|
||||
listen_http_addr: String::new(),
|
||||
pg_auth_type: AuthType::Trust,
|
||||
http_auth_type: AuthType::Trust,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The toml that can be passed to `neon_local init --config`.
|
||||
/// This is a subset of the `pageserver.toml` configuration.
|
||||
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
|
||||
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
|
||||
pub struct NeonLocalInitPageserverConf {
|
||||
pub id: NodeId,
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_http_addr: String,
|
||||
pub pg_auth_type: AuthType,
|
||||
pub http_auth_type: AuthType,
|
||||
#[serde(flatten)]
|
||||
pub other: HashMap<String, toml::Value>,
|
||||
}
|
||||
|
||||
impl From<&NeonLocalInitPageserverConf> for PageServerConf {
|
||||
fn from(conf: &NeonLocalInitPageserverConf) -> Self {
|
||||
let NeonLocalInitPageserverConf {
|
||||
id,
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
other: _,
|
||||
} = conf;
|
||||
Self {
|
||||
id: *id,
|
||||
listen_pg_addr: listen_pg_addr.clone(),
|
||||
listen_http_addr: listen_http_addr.clone(),
|
||||
pg_auth_type: *pg_auth_type,
|
||||
http_auth_type: *http_auth_type,
|
||||
virtual_file_io_engine: None,
|
||||
get_vectored_impl: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -258,7 +156,6 @@ pub struct SafekeeperConf {
|
||||
pub remote_storage: Option<String>,
|
||||
pub backup_threads: Option<u32>,
|
||||
pub auth_enabled: bool,
|
||||
pub listen_addr: Option<String>,
|
||||
}
|
||||
|
||||
impl Default for SafekeeperConf {
|
||||
@@ -272,7 +169,6 @@ impl Default for SafekeeperConf {
|
||||
remote_storage: None,
|
||||
backup_threads: None,
|
||||
auth_enabled: false,
|
||||
listen_addr: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -430,7 +326,41 @@ impl LocalEnv {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Construct `Self` from on-disk state.
|
||||
/// Create a LocalEnv from a config file.
|
||||
///
|
||||
/// Unlike 'load_config', this function fills in any defaults that are missing
|
||||
/// from the config file.
|
||||
pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
|
||||
let mut env: LocalEnv = toml::from_str(toml)?;
|
||||
|
||||
// Find postgres binaries.
|
||||
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
|
||||
// Note that later in the code we assume that distrib dirs follow the same pattern
|
||||
// for all postgres versions.
|
||||
if env.pg_distrib_dir == Path::new("") {
|
||||
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
|
||||
env.pg_distrib_dir = postgres_bin.into();
|
||||
} else {
|
||||
let cwd = env::current_dir()?;
|
||||
env.pg_distrib_dir = cwd.join("pg_install")
|
||||
}
|
||||
}
|
||||
|
||||
// Find neon binaries.
|
||||
if env.neon_distrib_dir == Path::new("") {
|
||||
env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
|
||||
}
|
||||
|
||||
if env.pageservers.is_empty() {
|
||||
anyhow::bail!("Configuration must contain at least one pageserver");
|
||||
}
|
||||
|
||||
env.base_data_dir = base_path();
|
||||
|
||||
Ok(env)
|
||||
}
|
||||
|
||||
/// Locate and load config
|
||||
pub fn load_config() -> anyhow::Result<Self> {
|
||||
let repopath = base_path();
|
||||
|
||||
@@ -444,129 +374,38 @@ impl LocalEnv {
|
||||
// TODO: check that it looks like a neon repository
|
||||
|
||||
// load and parse file
|
||||
let config_file_contents = fs::read_to_string(repopath.join("config"))?;
|
||||
let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?;
|
||||
let mut env = {
|
||||
let OnDiskConfig {
|
||||
pg_distrib_dir,
|
||||
neon_distrib_dir,
|
||||
default_tenant_id,
|
||||
private_key_path,
|
||||
broker,
|
||||
storage_controller,
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
branch_name_mappings,
|
||||
} = on_disk_config;
|
||||
LocalEnv {
|
||||
base_data_dir: repopath.clone(),
|
||||
pg_distrib_dir,
|
||||
neon_distrib_dir,
|
||||
default_tenant_id,
|
||||
private_key_path,
|
||||
broker,
|
||||
storage_controller,
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
branch_name_mappings,
|
||||
}
|
||||
};
|
||||
let config = fs::read_to_string(repopath.join("config"))?;
|
||||
let mut env: LocalEnv = toml::from_str(config.as_str())?;
|
||||
|
||||
// The source of truth for pageserver configuration is the pageserver.toml.
|
||||
assert!(
|
||||
env.pageservers.is_empty(),
|
||||
"we ensure this during deserialization"
|
||||
);
|
||||
env.pageservers = {
|
||||
let iter = std::fs::read_dir(&repopath).context("open dir")?;
|
||||
let mut pageservers = Vec::new();
|
||||
for res in iter {
|
||||
let dentry = res?;
|
||||
const PREFIX: &str = "pageserver_";
|
||||
let dentry_name = dentry
|
||||
.file_name()
|
||||
.into_string()
|
||||
.ok()
|
||||
.with_context(|| format!("non-utf8 dentry: {:?}", dentry.path()))
|
||||
.unwrap();
|
||||
if !dentry_name.starts_with(PREFIX) {
|
||||
continue;
|
||||
}
|
||||
if !dentry.file_type().context("determine file type")?.is_dir() {
|
||||
anyhow::bail!("expected a directory, got {:?}", dentry.path());
|
||||
}
|
||||
let id = dentry_name[PREFIX.len()..]
|
||||
.parse::<NodeId>()
|
||||
.with_context(|| format!("parse id from {:?}", dentry.path()))?;
|
||||
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
|
||||
#[derive(serde::Serialize, serde::Deserialize)]
|
||||
// (allow unknown fields, unlike PageServerConf)
|
||||
struct PageserverConfigTomlSubset {
|
||||
id: NodeId,
|
||||
listen_pg_addr: String,
|
||||
listen_http_addr: String,
|
||||
pg_auth_type: AuthType,
|
||||
http_auth_type: AuthType,
|
||||
}
|
||||
let config_toml_path = dentry.path().join("pageserver.toml");
|
||||
let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
|
||||
&std::fs::read_to_string(&config_toml_path)
|
||||
.with_context(|| format!("read {:?}", config_toml_path))?,
|
||||
)
|
||||
.context("parse pageserver.toml")?;
|
||||
let PageserverConfigTomlSubset {
|
||||
id: config_toml_id,
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
} = config_toml;
|
||||
let conf = PageServerConf {
|
||||
id: {
|
||||
anyhow::ensure!(
|
||||
config_toml_id == id,
|
||||
"id mismatch: config_toml.id={config_toml_id} id={id}",
|
||||
);
|
||||
id
|
||||
},
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
};
|
||||
pageservers.push(conf);
|
||||
}
|
||||
pageservers
|
||||
};
|
||||
env.base_data_dir = repopath;
|
||||
|
||||
Ok(env)
|
||||
}
|
||||
|
||||
pub fn persist_config(&self) -> anyhow::Result<()> {
|
||||
Self::persist_config_impl(
|
||||
&self.base_data_dir,
|
||||
&OnDiskConfig {
|
||||
pg_distrib_dir: self.pg_distrib_dir.clone(),
|
||||
neon_distrib_dir: self.neon_distrib_dir.clone(),
|
||||
default_tenant_id: self.default_tenant_id,
|
||||
private_key_path: self.private_key_path.clone(),
|
||||
broker: self.broker.clone(),
|
||||
storage_controller: self.storage_controller.clone(),
|
||||
pageservers: vec![], // it's skip_serializing anyway
|
||||
safekeepers: self.safekeepers.clone(),
|
||||
control_plane_api: self.control_plane_api.clone(),
|
||||
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
|
||||
branch_name_mappings: self.branch_name_mappings.clone(),
|
||||
},
|
||||
)
|
||||
}
|
||||
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'neon_local init --config=<path>'.
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a local deployment of the page server
# and safekeeper node. It is read by the 'neon_local' command-line
# utility.
"#
.to_string();

// Convert the LocalEnv to a toml file.
//
// This could be as simple as this:
//
// conf_content += &toml::to_string_pretty(env)?;
//
// But it results in a "values must be emitted before tables" error. I'm not sure
// why; AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>', is last.
// Maybe rust reorders the fields to avoid padding or something?
// In any case, converting to toml::Value first, and serializing that, works.
// See https://github.com/alexcrichton/toml-rs/issues/142
conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;
|
||||
|
||||
pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {
|
||||
let conf_content = &toml::to_string_pretty(config)?;
|
||||
let target_config_path = base_path.join("config");
|
||||
fs::write(&target_config_path, conf_content).with_context(|| {
|
||||
format!(
|
||||
@@ -591,13 +430,17 @@ impl LocalEnv {
|
||||
}
|
||||
}
|
||||
|
||||
/// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
|
||||
pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
|
||||
let base_path = base_path();
|
||||
assert_ne!(base_path, Path::new(""));
|
||||
let base_path = &base_path;
|
||||
//
|
||||
// Initialize a new Neon repository
|
||||
//
|
||||
pub fn init(&mut self, pg_version: u32, force: &InitForceMode) -> anyhow::Result<()> {
|
||||
// check if config already exists
|
||||
let base_path = &self.base_data_dir;
|
||||
ensure!(
|
||||
base_path != Path::new(""),
|
||||
"repository base path is missing"
|
||||
);
|
||||
|
||||
// create base_path dir
|
||||
if base_path.exists() {
|
||||
match force {
|
||||
InitForceMode::MustNotExist => {
|
||||
@@ -629,96 +472,70 @@ impl LocalEnv {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
|
||||
bail!(
|
||||
"Can't find postgres binary at {}",
|
||||
self.pg_bin_dir(pg_version)?.display()
|
||||
);
|
||||
}
|
||||
for binary in ["pageserver", "safekeeper"] {
|
||||
if !self.neon_distrib_dir.join(binary).exists() {
|
||||
bail!(
|
||||
"Can't find binary '{binary}' in neon distrib dir '{}'",
|
||||
self.neon_distrib_dir.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if !base_path.exists() {
|
||||
fs::create_dir(base_path)?;
|
||||
}
|
||||
|
||||
let NeonLocalInitConf {
|
||||
pg_distrib_dir,
|
||||
neon_distrib_dir,
|
||||
default_tenant_id,
|
||||
broker,
|
||||
storage_controller,
|
||||
pageservers,
|
||||
safekeepers,
|
||||
control_plane_api,
|
||||
control_plane_compute_hook_api,
|
||||
} = conf;
|
||||
|
||||
// Find postgres binaries.
|
||||
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
|
||||
// Note that later in the code we assume that distrib dirs follow the same pattern
|
||||
// for all postgres versions.
|
||||
let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {
|
||||
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
|
||||
postgres_bin.into()
|
||||
} else {
|
||||
let cwd = env::current_dir().unwrap();
|
||||
cwd.join("pg_install")
|
||||
}
|
||||
});
|
||||
|
||||
// Find neon binaries.
|
||||
let neon_distrib_dir = neon_distrib_dir
|
||||
.unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());
|
||||
|
||||
// Generate keypair for JWT.
|
||||
//
|
||||
// The keypair is only needed if authentication is enabled in any of the
|
||||
// components. For convenience, we generate the keypair even if authentication
|
||||
// is not enabled, so that you can easily enable it after the initialization
|
||||
// step.
|
||||
generate_auth_keys(
|
||||
base_path.join("auth_private_key.pem").as_path(),
|
||||
base_path.join("auth_public_key.pem").as_path(),
|
||||
)
|
||||
.context("generate auth keys")?;
|
||||
let private_key_path = PathBuf::from("auth_private_key.pem");
|
||||
|
||||
// create the runtime type because the remaining initialization code below needs
|
||||
// a LocalEnv instance to operate on
|
||||
// TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state
|
||||
let env = LocalEnv {
|
||||
base_data_dir: base_path.clone(),
|
||||
pg_distrib_dir,
|
||||
neon_distrib_dir,
|
||||
default_tenant_id: Some(default_tenant_id),
|
||||
private_key_path,
|
||||
broker,
|
||||
storage_controller: storage_controller.unwrap_or_default(),
|
||||
pageservers: pageservers.iter().map(Into::into).collect(),
|
||||
safekeepers,
|
||||
control_plane_api: control_plane_api.unwrap_or_default(),
|
||||
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
|
||||
branch_name_mappings: Default::default(),
|
||||
};
|
||||
|
||||
// create endpoints dir
|
||||
fs::create_dir_all(env.endpoints_path())?;
|
||||
|
||||
// create safekeeper dirs
|
||||
for safekeeper in &env.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
|
||||
// step. However, if the key generation fails, we treat it as non-fatal if
|
||||
// authentication was not enabled.
|
||||
if self.private_key_path == PathBuf::new() {
|
||||
match generate_auth_keys(
|
||||
base_path.join("auth_private_key.pem").as_path(),
|
||||
base_path.join("auth_public_key.pem").as_path(),
|
||||
) {
|
||||
Ok(()) => {
|
||||
self.private_key_path = PathBuf::from("auth_private_key.pem");
|
||||
}
|
||||
Err(e) => {
|
||||
if !self.auth_keys_needed() {
|
||||
eprintln!("Could not generate keypair for JWT authentication: {e}");
|
||||
eprintln!("Continuing anyway because authentication was not enabled");
|
||||
self.private_key_path = PathBuf::from("auth_private_key.pem");
|
||||
} else {
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// initialize pageserver state
|
||||
for (i, ps) in pageservers.into_iter().enumerate() {
|
||||
let runtime_ps = &env.pageservers[i];
|
||||
assert_eq!(&PageServerConf::from(&ps), runtime_ps);
|
||||
fs::create_dir(env.pageserver_data_dir(ps.id))?;
|
||||
PageServerNode::from_env(&env, runtime_ps)
|
||||
.initialize(ps)
|
||||
.context("pageserver init failed")?;
|
||||
fs::create_dir_all(self.endpoints_path())?;
|
||||
|
||||
for safekeeper in &self.safekeepers {
|
||||
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
|
||||
}
|
||||
|
||||
// set up the remote location for default LocalFs remote storage
|
||||
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
|
||||
self.persist_config(base_path)
|
||||
}
|
||||
|
||||
env.persist_config()
|
||||
fn auth_keys_needed(&self) -> bool {
|
||||
self.pageservers.iter().any(|ps| {
|
||||
ps.pg_auth_type == AuthType::NeonJWT || ps.http_auth_type == AuthType::NeonJWT
|
||||
}) || self.safekeepers.iter().any(|sk| sk.auth_enabled)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn base_path() -> PathBuf {
|
||||
fn base_path() -> PathBuf {
|
||||
match std::env::var_os("NEON_REPO_DIR") {
|
||||
Some(val) => PathBuf::from(val),
|
||||
None => PathBuf::from(".neon"),
|
||||
@@ -761,3 +578,31 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn simple_conf_parsing() {
|
||||
let simple_conf_toml = include_str!("../simple.conf");
|
||||
let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
|
||||
assert!(
|
||||
simple_conf_parse_result.is_ok(),
|
||||
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
|
||||
);
|
||||
|
||||
let string_to_replace = "listen_addr = '127.0.0.1:50051'";
|
||||
let spoiled_url_str = "listen_addr = '!@$XOXO%^&'";
|
||||
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
|
||||
assert!(
|
||||
spoiled_url_toml.contains(spoiled_url_str),
|
||||
"Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
|
||||
);
|
||||
let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
|
||||
assert!(
|
||||
spoiled_url_parse_result.is_err(),
|
||||
"expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
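// A brief sketch of the serialization workaround mentioned in persist_config above
// ("values must be emitted before tables"): going through toml::Value before
// pretty-printing sidesteps field-ordering issues when an array-of-tables field
// would otherwise precede a plain value. The types below are hypothetical.
use serde::Serialize;

#[derive(Serialize)]
struct SafekeeperSketch {
    id: u64,
}

#[derive(Serialize)]
struct EnvSketch {
    safekeepers: Vec<SafekeeperSketch>,
    name: String,
}

fn main() -> anyhow::Result<()> {
    let env = EnvSketch {
        safekeepers: vec![SafekeeperSketch { id: 1 }],
        name: "local".to_string(),
    };
    // Direct toml::to_string_pretty(&env) can fail here because the array of
    // tables (`safekeepers`) comes before the plain `name` value; converting to
    // a toml::Value first puts the output into a serializable shape.
    let rendered = toml::to_string_pretty(&toml::Value::try_from(&env)?)?;
    println!("{rendered}");
    Ok(())
}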
@@ -4,21 +4,21 @@
|
||||
//!
|
||||
//! .neon/
|
||||
//!
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::num::NonZeroU64;
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use camino::Utf8PathBuf;
|
||||
use futures::SinkExt;
|
||||
use pageserver_api::models::{
|
||||
self, AuxFilePolicy, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo,
|
||||
TimelineInfo,
|
||||
self, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo, TimelineInfo,
|
||||
};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api;
|
||||
@@ -30,7 +30,7 @@ use utils::{
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf};
|
||||
use crate::local_env::PageServerConf;
|
||||
use crate::{background_process, local_env::LocalEnv};
|
||||
|
||||
/// Directory within .neon which will be used by default for LocalFs remote storage.
|
||||
@@ -74,23 +74,57 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
|
||||
fn pageserver_init_make_toml(
|
||||
&self,
|
||||
conf: NeonLocalInitPageserverConf,
|
||||
) -> anyhow::Result<toml_edit::Document> {
|
||||
assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
|
||||
|
||||
// TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
|
||||
|
||||
/// Merge overrides provided by the user on the command line with our default overrides derived from neon_local configuration.
|
||||
///
|
||||
/// These all end up on the command line of the `pageserver` binary.
|
||||
fn neon_local_overrides(&self, cli_overrides: &[&str]) -> Vec<String> {
|
||||
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotes, etc.
|
||||
let pg_distrib_dir_param = format!(
|
||||
"pg_distrib_dir='{}'",
|
||||
self.env.pg_distrib_dir_raw().display()
|
||||
);
|
||||
|
||||
let PageServerConf {
|
||||
id,
|
||||
listen_pg_addr,
|
||||
listen_http_addr,
|
||||
pg_auth_type,
|
||||
http_auth_type,
|
||||
virtual_file_io_engine,
|
||||
get_vectored_impl,
|
||||
} = &self.conf;
|
||||
|
||||
let id = format!("id={}", id);
|
||||
|
||||
let http_auth_type_param = format!("http_auth_type='{}'", http_auth_type);
|
||||
let listen_http_addr_param = format!("listen_http_addr='{}'", listen_http_addr);
|
||||
|
||||
let pg_auth_type_param = format!("pg_auth_type='{}'", pg_auth_type);
|
||||
let listen_pg_addr_param = format!("listen_pg_addr='{}'", listen_pg_addr);
|
||||
let virtual_file_io_engine = if let Some(virtual_file_io_engine) = virtual_file_io_engine {
|
||||
format!("virtual_file_io_engine='{virtual_file_io_engine}'")
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let get_vectored_impl = if let Some(get_vectored_impl) = get_vectored_impl {
|
||||
format!("get_vectored_impl='{get_vectored_impl}'")
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
let broker_endpoint_param = format!("broker_endpoint='{}'", self.env.broker.client_url());
|
||||
|
||||
let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];
|
||||
let mut overrides = vec![
|
||||
id,
|
||||
pg_distrib_dir_param,
|
||||
http_auth_type_param,
|
||||
pg_auth_type_param,
|
||||
listen_http_addr_param,
|
||||
listen_pg_addr_param,
|
||||
broker_endpoint_param,
|
||||
virtual_file_io_engine,
|
||||
get_vectored_impl,
|
||||
];
|
||||
|
||||
if let Some(control_plane_api) = &self.env.control_plane_api {
|
||||
overrides.push(format!(
|
||||
@@ -100,7 +134,7 @@ impl PageServerNode {
|
||||
|
||||
// Storage controller uses the same auth as pageserver: if JWT is enabled
|
||||
// for us, we will also need it to talk to them.
|
||||
if matches!(conf.http_auth_type, AuthType::NeonJWT) {
|
||||
if matches!(http_auth_type, AuthType::NeonJWT) {
|
||||
let jwt_token = self
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
|
||||
@@ -109,40 +143,31 @@ impl PageServerNode {
|
||||
}
|
||||
}
|
||||
|
||||
if !conf.other.contains_key("remote_storage") {
|
||||
if !cli_overrides
|
||||
.iter()
|
||||
.any(|c| c.starts_with("remote_storage"))
|
||||
{
|
||||
overrides.push(format!(
|
||||
"remote_storage={{local_path='../{PAGESERVER_REMOTE_STORAGE_DIR}'}}"
|
||||
));
|
||||
}
|
||||
|
||||
if conf.http_auth_type != AuthType::Trust || conf.pg_auth_type != AuthType::Trust {
|
||||
if *http_auth_type != AuthType::Trust || *pg_auth_type != AuthType::Trust {
|
||||
// Keys are generated in the toplevel repo dir, pageservers' workdirs
|
||||
// are one level below that, so refer to keys with ../
|
||||
overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
|
||||
}
|
||||
|
||||
// Apply the user-provided overrides
|
||||
overrides.push(
|
||||
toml_edit::ser::to_string_pretty(&conf)
|
||||
.expect("we deserialized this from toml earlier"),
|
||||
);
|
||||
overrides.extend(cli_overrides.iter().map(|&c| c.to_owned()));
|
||||
|
||||
// Turn `overrides` into a toml document.
|
||||
// TODO: above code is legacy code, it should be refactored to use toml_edit directly.
|
||||
let mut config_toml = toml_edit::Document::new();
|
||||
for fragment_str in overrides {
|
||||
let fragment = toml_edit::Document::from_str(&fragment_str)
|
||||
.expect("all fragments in `overrides` are valid toml documents, this function controls that");
|
||||
for (key, item) in fragment.iter() {
|
||||
config_toml.insert(key, item.clone());
|
||||
}
|
||||
}
|
||||
Ok(config_toml)
|
||||
overrides
|
||||
}
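// A compact sketch of the fragment-merging idea used above: every override is itself a
// tiny TOML document, and the fragments are folded key-by-key into one config document.
// The override values below are illustrative only.
use std::str::FromStr;

fn main() {
    let overrides = [
        "id=1",
        "listen_pg_addr='127.0.0.1:64000'",
        "broker_endpoint='http://127.0.0.1:50051/'",
    ];
    let mut config_toml = toml_edit::Document::new();
    for fragment_str in overrides {
        let fragment = toml_edit::Document::from_str(fragment_str)
            .expect("each override fragment is valid TOML");
        for (key, item) in fragment.iter() {
            // Later fragments win over earlier ones for the same key.
            config_toml.insert(key, item.clone());
        }
    }
    println!("{config_toml}");
}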
|
||||
|
||||
/// Initializes a pageserver node by creating its config with the overrides provided.
|
||||
pub fn initialize(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
|
||||
self.pageserver_init(conf)
|
||||
pub fn initialize(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
|
||||
// First, run `pageserver --init` and wait for it to write a config into FS and exit.
|
||||
self.pageserver_init(config_overrides)
|
||||
.with_context(|| format!("Failed to run init for pageserver node {}", self.conf.id))
|
||||
}
|
||||
|
||||
@@ -158,11 +183,11 @@ impl PageServerNode {
|
||||
.expect("non-Unicode path")
|
||||
}
|
||||
|
||||
pub async fn start(&self) -> anyhow::Result<()> {
|
||||
self.start_node().await
|
||||
pub async fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
|
||||
self.start_node(config_overrides, false).await
|
||||
}
|
||||
|
||||
fn pageserver_init(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
|
||||
fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
|
||||
let datadir = self.repo_path();
|
||||
let node_id = self.conf.id;
|
||||
println!(
|
||||
@@ -173,20 +198,29 @@ impl PageServerNode {
|
||||
);
|
||||
io::stdout().flush()?;
|
||||
|
||||
let config = self
|
||||
.pageserver_init_make_toml(conf)
|
||||
.context("make pageserver toml")?;
|
||||
let config_file_path = datadir.join("pageserver.toml");
|
||||
let mut config_file = std::fs::OpenOptions::new()
|
||||
.create_new(true)
|
||||
.write(true)
|
||||
.open(&config_file_path)
|
||||
.with_context(|| format!("open pageserver toml for write: {config_file_path:?}"))?;
|
||||
config_file
|
||||
.write_all(config.to_string().as_bytes())
|
||||
.context("write pageserver toml")?;
|
||||
drop(config_file);
|
||||
// TODO: invoke a TBD config-check command to validate that pageserver will start with the written config
|
||||
if !datadir.exists() {
|
||||
std::fs::create_dir(&datadir)?;
|
||||
}
|
||||
|
||||
let datadir_path_str = datadir.to_str().with_context(|| {
|
||||
format!("Cannot start pageserver node {node_id} in path that has no string representation: {datadir:?}")
|
||||
})?;
|
||||
let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
|
||||
args.push(Cow::Borrowed("--init"));
|
||||
|
||||
let init_output = Command::new(self.env.pageserver_bin())
|
||||
.args(args.iter().map(Cow::as_ref))
|
||||
.envs(self.pageserver_env_variables()?)
|
||||
.output()
|
||||
.with_context(|| format!("Failed to run pageserver init for node {node_id}"))?;
|
||||
|
||||
anyhow::ensure!(
|
||||
init_output.status.success(),
|
||||
"Pageserver init for node {} did not finish successfully, stdout: {}, stderr: {}",
|
||||
node_id,
|
||||
String::from_utf8_lossy(&init_output.stdout),
|
||||
String::from_utf8_lossy(&init_output.stderr),
|
||||
);
|
||||
|
||||
// Write metadata file, used by pageserver on startup to register itself with
|
||||
// the storage controller
|
||||
@@ -200,13 +234,12 @@ impl PageServerNode {
|
||||
// situation: the metadata is written by some other script.
|
||||
std::fs::write(
|
||||
metadata_path,
|
||||
serde_json::to_vec(&pageserver_api::config::NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: self.pg_connection_config.port(),
|
||||
http_host: "localhost".to_string(),
|
||||
http_port,
|
||||
other: HashMap::new(),
|
||||
})
|
||||
serde_json::to_vec(&serde_json::json!({
|
||||
"host": "localhost",
|
||||
"port": self.pg_connection_config.port(),
|
||||
"http_host": "localhost",
|
||||
"http_port": http_port,
|
||||
}))
|
||||
.unwrap(),
|
||||
)
|
||||
.expect("Failed to write metadata file");
|
||||
@@ -214,7 +247,11 @@ impl PageServerNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn start_node(&self) -> anyhow::Result<()> {
|
||||
async fn start_node(
|
||||
&self,
|
||||
config_overrides: &[&str],
|
||||
update_config: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
// TODO: using a thread here because start_process() is not async but we need to call check_status()
|
||||
let datadir = self.repo_path();
|
||||
print!(
|
||||
@@ -231,12 +268,15 @@ impl PageServerNode {
|
||||
self.conf.id, datadir,
|
||||
)
|
||||
})?;
|
||||
let args = vec!["-D", datadir_path_str];
|
||||
let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
|
||||
if update_config {
|
||||
args.push(Cow::Borrowed("--update-config"));
|
||||
}
|
||||
background_process::start_process(
|
||||
"pageserver",
|
||||
&datadir,
|
||||
&self.env.pageserver_bin(),
|
||||
args,
|
||||
args.iter().map(Cow::as_ref),
|
||||
self.pageserver_env_variables()?,
|
||||
background_process::InitialPidFile::Expect(self.pid_file()),
|
||||
|| async {
|
||||
@@ -253,6 +293,22 @@ impl PageServerNode {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn pageserver_basic_args<'a>(
|
||||
&self,
|
||||
config_overrides: &'a [&'a str],
|
||||
datadir_path_str: &'a str,
|
||||
) -> Vec<Cow<'a, str>> {
|
||||
let mut args = vec![Cow::Borrowed("-D"), Cow::Borrowed(datadir_path_str)];
|
||||
|
||||
let overrides = self.neon_local_overrides(config_overrides);
|
||||
for config_override in overrides {
|
||||
args.push(Cow::Borrowed("-c"));
|
||||
args.push(Cow::Owned(config_override));
|
||||
}
|
||||
|
||||
args
|
||||
}
|
||||
|
||||
fn pageserver_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
|
||||
// FIXME: why is this tied to pageserver's auth type? Whether or not the safekeeper
|
||||
// needs a token, and how to generate that token, seems independent of whether
|
||||
@@ -378,11 +434,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `timeline_get_throttle` from json")?,
|
||||
switch_aux_file_policy: settings
|
||||
.remove("switch_aux_file_policy")
|
||||
.map(|x| x.parse::<AuxFilePolicy>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'switch_aux_file_policy'")?,
|
||||
};
|
||||
if !settings.is_empty() {
|
||||
bail!("Unrecognized tenant settings: {settings:?}")
|
||||
@@ -501,11 +552,6 @@ impl PageServerNode {
|
||||
.map(serde_json::from_str)
|
||||
.transpose()
|
||||
.context("parse `timeline_get_throttle` from json")?,
|
||||
switch_aux_file_policy: settings
|
||||
.remove("switch_aux_file_policy")
|
||||
.map(|x| x.parse::<AuxFilePolicy>())
|
||||
.transpose()
|
||||
.context("Failed to parse 'switch_aux_file_policy'")?,
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -70,31 +70,24 @@ pub struct SafekeeperNode {
|
||||
pub pg_connection_config: PgConnectionConfig,
|
||||
pub env: LocalEnv,
|
||||
pub http_client: reqwest::Client,
|
||||
pub listen_addr: String,
|
||||
pub http_base_url: String,
|
||||
}
|
||||
|
||||
impl SafekeeperNode {
|
||||
pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
|
||||
let listen_addr = if let Some(ref listen_addr) = conf.listen_addr {
|
||||
listen_addr.clone()
|
||||
} else {
|
||||
"127.0.0.1".to_string()
|
||||
};
|
||||
SafekeeperNode {
|
||||
id: conf.id,
|
||||
conf: conf.clone(),
|
||||
pg_connection_config: Self::safekeeper_connection_config(&listen_addr, conf.pg_port),
|
||||
pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
|
||||
env: env.clone(),
|
||||
http_client: reqwest::Client::new(),
|
||||
http_base_url: format!("http://{}:{}/v1", listen_addr, conf.http_port),
|
||||
listen_addr,
|
||||
http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct libpq connection string for connecting to this safekeeper.
|
||||
fn safekeeper_connection_config(addr: &str, port: u16) -> PgConnectionConfig {
|
||||
PgConnectionConfig::new_host_port(url::Host::parse(addr).unwrap(), port)
|
||||
fn safekeeper_connection_config(port: u16) -> PgConnectionConfig {
|
||||
PgConnectionConfig::new_host_port(url::Host::parse("127.0.0.1").unwrap(), port)
|
||||
}
|
||||
|
||||
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
|
||||
@@ -118,8 +111,8 @@ impl SafekeeperNode {
|
||||
);
|
||||
io::stdout().flush().unwrap();
|
||||
|
||||
let listen_pg = format!("{}:{}", self.listen_addr, self.conf.pg_port);
|
||||
let listen_http = format!("{}:{}", self.listen_addr, self.conf.http_port);
|
||||
let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
|
||||
let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
|
||||
let id = self.id;
|
||||
let datadir = self.datadir_path();
|
||||
|
||||
@@ -146,7 +139,7 @@ impl SafekeeperNode {
|
||||
availability_zone,
|
||||
];
|
||||
if let Some(pg_tenant_only_port) = self.conf.pg_tenant_only_port {
|
||||
let listen_pg_tenant_only = format!("{}:{}", self.listen_addr, pg_tenant_only_port);
|
||||
let listen_pg_tenant_only = format!("127.0.0.1:{}", pg_tenant_only_port);
|
||||
args.extend(["--listen-pg-tenant-only".to_owned(), listen_pg_tenant_only]);
|
||||
}
|
||||
if !self.conf.sync {
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
use crate::{
|
||||
background_process,
|
||||
local_env::{LocalEnv, NeonStorageControllerConf},
|
||||
};
|
||||
use crate::{background_process, local_env::LocalEnv};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hyper::Method;
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
NodeConfigureRequest, NodeRegisterRequest, TenantCreateResponse, TenantLocateResponse,
|
||||
@@ -16,7 +14,6 @@ use pageserver_api::{
|
||||
};
|
||||
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::Method;
|
||||
use serde::{de::DeserializeOwned, Deserialize, Serialize};
|
||||
use std::{fs, str::FromStr};
|
||||
use tokio::process::Command;
|
||||
@@ -35,13 +32,15 @@ pub struct StorageController {
|
||||
public_key: Option<String>,
|
||||
postgres_port: u16,
|
||||
client: reqwest::Client,
|
||||
config: NeonStorageControllerConf,
|
||||
}
|
||||
|
||||
const COMMAND: &str = "storage_controller";
|
||||
|
||||
const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
|
||||
|
||||
// Use a shorter pageserver unavailability interval than the default to speed up tests.
|
||||
const NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct AttachHookRequest {
|
||||
pub tenant_shard_id: TenantShardId,
|
||||
@@ -136,7 +135,6 @@ impl StorageController {
|
||||
client: reqwest::ClientBuilder::new()
|
||||
.build()
|
||||
.expect("Failed to construct http client"),
|
||||
config: env.storage_controller.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -243,13 +241,9 @@ impl StorageController {
anyhow::bail!("initdb failed with status {status}");
}

// Write a minimal config file:
// - Specify the port, since this is chosen dynamically
// - Switch off fsync, since we're running on lightweight test environments and when e.g. scale testing
// the storage controller we don't want a slow local disk to interfere with that.
tokio::fs::write(
&pg_data_path.join("postgresql.conf"),
format!("port = {}\nfsync=off\n", self.postgres_port),
format!("port = {}", self.postgres_port),
)
.await?;
};
|
||||
@@ -278,6 +272,8 @@ impl StorageController {
|
||||
// Run migrations on every startup, in case something changed.
|
||||
let database_url = self.setup_database().await?;
|
||||
|
||||
let max_unavailable: humantime::Duration = NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL.into();
|
||||
|
||||
let mut args = vec![
|
||||
"-l",
|
||||
&self.listen,
|
||||
@@ -287,7 +283,7 @@ impl StorageController {
|
||||
"--database-url",
|
||||
&database_url,
|
||||
"--max-unavailable-interval",
|
||||
&humantime::Duration::from(self.config.max_unavailable).to_string(),
|
||||
&max_unavailable.to_string(),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|s| s.to_string())
|
||||
@@ -309,10 +305,6 @@ impl StorageController {
|
||||
));
|
||||
}
|
||||
|
||||
if let Some(split_threshold) = self.config.split_threshold.as_ref() {
|
||||
args.push(format!("--split-threshold={split_threshold}"))
|
||||
}
|
||||
|
||||
background_process::start_process(
|
||||
COMMAND,
|
||||
&self.env.base_data_dir,
|
||||
@@ -387,7 +379,7 @@ impl StorageController {
|
||||
/// Simple HTTP request wrapper for calling into storage controller
|
||||
async fn dispatch<RQ, RS>(
|
||||
&self,
|
||||
method: reqwest::Method,
|
||||
method: hyper::Method,
|
||||
path: String,
|
||||
body: Option<RQ>,
|
||||
) -> anyhow::Result<RS>
|
||||
@@ -480,16 +472,6 @@ impl StorageController {
|
||||
.await
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub async fn tenant_import(&self, tenant_id: TenantId) -> anyhow::Result<TenantCreateResponse> {
|
||||
self.dispatch::<(), TenantCreateResponse>(
|
||||
Method::POST,
|
||||
format!("debug/v1/tenant/{tenant_id}/import"),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
pub async fn tenant_locate(&self, tenant_id: TenantId) -> anyhow::Result<TenantLocateResponse> {
|
||||
self.dispatch::<(), _>(
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
[package]
|
||||
name = "storcon_cli"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
clap.workspace = true
|
||||
comfy-table.workspace = true
|
||||
futures.workspace = true
|
||||
humantime.workspace = true
|
||||
hyper.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
pageserver_client.workspace = true
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json = { workspace = true, features = ["raw_value"] }
|
||||
thiserror.workspace = true
|
||||
tokio.workspace = true
|
||||
tracing.workspace = true
|
||||
utils.workspace = true
|
||||
workspace_hack.workspace = true
|
||||
|
||||
@@ -1,948 +0,0 @@
|
||||
use futures::StreamExt;
|
||||
use std::{collections::HashMap, str::FromStr, time::Duration};
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use pageserver_api::{
|
||||
controller_api::{
|
||||
NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy,
|
||||
TenantDescribeResponse, TenantPolicyRequest,
|
||||
},
|
||||
models::{
|
||||
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
|
||||
ShardParameters, TenantConfig, TenantConfigRequest, TenantCreateRequest,
|
||||
TenantShardSplitRequest, TenantShardSplitResponse,
|
||||
},
|
||||
shard::{ShardStripeSize, TenantShardId},
|
||||
};
|
||||
use pageserver_client::mgmt_api::{self, ResponseErrorMessageExt};
|
||||
use reqwest::{Method, StatusCode, Url};
|
||||
use serde::{de::DeserializeOwned, Serialize};
|
||||
use utils::id::{NodeId, TenantId};
|
||||
|
||||
use pageserver_api::controller_api::{
|
||||
NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
|
||||
TenantLocateResponse, TenantShardMigrateRequest, TenantShardMigrateResponse,
|
||||
};
|
||||
|
||||
#[derive(Subcommand, Debug)]
|
||||
enum Command {
|
||||
/// Register a pageserver with the storage controller. This shouldn't usually be necessary,
|
||||
/// since pageservers auto-register when they start up
|
||||
NodeRegister {
|
||||
#[arg(long)]
|
||||
node_id: NodeId,
|
||||
|
||||
#[arg(long)]
|
||||
listen_pg_addr: String,
|
||||
#[arg(long)]
|
||||
listen_pg_port: u16,
|
||||
|
||||
#[arg(long)]
|
||||
listen_http_addr: String,
|
||||
#[arg(long)]
|
||||
listen_http_port: u16,
|
||||
},
|
||||
|
||||
/// Modify a node's configuration in the storage controller
|
||||
NodeConfigure {
|
||||
#[arg(long)]
|
||||
node_id: NodeId,
|
||||
|
||||
/// Availability is usually auto-detected based on heartbeats. Set 'offline' here to
|
||||
/// manually mark a node offline
|
||||
#[arg(long)]
|
||||
availability: Option<NodeAvailabilityArg>,
|
||||
/// Scheduling policy controls whether tenant shards may be scheduled onto this node.
|
||||
#[arg(long)]
|
||||
scheduling: Option<NodeSchedulingPolicy>,
|
||||
},
|
||||
/// Modify a tenant's policies in the storage controller
|
||||
TenantPolicy {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
/// Placement policy controls whether a tenant is `detached`, has only a secondary location (`secondary`),
|
||||
/// or is in the normal attached state with N secondary locations (`attached:N`)
|
||||
#[arg(long)]
|
||||
placement: Option<PlacementPolicyArg>,
|
||||
/// Scheduling policy enables pausing the controller's scheduling activity involving this tenant. `active` is normal,
|
||||
/// `essential` disables optimization scheduling changes, `pause` disables all scheduling changes, and `stop` prevents
|
||||
/// all reconciliation activity including for scheduling changes already made. `pause` and `stop` can make a tenant
|
||||
/// unavailable, and are only for use in emergencies.
|
||||
#[arg(long)]
|
||||
scheduling: Option<ShardSchedulingPolicyArg>,
|
||||
},
|
||||
/// List nodes known to the storage controller
|
||||
Nodes {},
|
||||
/// List tenants known to the storage controller
|
||||
Tenants {},
|
||||
/// Create a new tenant in the storage controller, and by extension on pageservers.
|
||||
TenantCreate {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
/// Delete a tenant in the storage controller, and by extension on pageservers.
|
||||
TenantDelete {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
/// Split an existing tenant into a higher number of shards than its current shard count.
|
||||
TenantShardSplit {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
shard_count: u8,
|
||||
/// Optional, in 8kiB pages. e.g. set 2048 for 16MB stripes.
|
||||
#[arg(long)]
|
||||
stripe_size: Option<u32>,
|
||||
},
|
||||
/// Migrate the attached location for a tenant shard to a specific pageserver.
|
||||
TenantShardMigrate {
|
||||
#[arg(long)]
|
||||
tenant_shard_id: TenantShardId,
|
||||
#[arg(long)]
|
||||
node: NodeId,
|
||||
},
|
||||
/// Modify the pageserver tenant configuration of a tenant: this is the configuration structure
|
||||
/// that is passed through to pageservers, and does not affect storage controller behavior.
|
||||
TenantConfig {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
config: String,
|
||||
},
|
||||
/// Attempt to balance the locations for a tenant across pageservers. This is a client-side
|
||||
/// alternative to the storage controller's scheduling optimization behavior.
|
||||
TenantScatter {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
/// Print details about a particular tenant, including all its shards' states.
|
||||
TenantDescribe {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
/// For a tenant which hasn't been onboarded to the storage controller yet, add it in secondary
|
||||
/// mode so that it can warm up content on a pageserver.
|
||||
TenantWarmup {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
},
|
||||
/// Uncleanly drop a tenant from the storage controller: this doesn't delete anything from pageservers. Appropriate
|
||||
/// if you e.g. used `tenant-warmup` by mistake on a tenant ID that doesn't really exist, or is in some other region.
|
||||
TenantDrop {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
unclean: bool,
|
||||
},
|
||||
NodeDrop {
|
||||
#[arg(long)]
|
||||
node_id: NodeId,
|
||||
#[arg(long)]
|
||||
unclean: bool,
|
||||
},
|
||||
TenantSetTimeBasedEviction {
|
||||
#[arg(long)]
|
||||
tenant_id: TenantId,
|
||||
#[arg(long)]
|
||||
period: humantime::Duration,
|
||||
#[arg(long)]
|
||||
threshold: humantime::Duration,
|
||||
},
|
||||
// Drain a set of specified pageservers by moving the primary attachments to pageservers
|
||||
// outside of the specified set.
|
||||
Drain {
|
||||
// Set of pageserver node ids to drain.
|
||||
#[arg(long)]
|
||||
nodes: Vec<NodeId>,
|
||||
// Optional: migration concurrency (default is 8)
|
||||
#[arg(long)]
|
||||
concurrency: Option<usize>,
|
||||
// Optional: maximum number of shards to migrate
|
||||
#[arg(long)]
|
||||
max_shards: Option<usize>,
|
||||
// Optional: when set to true, nothing is migrated, but the plan is printed to stdout
|
||||
#[arg(long)]
|
||||
dry_run: Option<bool>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(
|
||||
author,
|
||||
version,
|
||||
about,
|
||||
long_about = "CLI for Storage Controller Support/Debug"
|
||||
)]
|
||||
#[command(arg_required_else_help(true))]
|
||||
struct Cli {
|
||||
#[arg(long)]
|
||||
/// URL to storage controller. e.g. http://127.0.0.1:1234 when using `neon_local`
|
||||
api: Url,
|
||||
|
||||
#[arg(long)]
|
||||
/// JWT token for authenticating with storage controller. Depending on the API used, this
|
||||
/// should have either `pageserverapi` or `admin` scopes: for convenience, you should mint
|
||||
/// a token with both scopes to use with this tool.
|
||||
jwt: Option<String>,
|
||||
|
||||
#[command(subcommand)]
|
||||
command: Command,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct PlacementPolicyArg(PlacementPolicy);
|
||||
|
||||
impl FromStr for PlacementPolicyArg {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"detached" => Ok(Self(PlacementPolicy::Detached)),
|
||||
"secondary" => Ok(Self(PlacementPolicy::Secondary)),
|
||||
_ if s.starts_with("attached:") => {
|
||||
let mut splitter = s.split(':');
|
||||
let _prefix = splitter.next().unwrap();
|
||||
match splitter.next().and_then(|s| s.parse::<usize>().ok()) {
|
||||
Some(n) => Ok(Self(PlacementPolicy::Attached(n))),
|
||||
None => Err(anyhow::anyhow!(
|
||||
"Invalid format '{s}', a valid example is 'attached:1'"
|
||||
)),
|
||||
}
|
||||
}
|
||||
_ => Err(anyhow::anyhow!(
|
||||
"Unknown placement policy '{s}', try detached,secondary,attached:<n>"
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ShardSchedulingPolicyArg(ShardSchedulingPolicy);
|
||||
|
||||
impl FromStr for ShardSchedulingPolicyArg {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"active" => Ok(Self(ShardSchedulingPolicy::Active)),
|
||||
"essential" => Ok(Self(ShardSchedulingPolicy::Essential)),
|
||||
"pause" => Ok(Self(ShardSchedulingPolicy::Pause)),
|
||||
"stop" => Ok(Self(ShardSchedulingPolicy::Stop)),
|
||||
_ => Err(anyhow::anyhow!(
|
||||
"Unknown scheduling policy '{s}', try active,essential,pause,stop"
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct NodeAvailabilityArg(NodeAvailabilityWrapper);
|
||||
|
||||
impl FromStr for NodeAvailabilityArg {
|
||||
type Err = anyhow::Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"active" => Ok(Self(NodeAvailabilityWrapper::Active)),
|
||||
"offline" => Ok(Self(NodeAvailabilityWrapper::Offline)),
|
||||
_ => Err(anyhow::anyhow!("Unknown availability state '{s}'")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Client {
|
||||
base_url: Url,
|
||||
jwt_token: Option<String>,
|
||||
client: reqwest::Client,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
fn new(base_url: Url, jwt_token: Option<String>) -> Self {
|
||||
Self {
|
||||
base_url,
|
||||
jwt_token,
|
||||
client: reqwest::ClientBuilder::new()
|
||||
.build()
|
||||
.expect("Failed to construct http client"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple HTTP request wrapper for calling into storage controller
|
||||
async fn dispatch<RQ, RS>(
|
||||
&self,
|
||||
method: Method,
|
||||
path: String,
|
||||
body: Option<RQ>,
|
||||
) -> mgmt_api::Result<RS>
|
||||
where
|
||||
RQ: Serialize + Sized,
|
||||
RS: DeserializeOwned + Sized,
|
||||
{
|
||||
// The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
|
||||
// for general purpose API access.
|
||||
let url = Url::from_str(&format!(
|
||||
"http://{}:{}/{path}",
|
||||
self.base_url.host_str().unwrap(),
|
||||
self.base_url.port().unwrap()
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let mut builder = self.client.request(method, url);
|
||||
if let Some(body) = body {
|
||||
builder = builder.json(&body)
|
||||
}
|
||||
if let Some(jwt_token) = &self.jwt_token {
|
||||
builder = builder.header(
|
||||
reqwest::header::AUTHORIZATION,
|
||||
format!("Bearer {jwt_token}"),
|
||||
);
|
||||
}
|
||||
|
||||
let response = builder.send().await.map_err(mgmt_api::Error::ReceiveBody)?;
|
||||
let response = response.error_from_body().await?;
|
||||
|
||||
response
|
||||
.json()
|
||||
.await
|
||||
.map_err(pageserver_client::mgmt_api::Error::ReceiveBody)
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();

    let storcon_client = Client::new(cli.api.clone(), cli.jwt.clone());

    let mut trimmed = cli.api.to_string();
    trimmed.pop();
    let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref());

    match cli.command {
        Command::NodeRegister {
            node_id,
            listen_pg_addr,
            listen_pg_port,
            listen_http_addr,
            listen_http_port,
        } => {
            storcon_client
                .dispatch::<_, ()>(
                    Method::POST,
                    "control/v1/node".to_string(),
                    Some(NodeRegisterRequest {
                        node_id,
                        listen_pg_addr,
                        listen_pg_port,
                        listen_http_addr,
                        listen_http_port,
                    }),
                )
                .await?;
        }
        Command::TenantCreate { tenant_id } => {
            vps_client
                .tenant_create(&TenantCreateRequest {
                    new_tenant_id: TenantShardId::unsharded(tenant_id),
                    generation: None,
                    shard_parameters: ShardParameters::default(),
                    placement_policy: Some(PlacementPolicy::Attached(1)),
                    config: TenantConfig::default(),
                })
                .await?;
        }
        Command::TenantDelete { tenant_id } => {
            let status = vps_client
                .tenant_delete(TenantShardId::unsharded(tenant_id))
                .await?;
            tracing::info!("Delete status: {}", status);
        }
        Command::Nodes {} => {
            let resp = storcon_client
                .dispatch::<(), Vec<NodeDescribeResponse>>(
                    Method::GET,
                    "control/v1/node".to_string(),
                    None,
                )
                .await?;
            let mut table = comfy_table::Table::new();
            table.set_header(["Id", "Hostname", "Scheduling", "Availability"]);
            for node in resp {
                table.add_row([
                    format!("{}", node.id),
                    node.listen_http_addr,
                    format!("{:?}", node.scheduling),
                    format!("{:?}", node.availability),
                ]);
            }
            println!("{table}");
        }
        Command::NodeConfigure {
            node_id,
            availability,
            scheduling,
        } => {
            let req = NodeConfigureRequest {
                node_id,
                availability: availability.map(|a| a.0),
                scheduling,
            };
            storcon_client
                .dispatch::<_, ()>(
                    Method::PUT,
                    format!("control/v1/node/{node_id}/config"),
                    Some(req),
                )
                .await?;
        }
        Command::Tenants {} => {
            let resp = storcon_client
                .dispatch::<(), Vec<TenantDescribeResponse>>(
                    Method::GET,
                    "control/v1/tenant".to_string(),
                    None,
                )
                .await?;
            let mut table = comfy_table::Table::new();
            table.set_header([
                "TenantId",
                "ShardCount",
                "StripeSize",
                "Placement",
                "Scheduling",
            ]);
            for tenant in resp {
                let shard_zero = tenant.shards.into_iter().next().unwrap();
                table.add_row([
                    format!("{}", tenant.tenant_id),
                    format!("{}", shard_zero.tenant_shard_id.shard_count.literal()),
                    format!("{:?}", tenant.stripe_size),
                    format!("{:?}", tenant.policy),
                    format!("{:?}", shard_zero.scheduling_policy),
                ]);
            }

            println!("{table}");
        }
        Command::TenantPolicy {
            tenant_id,
            placement,
            scheduling,
        } => {
            let req = TenantPolicyRequest {
                scheduling: scheduling.map(|s| s.0),
                placement: placement.map(|p| p.0),
            };
            storcon_client
                .dispatch::<_, ()>(
                    Method::PUT,
                    format!("control/v1/tenant/{tenant_id}/policy"),
                    Some(req),
                )
                .await?;
        }
        Command::TenantShardSplit {
            tenant_id,
            shard_count,
            stripe_size,
        } => {
            let req = TenantShardSplitRequest {
                new_shard_count: shard_count,
                new_stripe_size: stripe_size.map(ShardStripeSize),
            };

            let response = storcon_client
                .dispatch::<TenantShardSplitRequest, TenantShardSplitResponse>(
                    Method::PUT,
                    format!("control/v1/tenant/{tenant_id}/shard_split"),
                    Some(req),
                )
                .await?;
            println!(
                "Split tenant {} into {} shards: {}",
                tenant_id,
                shard_count,
                response
                    .new_shards
                    .iter()
                    .map(|s| format!("{:?}", s))
                    .collect::<Vec<_>>()
                    .join(",")
            );
        }
        Command::TenantShardMigrate {
            tenant_shard_id,
            node,
        } => {
            let req = TenantShardMigrateRequest {
                tenant_shard_id,
                node_id: node,
            };

            storcon_client
                .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
                    Method::PUT,
                    format!("control/v1/tenant/{tenant_shard_id}/migrate"),
                    Some(req),
                )
                .await?;
        }
        Command::TenantConfig { tenant_id, config } => {
            let tenant_conf = serde_json::from_str(&config)?;

            vps_client
                .tenant_config(&TenantConfigRequest {
                    tenant_id,
                    config: tenant_conf,
                })
                .await?;
        }
        Command::TenantScatter { tenant_id } => {
            // Find the shards
            let locate_response = storcon_client
                .dispatch::<(), TenantLocateResponse>(
                    Method::GET,
                    format!("control/v1/tenant/{tenant_id}/locate"),
                    None,
                )
                .await?;
            let shards = locate_response.shards;

            let mut node_to_shards: HashMap<NodeId, Vec<TenantShardId>> = HashMap::new();
            let shard_count = shards.len();
            for s in shards {
                let entry = node_to_shards.entry(s.node_id).or_default();
                entry.push(s.shard_id);
            }

            // Load list of available nodes
            let nodes_resp = storcon_client
                .dispatch::<(), Vec<NodeDescribeResponse>>(
                    Method::GET,
                    "control/v1/node".to_string(),
                    None,
                )
                .await?;

            for node in nodes_resp {
                if matches!(node.availability, NodeAvailabilityWrapper::Active) {
                    node_to_shards.entry(node.id).or_default();
                }
            }

            let max_shard_per_node = shard_count / node_to_shards.len();

            loop {
                let mut migrate_shard = None;
                for shards in node_to_shards.values_mut() {
                    if shards.len() > max_shard_per_node {
                        // Pick the emptiest
                        migrate_shard = Some(shards.pop().unwrap());
                    }
                }
                let Some(migrate_shard) = migrate_shard else {
                    break;
                };

                // Pick the emptiest node to migrate to
                let mut destinations = node_to_shards
                    .iter()
                    .map(|(k, v)| (k, v.len()))
                    .collect::<Vec<_>>();
                destinations.sort_by_key(|i| i.1);
                let (destination_node, destination_count) = *destinations.first().unwrap();
                if destination_count + 1 > max_shard_per_node {
                    // Even the emptiest destination doesn't have space: we're done
                    break;
                }
                let destination_node = *destination_node;

                node_to_shards
                    .get_mut(&destination_node)
                    .unwrap()
                    .push(migrate_shard);

                println!("Migrate {} -> {} ...", migrate_shard, destination_node);

                storcon_client
                    .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
                        Method::PUT,
                        format!("control/v1/tenant/{migrate_shard}/migrate"),
                        Some(TenantShardMigrateRequest {
                            tenant_shard_id: migrate_shard,
                            node_id: destination_node,
                        }),
                    )
                    .await?;
                println!("Migrate {} -> {} OK", migrate_shard, destination_node);
            }

            // Spread the shards across the nodes
        }
        Command::TenantDescribe { tenant_id } => {
            let describe_response = storcon_client
                .dispatch::<(), TenantDescribeResponse>(
                    Method::GET,
                    format!("control/v1/tenant/{tenant_id}"),
                    None,
                )
                .await?;
            let shards = describe_response.shards;
            let mut table = comfy_table::Table::new();
            table.set_header(["Shard", "Attached", "Secondary", "Last error", "status"]);
            for shard in shards {
                let secondary = shard
                    .node_secondary
                    .iter()
                    .map(|n| format!("{}", n))
                    .collect::<Vec<_>>()
                    .join(",");

                let mut status_parts = Vec::new();
                if shard.is_reconciling {
                    status_parts.push("reconciling");
                }

                if shard.is_pending_compute_notification {
                    status_parts.push("pending_compute");
                }

                if shard.is_splitting {
                    status_parts.push("splitting");
                }
                let status = status_parts.join(",");

                table.add_row([
                    format!("{}", shard.tenant_shard_id),
                    shard
                        .node_attached
                        .map(|n| format!("{}", n))
                        .unwrap_or(String::new()),
                    secondary,
                    shard.last_error,
                    status,
                ]);
            }
            println!("{table}");
        }
        Command::TenantWarmup { tenant_id } => {
            let describe_response = storcon_client
                .dispatch::<(), TenantDescribeResponse>(
                    Method::GET,
                    format!("control/v1/tenant/{tenant_id}"),
                    None,
                )
                .await;
            match describe_response {
                Ok(describe) => {
                    if matches!(describe.policy, PlacementPolicy::Secondary) {
                        // Fine: it's already known to controller in secondary mode: calling
                        // again to put it into secondary mode won't cause problems.
                    } else {
                        anyhow::bail!("Tenant already present with policy {:?}", describe.policy);
                    }
                }
                Err(mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, _)) => {
                    // Fine: this tenant isn't known to the storage controller yet.
                }
                Err(e) => {
                    // Unexpected API error
                    return Err(e.into());
                }
            }

            vps_client
                .location_config(
                    TenantShardId::unsharded(tenant_id),
                    pageserver_api::models::LocationConfig {
                        mode: pageserver_api::models::LocationConfigMode::Secondary,
                        generation: None,
                        secondary_conf: Some(LocationConfigSecondary { warm: true }),
                        shard_number: 0,
                        shard_count: 0,
                        shard_stripe_size: ShardParameters::DEFAULT_STRIPE_SIZE.0,
                        tenant_conf: TenantConfig::default(),
                    },
                    None,
                    true,
                )
                .await?;

            let describe_response = storcon_client
                .dispatch::<(), TenantDescribeResponse>(
                    Method::GET,
                    format!("control/v1/tenant/{tenant_id}"),
                    None,
                )
                .await?;

            let secondary_ps_id = describe_response
                .shards
                .first()
                .unwrap()
                .node_secondary
                .first()
                .unwrap();

            println!("Tenant {tenant_id} warming up on pageserver {secondary_ps_id}");
            loop {
                let (status, progress) = vps_client
                    .tenant_secondary_download(
                        TenantShardId::unsharded(tenant_id),
                        Some(Duration::from_secs(10)),
                    )
                    .await?;
                println!(
                    "Progress: {}/{} layers, {}/{} bytes",
                    progress.layers_downloaded,
                    progress.layers_total,
                    progress.bytes_downloaded,
                    progress.bytes_total
                );
                match status {
                    StatusCode::OK => {
                        println!("Download complete");
                        break;
                    }
                    StatusCode::ACCEPTED => {
                        // Loop
                    }
                    _ => {
                        anyhow::bail!("Unexpected download status: {status}");
                    }
                }
            }
        }
        Command::TenantDrop { tenant_id, unclean } => {
            if !unclean {
                anyhow::bail!("This command is not a tenant deletion, and uncleanly drops all controller state for the tenant. If you know what you're doing, add `--unclean` to proceed.")
            }
            storcon_client
                .dispatch::<(), ()>(
                    Method::POST,
                    format!("debug/v1/tenant/{tenant_id}/drop"),
                    None,
                )
                .await?;
        }
        Command::NodeDrop { node_id, unclean } => {
            if !unclean {
                anyhow::bail!("This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it. If you know what you're doing, add `--unclean` to proceed.")
            }
            storcon_client
                .dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None)
                .await?;
        }
        Command::TenantSetTimeBasedEviction {
            tenant_id,
            period,
            threshold,
        } => {
            vps_client
                .tenant_config(&TenantConfigRequest {
                    tenant_id,
                    config: TenantConfig {
                        eviction_policy: Some(EvictionPolicy::LayerAccessThreshold(
                            EvictionPolicyLayerAccessThreshold {
                                period: period.into(),
                                threshold: threshold.into(),
                            },
                        )),
                        ..Default::default()
                    },
                })
                .await?;
        }
        Command::Drain {
            nodes,
            concurrency,
            max_shards,
            dry_run,
        } => {
            // Load the list of nodes, split them up into the drained and filled sets,
            // and validate that draining is possible.
            let node_descs = storcon_client
                .dispatch::<(), Vec<NodeDescribeResponse>>(
                    Method::GET,
                    "control/v1/node".to_string(),
                    None,
                )
                .await?;

            let mut node_to_drain_descs = Vec::new();
            let mut node_to_fill_descs = Vec::new();

            for desc in node_descs {
                let to_drain = nodes.iter().any(|id| *id == desc.id);
                if to_drain {
                    node_to_drain_descs.push(desc);
                } else {
                    node_to_fill_descs.push(desc);
                }
            }

            if nodes.len() != node_to_drain_descs.len() {
                anyhow::bail!("Drain requested for node which doesn't exist.")
            }

            node_to_fill_descs.retain(|desc| {
                matches!(desc.availability, NodeAvailabilityWrapper::Active)
                    && matches!(
                        desc.scheduling,
                        NodeSchedulingPolicy::Active | NodeSchedulingPolicy::Filling
                    )
            });

            if node_to_fill_descs.is_empty() {
                anyhow::bail!("There are no nodes to drain to")
            }

            // Set the node scheduling policy to draining for the nodes which
            // we plan to drain.
            for node_desc in node_to_drain_descs.iter() {
                let req = NodeConfigureRequest {
                    node_id: node_desc.id,
                    availability: None,
                    scheduling: Some(NodeSchedulingPolicy::Draining),
                };

                storcon_client
                    .dispatch::<_, ()>(
                        Method::PUT,
                        format!("control/v1/node/{}/config", node_desc.id),
                        Some(req),
                    )
                    .await?;
            }

            // Perform the drain: move each tenant shard scheduled on a node to
            // be drained to a node which is being filled. A simple round robin
            // strategy is used to pick the new node.
            let tenants = storcon_client
                .dispatch::<(), Vec<TenantDescribeResponse>>(
                    Method::GET,
                    "control/v1/tenant".to_string(),
                    None,
                )
                .await?;

            let mut selected_node_idx = 0;

            struct DrainMove {
                tenant_shard_id: TenantShardId,
                from: NodeId,
                to: NodeId,
            }

            let mut moves: Vec<DrainMove> = Vec::new();

            let shards = tenants
                .into_iter()
                .flat_map(|tenant| tenant.shards.into_iter());
            for shard in shards {
                if let Some(max_shards) = max_shards {
                    if moves.len() >= max_shards {
                        println!(
                            "Stop planning shard moves since the requested maximum was reached"
                        );
                        break;
                    }
                }

                let should_migrate = {
                    if let Some(attached_to) = shard.node_attached {
                        node_to_drain_descs
                            .iter()
                            .map(|desc| desc.id)
                            .any(|id| id == attached_to)
                    } else {
                        false
                    }
                };

                if !should_migrate {
                    continue;
                }

                moves.push(DrainMove {
                    tenant_shard_id: shard.tenant_shard_id,
                    from: shard
                        .node_attached
                        .expect("We only migrate attached tenant shards"),
                    to: node_to_fill_descs[selected_node_idx].id,
                });
                selected_node_idx = (selected_node_idx + 1) % node_to_fill_descs.len();
            }

            let total_moves = moves.len();

            if dry_run == Some(true) {
                println!("Dryrun requested. Planned {total_moves} moves:");
                for mv in &moves {
                    println!("{}: {} -> {}", mv.tenant_shard_id, mv.from, mv.to)
                }

                return Ok(());
            }

            const DEFAULT_MIGRATE_CONCURRENCY: usize = 8;
            let mut stream = futures::stream::iter(moves)
                .map(|mv| {
                    let client = Client::new(cli.api.clone(), cli.jwt.clone());
                    async move {
                        client
                            .dispatch::<TenantShardMigrateRequest, TenantShardMigrateResponse>(
                                Method::PUT,
                                format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id),
                                Some(TenantShardMigrateRequest {
                                    tenant_shard_id: mv.tenant_shard_id,
                                    node_id: mv.to,
                                }),
                            )
                            .await
                            .map_err(|e| (mv.tenant_shard_id, mv.from, mv.to, e))
                    }
                })
                .buffered(concurrency.unwrap_or(DEFAULT_MIGRATE_CONCURRENCY));

            let mut success = 0;
            let mut failure = 0;

            while let Some(res) = stream.next().await {
                match res {
                    Ok(_) => {
                        success += 1;
                    }
                    Err((tenant_shard_id, from, to, error)) => {
                        failure += 1;
                        println!(
                            "Failed to migrate {} from node {} to node {}: {}",
                            tenant_shard_id, from, to, error
                        );
                    }
                }

                if (success + failure) % 20 == 0 {
                    println!(
                        "Processed {}/{} shards: {} succeeded, {} failed",
                        success + failure,
                        total_moves,
                        success,
                        failure
                    );
                }
            }

            println!(
                "Processed {}/{} shards: {} succeeded, {} failed",
                success + failure,
                total_moves,
                success,
                failure
            );
        }
    }

    Ok(())
}

@@ -99,13 +99,6 @@ name = "async-executor"
[[bans.deny]]
name = "smol"

[[bans.deny]]
# We want to use rustls instead of the platform's native tls implementation.
name = "native-tls"

[[bans.deny]]
name = "openssl"

# This section is considered when running `cargo deny check sources`.
# More documentation about the 'sources' section can be found here:
# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html

@@ -2,8 +2,8 @@
# see https://diesel.rs/guides/configuring-diesel-cli

[print_schema]
file = "storage_controller/src/schema.rs"
file = "control_plane/attachment_service/src/schema.rs"
custom_type_derives = ["diesel::query_builder::QueryId"]

[migrations_directory]
dir = "storage_controller/migrations"
dir = "control_plane/attachment_service/migrations"

@@ -1,4 +1,4 @@
ARG REPOSITORY=neondatabase
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG COMPUTE_IMAGE=compute-node-v14
ARG TAG=latest

@@ -8,11 +8,6 @@ USER root
RUN apt-get update && \
    apt-get install -y curl \
    jq \
    python3-pip \
    netcat
#Faker is required for the pg_anon test
RUN pip3 install Faker
#This is required for the pg_hintplan test
RUN mkdir -p /ext-src/pg_hint_plan-src && chown postgres /ext-src/pg_hint_plan-src

USER postgres
USER postgres

@@ -95,7 +95,7 @@
        },
        {
          "name": "shared_preload_libraries",
          "value": "neon,pg_cron,timescaledb,pg_stat_statements",
          "value": "neon",
          "vartype": "string"
        },
        {
@@ -127,16 +127,6 @@
          "name": "max_replication_flush_lag",
          "value": "10GB",
          "vartype": "string"
        },
        {
          "name": "cron.database",
          "value": "postgres",
          "vartype": "string"
        },
        {
          "name": "session_preload_libraries",
          "value": "anon",
          "vartype": "string"
        }
      ]
    },

@@ -1,3 +1,5 @@
version: '3'

services:
  minio:
    restart: always
@@ -159,12 +161,12 @@ services:
      context: ./compute_wrapper/
      args:
        - REPOSITORY=${REPOSITORY:-neondatabase}
        - COMPUTE_IMAGE=compute-node-v${PG_VERSION:-16}
        - COMPUTE_IMAGE=compute-node-v${PG_VERSION:-14}
        - TAG=${TAG:-latest}
        - http_proxy=$http_proxy
        - https_proxy=$https_proxy
    environment:
      - PG_VERSION=${PG_VERSION:-16}
      - PG_VERSION=${PG_VERSION:-14}
      #- RUST_BACKTRACE=1
    # Mount the test files directly, for faster editing cycle.
    volumes:
@@ -192,14 +194,3 @@ services:
          done"
    depends_on:
      - compute

  neon-test-extensions:
    profiles: ["test-extensions"]
    image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
    entrypoint:
      - "/bin/bash"
      - "-c"
    command:
      - sleep 1800
    depends_on:
      - compute

@@ -7,94 +7,54 @@
# Implicitly accepts `REPOSITORY` and `TAG` env vars that are passed into the compose file
# Their defaults point at DockerHub `neondatabase/neon:latest` image.`,
# to verify custom image builds (e.g. pre-published ones).
#
# A test script for postgres extensions
# Currently supports only v16
#

# XXX: Currently does not work on M1 Macs, because only x86_64 Docker images are built and the M1 Docker emulation layer has no seccomp support.

set -eux -o pipefail

COMPOSE_FILE='docker-compose.yml'
cd $(dirname $0)
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
COMPOSE_FILE=$SCRIPT_DIR/docker-compose.yml

COMPUTE_CONTAINER_NAME=docker-compose-compute-1
TEST_CONTAINER_NAME=docker-compose-neon-test-extensions-1
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"
: ${http_proxy:=}
: ${https_proxy:=}
export http_proxy https_proxy
SQL="CREATE TABLE t(key int primary key, value text); insert into t values(1,1); select * from t;"
PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -c '$SQL' postgres"

cleanup() {
    echo "show container information"
    docker ps
    docker compose --profile test-extensions -f $COMPOSE_FILE logs
    docker compose -f $COMPOSE_FILE logs
    echo "stop containers..."
    docker compose --profile test-extensions -f $COMPOSE_FILE down
    docker compose -f $COMPOSE_FILE down
}

echo "clean up containers if exists"
cleanup

for pg_version in 14 15 16; do
  echo "clean up containers if exists"
  cleanup
  PG_TEST_VERSION=$(($pg_version < 16 ? 16 : $pg_version))
  PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d
  echo "start containers (pg_version=$pg_version)."
  PG_VERSION=$pg_version docker compose -f $COMPOSE_FILE up --build -d

  echo "wait until the compute is ready. timeout after 60s. "
  cnt=0
  while sleep 3; do
  while sleep 1; do
    # check timeout
    cnt=`expr $cnt + 3`
    cnt=`expr $cnt + 1`
    if [ $cnt -gt 60 ]; then
      echo "timeout before the compute is ready."
      cleanup
      exit 1
    fi
    if docker compose --profile test-extensions -f $COMPOSE_FILE logs "compute_is_ready" | grep -q "accepting connections"; then

    # check if the compute is ready
    set +o pipefail
    result=`docker compose -f $COMPOSE_FILE logs "compute_is_ready" | grep "accepting connections" | wc -l`
    set -o pipefail
    if [ $result -eq 1 ]; then
      echo "OK. The compute is ready to connect."
      echo "execute simple queries."
      docker exec $COMPUTE_CONTAINER_NAME /bin/bash -c "psql $PSQL_OPTION"
      cleanup
      break
    fi
  done

  if [ $pg_version -ge 16 ]
  then
    echo Enabling trust connection
    docker exec $COMPUTE_CONTAINER_NAME bash -c "sed -i '\$d' /var/db/postgres/compute/pg_hba.conf && echo -e 'host\t all\t all\t all\t trust' >> /var/db/postgres/compute/pg_hba.conf && psql $PSQL_OPTION -c 'select pg_reload_conf()' "
    echo Adding postgres role
    docker exec $COMPUTE_CONTAINER_NAME psql $PSQL_OPTION -c "CREATE ROLE postgres SUPERUSER LOGIN"
    # This is required for the pg_hint_plan test, to prevent a flaky log message causing the test to fail
    # It cannot be moved to the Dockerfile now because the database directory is created after the start of the container
    echo Adding dummy config
    docker exec $COMPUTE_CONTAINER_NAME touch /var/db/postgres/compute/compute_ctl_temp_override.conf
    # This block is required for the pg_anon extension test.
    # The test assumes that it is running on the same host with the postgres engine.
    # In our case it's not true, that's why we are copying files to the compute node
    TMPDIR=$(mktemp -d)
    docker cp $TEST_CONTAINER_NAME:/ext-src/pg_anon-src/data $TMPDIR/data
    echo -e '1\t too \t many \t tabs' > $TMPDIR/data/bad.csv
    docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/tmp/tmp_anon_alternate_data
    rm -rf $TMPDIR
    TMPDIR=$(mktemp -d)
    # The following block does the same for the pg_hintplan test
    docker cp $TEST_CONTAINER_NAME:/ext-src/pg_hint_plan-src/data $TMPDIR/data
    docker cp $TMPDIR/data $COMPUTE_CONTAINER_NAME:/ext-src/pg_hint_plan-src/
    rm -rf $TMPDIR
    # We are running tests now
    if docker exec -e SKIP=rum-src,timescaledb-src,rdkit-src,postgis-src,pgx_ulid-src,pgtap-src,pg_tiktoken-src,pg_jsonschema-src,pg_graphql-src,kq_imcx-src,wal2json_2_5-src \
      $TEST_CONTAINER_NAME /run-tests.sh | tee testout.txt
    then
      cleanup
    else
      FAILED=$(tail -1 testout.txt)
      for d in $FAILED
      do
        mkdir $d
        docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.diffs $d || true
        docker cp $TEST_CONTAINER_NAME:/ext-src/$d/regression.out $d || true
        cat $d/regression.out $d/regression.diffs || true
      done
      rm -rf $FAILED
      cleanup
      exit 1
    fi
  fi
  cleanup
done

@@ -1,15 +0,0 @@
#!/bin/bash
set -x

cd /ext-src
FAILED=
LIST=$((echo ${SKIP} | sed 's/,/\n/g'; ls -d *-src) | sort | uniq -u)
for d in ${LIST}
do
    [ -d ${d} ] || continue
    psql -c "select 1" >/dev/null || break
    make -C ${d} installcheck || FAILED="${d} ${FAILED}"
done
[ -z "${FAILED}" ] && exit 0
echo ${FAILED}
exit 1
@@ -4,18 +4,18 @@

Currently we build two main images:

- [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14.
- [neondatabase/neon](https://hub.docker.com/repository/docker/zenithdb/zenith) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
- [neondatabase/compute-node](https://hub.docker.com/repository/docker/zenithdb/compute-node) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres).

And an additional intermediate image:

- [neondatabase/compute-tools](https://hub.docker.com/repository/docker/neondatabase/compute-tools) — compute node configuration management tools.

## Build pipeline
## Building pipeline

We build all images after a successful `release` test run and push them automatically to Docker Hub with two parallel CI jobs:

1. `neondatabase/compute-tools` and `neondatabase/compute-node-v16` (and -v15 and -v14)
1. `neondatabase/compute-tools` and `neondatabase/compute-node`

2. `neondatabase/neon`

@@ -34,12 +34,12 @@ You can see a [docker compose](https://docs.docker.com/compose/) example to crea
1. create containers

You can specify the version of the Neon cluster using the following environment variables:
- PG_VERSION: postgres version for compute (default is 16 as of this writing)
- TAG: the tag version of the [docker image](https://registry.hub.docker.com/r/neondatabase/neon/tags), which is tagged in [CI test](/.github/workflows/build_and_test.yml). Default is 'latest'
- PG_VERSION: postgres version for compute (default is 14)
- TAG: the tag version of the [docker image](https://registry.hub.docker.com/r/neondatabase/neon/tags) (default is latest), which is tagged in [CI test](/.github/workflows/build_and_test.yml)
```
$ cd docker-compose/
$ docker-compose down # remove the containers if they exist
$ PG_VERSION=16 TAG=latest docker-compose up --build -d # You can specify the postgres and image version
$ PG_VERSION=15 TAG=2937 docker-compose up --build -d # You can specify the postgres and image version
Creating network "dockercompose_default" with the default driver
Creating docker-compose_storage_broker_1 ... done
(...omit...)
@@ -47,31 +47,29 @@ Creating docker-compose_storage_broker_1 ... done

2. connect compute node
```
$ psql postgresql://cloud_admin:cloud_admin@localhost:55433/postgres
psql (16.3)
Type "help" for help.

$ echo "localhost:55433:postgres:cloud_admin:cloud_admin" >> ~/.pgpass
$ chmod 600 ~/.pgpass
$ psql -h localhost -p 55433 -U cloud_admin
postgres=# CREATE TABLE t(key int primary key, value text);
CREATE TABLE
postgres=# insert into t values(1, 1);
postgres=# insert into t values(1,1);
INSERT 0 1
postgres=# select * from t;
 key | value
 key | value
-----+-------
   1 | 1
(1 row)

```

3. If you want to see the logs, you can use the `docker-compose logs` command.
```
# check the container name you want to see
$ docker ps
CONTAINER ID   IMAGE                    COMMAND               CREATED         STATUS         PORTS                                                                                      NAMES
3582f6d76227   docker-compose_compute   "/shell/compute.sh"   2 minutes ago   Up 2 minutes   0.0.0.0:3080->3080/tcp, :::3080->3080/tcp, 0.0.0.0:55433->55433/tcp, :::55433->55433/tcp   docker-compose_compute_1
CONTAINER ID   IMAGE                    COMMAND               CREATED         STATUS         PORTS                                              NAMES
d6968a5ae912   dockercompose_compute    "/shell/compute.sh"   5 minutes ago   Up 5 minutes   0.0.0.0:3080->3080/tcp, 0.0.0.0:55433->55433/tcp   dockercompose_compute_1
(...omit...)

$ docker logs -f docker-compose_compute_1
$ docker logs -f dockercompose_compute_1
2022-10-21 06:15:48.757 GMT [56] LOG:  connection authorized: user=cloud_admin database=postgres application_name=psql
2022-10-21 06:17:00.307 GMT [56] LOG:  [NEON_SMGR] libpagestore: connected to 'host=pageserver port=6400'
(...omit...)

54 docs/rfcs/033-merged-compute-image.md Normal file
@@ -0,0 +1,54 @@
## Merged compute image
https://github.com/neondatabase/neon/issues/6685

### Motivation:
It is hard to manage compute pools for three Postgres versions: we currently build a compute image per Postgres version (three for neonVM and three for k8s pods; eventually only neonVMs will remain).
Putting all Postgres versions into a single image should dramatically improve pool usage.

### TODO
#### Compute code changes:

1. Create merged compute image https://github.com/neondatabase/neon/pull/6808
2. Pass compute version in spec from control-plane
3. Change the path to postgres in compute_ctl; it is not specified explicitly right now.
   `compute_ctl` has `pgbin` and `pgdata` arguments, but currently they are used only in tests.
4. Make changes to the custom_extension code: fix path handling.

#### Control-plane changes:
1. Pass compute version in spec from control-plane
2. Remove the old logic of VM pool management

#### Prewarm changes:
Currently, for pooled VMs, we prewarm postgres to improve cold start speed:
```
// If this is a pooled VM, prewarm before starting HTTP server and becoming
// available for binding. Prewarming helps Postgres start quicker later,
// because QEMU will already have it's memory allocated from the host, and
// the necessary binaries will already be cached.
```

Prewarm = initdb + start postgres + rm pgdata

Q: How should we do prewarm if we don't know in advance which version of postgres will be used?
I see two options:
- use versioned pgdata directories and run the prewarm operations for all existing versions (see the sketch below).
- choose a "default_version" for each pooled VM and prewarm only that version. Try to start the compute in a pooled VM with a matching version; if none exists, spin up the compute in any available VM. That start will be slower, because it is not prewarmed.

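A minimal sketch of the first option, assuming a hypothetical per-version install prefix and prewarm pgdata location (none of these paths come from compute_ctl; they are placeholders):
```
#!/bin/sh
# Hypothetical prewarm for every bundled Postgres major version.
set -eu
for v in 14 15 16; do
    bindir="/usr/local/v${v}/bin"              # assumed per-version install prefix
    pgdata="/var/db/postgres/prewarm-v${v}"    # assumed throwaway pgdata per version
    "${bindir}/initdb" -D "${pgdata}"
    "${bindir}/pg_ctl" -D "${pgdata}" -w start # touch binaries and allocate memory
    "${bindir}/pg_ctl" -D "${pgdata}" -w stop
    rm -rf "${pgdata}"                         # prewarm = initdb + start postgres + rm pgdata
done
```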
#### Extensions support
To support the merged compute image (an image containing all supported versions of postgres),
we need to offload extensions from the image. We can implement this using the "custom extensions" mechanism.

Custom extensions changes:
1. We need to move all extensions from the main compute image to the build-custom-extensions repo.
2. We need to generate a spec for all public extensions and pass it to the compute image.
   The spec contains information about the files in the extension and their paths,
   as well as the content of the control file. Currently it is set manually per user, for the few users that use "rare" custom extensions. We need to improve spec passing.
   For public extensions, we can embed this spec into the compute image: take the artifact from the build-custom-extensions CI step and put it into the compute image.

3. We need to test the performance of extension downloading and ensure that it doesn't affect cold starts (with the proxy the speed should be fine).
4. Note that in this task we are not trying to solve the extension versioning issue; we assume that all extensions are mapped to compute images 1-1 as they are now.

#### Test changes:
- This is general functionality and will be covered by e2e tests.
- We will need to add a test for extensions, to ensure that they are available for every new compute version. We don't need to run extension regression tests here; it is enough to ensure that `CREATE EXTENSION ext;` works (see the sketch below).

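For illustration, a hedged sketch of such a smoke test; the extension list and connection parameters are placeholders, not the real test matrix:
```
#!/bin/sh
# Hypothetical smoke test: check that each public extension can be created
# and dropped against a compute started from the merged image.
set -eu
for ext in pg_cron timescaledb pg_stat_statements; do
    psql -h localhost -p 55433 -U cloud_admin -d postgres \
        -c "CREATE EXTENSION IF NOT EXISTS \"${ext}\"; DROP EXTENSION \"${ext}\";"
done
```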
@@ -1,139 +0,0 @@
# Postgres Bundle for Pageserver

Created on 2024-06-17

## Summary

This RFC defines the responsibilities of the Compute and Storage teams regarding the
build & deployment of the Postgres code that Pageserver must run
(`initdb`, `postgres --wal-redo`).

## Motivation

Pageserver has to run Postgres binaries to do its job, specifically

* `initdb`
* `postgres --wal-redo` mode

Currently there is **no clear ownership** of
* how these binaries are built
  * including, critically, dynamic linkage against other libraries such as `libicu`
* what build of the binaries ends up running on Pageservers
* how the binaries and runtime dependencies (e.g., shared libraries) are delivered to Pageservers

Further, these binaries have dependencies (e.g., libicu) which
1. prevent the Storage team from switching the Pageserver distro and/or version,
2. impact compatibility between Storage and Compute (e.g., [libicu version impacts collation incompatibility](https://github.com/neondatabase/neon/pull/8074)), and
3. can cause database corruption if updated carelessly (locale => libc)

## Why Is This Worth Solving

1. Clearly defined ownership generally boosts execution speed & bug triage.
   * Example for why execution speed matters: CVE in a dependency => who takes care of patching & updating.
2. Centralize understanding of the risks involved with some dependencies.
   Currently, there is no team clearly responsible for assessing / tracking the risks. As a reminder from the previous section, these are
   * runtime incompatibilities
   * database corruption

Also, it is an unlock for additional future value, see the "Future Work" section.

## Impacted components (e.g. pageserver, safekeeper, console, etc)

Pageserver (neon.git)
Compute (neon.git)
Deployment process (aws.git)

## Design

The basic interface between the Compute and Storage teams is as follows:

* The Compute team publishes a "bundle" of the binaries required by Pageserver
* The Storage team uses a pinned bundle in the Pageserver build process
* Storage team code review is required to update the pinned version

The "bundle" provides an interface agreed upon by the Compute and Storage teams to run
* for each supported Postgres version at Neon (v14, v15, v16, ...)
  * the `initdb` process
    * behaving like a vanilla Postgres `initdb`
  * the `postgres --wal-redo` mode process
    * following the walredo protocol specified elsewhere

The bundle is self-contained, i.e., it behaves the same way on any Linux system.
The only ambient runtime dependency is the Linux kernel.
The minimum Linux kernel version is 5.10.

### Variant 1: bundle = fully statically linked binaries
The "bundle" is a tarball of fully statically linked binaries

```
v14/initdb
v14/postgres
v15/initdb
v15/postgres
v16/initdb
v16/postgres
...
```

The directory structure is part of the interface.

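For illustration only, a sketch of how Pageserver could pick a binary out of this layout; the bundle mount point and argument handling are assumptions, not part of the RFC:
```
#!/bin/sh
# With the fixed v<MAJOR>/<binary> layout, selecting a binary only requires
# the bundle root and the Postgres major version.
set -eu
BUNDLE_ROOT=/usr/local/pg-bundle   # assumed extraction location
PG_VERSION="$1"                    # e.g. 16
shift
exec "${BUNDLE_ROOT}/v${PG_VERSION}/postgres" "$@"   # e.g. --wal-redo
```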
### Variant 2: bundle = chrooted directory

The "bundle" is a tarball that contains all sorts of files, plus a launcher script.

```
LAUNCHER
storage
storage/does
storage/does/not
storage/does/not/care
```

To launch `initdb` or `postgres --wal-redo`, the Pageserver does the following:
1. fork a child process
2. `chroot` into the extracted directory
3. inside the chroot, run `/LAUNCHER VERSION PG_BINARY [FLAGS...]`
4. the `LAUNCHER` script sets up library search paths, etc., and then `exec`s the correct binary (see the sketch below)

We acknowledge this is half-way reinventing OCI + Linux containers.
However, our needs are much simpler than what OCI & Docker provide.
Specifically, we do not want Pageserver to be runtime-dependent on e.g. Docker as the launcher.

The `chroot` is there to enforce that the "bundle" be self-contained.
The special path `/inout` in the bundle is reserved, e.g., for `initdb` output.

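For illustration, a hedged sketch of what `/LAUNCHER` could look like under this contract; the per-version `bin/` and `lib/` layout inside the bundle is an assumption, not something this RFC specifies:
```
#!/bin/sh
# Hypothetical /LAUNCHER for Variant 2, invoked inside the chroot as:
#   /LAUNCHER VERSION PG_BINARY [FLAGS...]
set -eu
version="$1"
binary="$2"
shift 2
# Point the dynamic linker at the shared libraries bundled for this version.
export LD_LIBRARY_PATH="/v${version}/lib"
exec "/v${version}/bin/${binary}" "$@"
```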
### Variant 3: ???

Your design here, feedback welcome.

## Security implications

It's an improvement because a single team (Compute) will be responsible for runtime dependencies.

## Implementation & Rollout

The Storage and Compute teams agree on a bundle definition.

The Compute team changes their build process to produce:
1. existing: compute image / vm compute image
2. existing: pg_install tarball (currently built by `neon.git:Dockerfile`)
3. new: the bundle

Storage makes `neon.git` Pageserver changes to support using the bundle (behind a feature flag).
With the feature flag disabled, the existing `pg_install` tarball is used instead.

Storage & infra make `aws.git` changes to deploy the bundle to pageservers, with the feature flag disabled.

The Storage team does a gradual rollout.

The Storage & infra teams remove support for `pg_install` and delete it from the nodes (experimentation in staging to ensure no hidden runtime deps!).

The Compute team stops producing the `pg_install` tarball.


## Future Work

We know that we can easily make pageserver fully statically linked.
Together with the self-contained "bundle" proposed above, Pageserver can then be deployed to different OSes.
For example, we have been entertaining the idea of trying Amazon Linux instead of Debian for Pageserver.
That experiment would be a lot simpler.
