Compare commits

..

1 Commits

Author SHA1 Message Date
Anna Khanova
136ed19387 Test 2024-03-27 13:42:33 +01:00
717 changed files with 32625 additions and 88528 deletions

View File

@@ -23,30 +23,10 @@ platforms = [
] ]
[final-excludes] [final-excludes]
workspace-members = [ # vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
# vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but # it is built primarly in separate repo neondatabase/autoscaling and thus is excluded
# it is built primarly in separate repo neondatabase/autoscaling and thus is excluded # from depending on workspace-hack because most of the dependencies are not used.
# from depending on workspace-hack because most of the dependencies are not used. workspace-members = ["vm_monitor"]
"vm_monitor",
# All of these exist in libs and are not usually built independently.
# Putting workspace hack there adds a bottleneck for cargo builds.
"compute_api",
"consumption_metrics",
"desim",
"metrics",
"pageserver_api",
"postgres_backend",
"postgres_connection",
"postgres_ffi",
"pq_proto",
"remote_storage",
"safekeeper_api",
"tenant_size_model",
"tracing-utils",
"utils",
"wal_craft",
"walproposer",
]
# Write out exact versions rather than a semver range. (Defaults to false.) # Write out exact versions rather than a semver range. (Defaults to false.)
# exact-versions = true # exact-versions = true

View File

@@ -1,2 +1,2 @@
[profile.default] [profile.default]
slow-timeout = { period = "60s", terminate-after = 3 } slow-timeout = { period = "20s", terminate-after = 3 }

View File

@@ -8,7 +8,6 @@
!scripts/combine_control_files.py !scripts/combine_control_files.py
!scripts/ninstall.sh !scripts/ninstall.sh
!vm-cgconfig.conf !vm-cgconfig.conf
!docker-compose/run-tests.sh
# Directories # Directories
!.cargo/ !.cargo/
@@ -18,13 +17,11 @@
!libs/ !libs/
!neon_local/ !neon_local/
!pageserver/ !pageserver/
!patches/
!pgxn/ !pgxn/
!proxy/ !proxy/
!storage_scrubber/ !s3_scrubber/
!safekeeper/ !safekeeper/
!storage_broker/ !storage_broker/
!storage_controller/
!trace/ !trace/
!vendor/postgres-*/ !vendor/postgres-*/
!workspace_hack/ !workspace_hack/

2
.gitattributes vendored
View File

@@ -1,2 +0,0 @@
# allows for nicer hunk headers with git show
*.rs diff=rust

View File

@@ -1,15 +1,14 @@
self-hosted-runner: self-hosted-runner:
labels: labels:
- arm64 - arm64
- dev
- gen3
- large - large
- large-arm64 # Remove `macos-14` from the list after https://github.com/rhysd/actionlint/pull/392 is merged.
- macos-14
- small - small
- small-arm64
- us-east-2 - us-east-2
config-variables: config-variables:
- BENCHMARK_PROJECT_ID_PUB
- BENCHMARK_PROJECT_ID_SUB
- REMOTE_STORAGE_AZURE_CONTAINER - REMOTE_STORAGE_AZURE_CONTAINER
- REMOTE_STORAGE_AZURE_REGION - REMOTE_STORAGE_AZURE_REGION
- SLACK_UPCOMING_RELEASE_CHANNEL_ID - SLACK_UPCOMING_RELEASE_CHANNEL_ID
- DEV_AWS_OIDC_ROLE_ARN

View File

@@ -150,7 +150,7 @@ runs:
# Use aws s3 cp (instead of aws s3 sync) to keep files from previous runs to make old URLs work, # Use aws s3 cp (instead of aws s3 sync) to keep files from previous runs to make old URLs work,
# and to keep files on the host to upload them to the database # and to keep files on the host to upload them to the database
time s5cmd --log error cp "${WORKDIR}/report/*" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}/" time aws s3 cp --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
# Generate redirect # Generate redirect
cat <<EOF > ${WORKDIR}/index.html cat <<EOF > ${WORKDIR}/index.html
@@ -183,7 +183,7 @@ runs:
uses: actions/cache@v4 uses: actions/cache@v4
with: with:
path: ~/.cache/pypoetry/virtualenvs path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }} key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Store Allure test stat in the DB (new) - name: Store Allure test stat in the DB (new)
if: ${{ !cancelled() && inputs.store-test-results-into-db == 'true' }} if: ${{ !cancelled() && inputs.store-test-results-into-db == 'true' }}

View File

@@ -26,7 +26,7 @@ runs:
TARGET: ${{ inputs.path }} TARGET: ${{ inputs.path }}
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }} SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id, github.run_attempt) }} PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: | run: |
BUCKET=neon-github-public-dev BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE) FILENAME=$(basename $ARCHIVE)

View File

@@ -3,14 +3,14 @@ description: 'Create Branch using API'
inputs: inputs:
api_key: api_key:
description: 'Neon API key' desctiption: 'Neon API key'
required: true required: true
project_id: project_id:
description: 'ID of the Project to create Branch in' desctiption: 'ID of the Project to create Branch in'
required: true required: true
api_host: api_host:
description: 'Neon API host' desctiption: 'Neon API host'
default: console-stage.neon.build default: console.stage.neon.tech
outputs: outputs:
dsn: dsn:
description: 'Created Branch DSN (for main database)' description: 'Created Branch DSN (for main database)'

View File

@@ -3,17 +3,17 @@ description: 'Delete Branch using API'
inputs: inputs:
api_key: api_key:
description: 'Neon API key' desctiption: 'Neon API key'
required: true required: true
project_id: project_id:
description: 'ID of the Project which should be deleted' desctiption: 'ID of the Project which should be deleted'
required: true required: true
branch_id: branch_id:
description: 'ID of the branch to delete' desctiption: 'ID of the branch to delete'
required: true required: true
api_host: api_host:
description: 'Neon API host' desctiption: 'Neon API host'
default: console-stage.neon.build default: console.stage.neon.tech
runs: runs:
using: "composite" using: "composite"

View File

@@ -3,19 +3,22 @@ description: 'Create Neon Project using API'
inputs: inputs:
api_key: api_key:
description: 'Neon API key' desctiption: 'Neon API key'
required: true required: true
region_id: region_id:
description: 'Region ID, if not set the project will be created in the default region' desctiption: 'Region ID, if not set the project will be created in the default region'
default: aws-us-east-2 default: aws-us-east-2
postgres_version: postgres_version:
description: 'Postgres version; default is 16' desctiption: 'Postgres version; default is 15'
default: '16' default: 15
api_host: api_host:
description: 'Neon API host' desctiption: 'Neon API host'
default: console-stage.neon.build default: console.stage.neon.tech
provisioner:
desctiption: 'k8s-pod or k8s-neonvm'
default: 'k8s-pod'
compute_units: compute_units:
description: '[Min, Max] compute units' desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
default: '[1, 1]' default: '[1, 1]'
outputs: outputs:
@@ -34,6 +37,10 @@ runs:
# A shell without `set -x` to not to expose password/dsn in logs # A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0} shell: bash -euo pipefail {0}
run: | run: |
if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then
echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU"
fi
project=$(curl \ project=$(curl \
"https://${API_HOST}/api/v2/projects" \ "https://${API_HOST}/api/v2/projects" \
--fail \ --fail \
@@ -45,7 +52,7 @@ runs:
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\", \"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
\"pg_version\": ${POSTGRES_VERSION}, \"pg_version\": ${POSTGRES_VERSION},
\"region_id\": \"${REGION_ID}\", \"region_id\": \"${REGION_ID}\",
\"provisioner\": \"k8s-neonvm\", \"provisioner\": \"${PROVISIONER}\",
\"autoscaling_limit_min_cu\": ${MIN_CU}, \"autoscaling_limit_min_cu\": ${MIN_CU},
\"autoscaling_limit_max_cu\": ${MAX_CU}, \"autoscaling_limit_max_cu\": ${MAX_CU},
\"settings\": { } \"settings\": { }
@@ -68,5 +75,6 @@ runs:
API_KEY: ${{ inputs.api_key }} API_KEY: ${{ inputs.api_key }}
REGION_ID: ${{ inputs.region_id }} REGION_ID: ${{ inputs.region_id }}
POSTGRES_VERSION: ${{ inputs.postgres_version }} POSTGRES_VERSION: ${{ inputs.postgres_version }}
PROVISIONER: ${{ inputs.provisioner }}
MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }} MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }} MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}

View File

@@ -3,14 +3,14 @@ description: 'Delete Neon Project using API'
inputs: inputs:
api_key: api_key:
description: 'Neon API key' desctiption: 'Neon API key'
required: true required: true
project_id: project_id:
description: 'ID of the Project to delete' desctiption: 'ID of the Project to delete'
required: true required: true
api_host: api_host:
description: 'Neon API host' desctiption: 'Neon API host'
default: console-stage.neon.build default: console.stage.neon.tech
runs: runs:
using: "composite" using: "composite"

View File

@@ -43,7 +43,7 @@ inputs:
pg_version: pg_version:
description: 'Postgres version to use for tests' description: 'Postgres version to use for tests'
required: false required: false
default: 'v16' default: 'v14'
benchmark_durations: benchmark_durations:
description: 'benchmark durations JSON' description: 'benchmark durations JSON'
required: false required: false
@@ -56,14 +56,14 @@ runs:
if: inputs.build_type != 'remote' if: inputs.build_type != 'remote'
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon path: /tmp/neon
- name: Download Neon binaries for the previous release - name: Download Neon binaries for the previous release
if: inputs.build_type != 'remote' if: inputs.build_type != 'remote'
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon-previous path: /tmp/neon-previous
prefix: latest prefix: latest
@@ -83,12 +83,13 @@ runs:
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
fetch-depth: 1
- name: Cache poetry deps - name: Cache poetry deps
uses: actions/cache@v4 uses: actions/cache@v4
with: with:
path: ~/.cache/pypoetry/virtualenvs path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }} key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps - name: Install Python deps
shell: bash -euxo pipefail {0} shell: bash -euxo pipefail {0}
@@ -113,8 +114,6 @@ runs:
export PLATFORM=${PLATFORM:-github-actions-selfhosted} export PLATFORM=${PLATFORM:-github-actions-selfhosted}
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install} export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
export DEFAULT_PG_VERSION=${PG_VERSION#v} export DEFAULT_PG_VERSION=${PG_VERSION#v}
export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib
export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-}
if [ "${BUILD_TYPE}" = "remote" ]; then if [ "${BUILD_TYPE}" = "remote" ]; then
export REMOTE_ENV=1 export REMOTE_ENV=1
@@ -130,8 +129,8 @@ runs:
exit 1 exit 1
fi fi
if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
# -n sets the number of parallel processes that pytest-xdist will run # -n16 uses sixteen processes to run tests via pytest-xdist
EXTRA_PARAMS="-n12 $EXTRA_PARAMS" EXTRA_PARAMS="-n16 $EXTRA_PARAMS"
# --dist=loadgroup points tests marked with @pytest.mark.xdist_group # --dist=loadgroup points tests marked with @pytest.mark.xdist_group
# to the same worker to make @pytest.mark.order work with xdist # to the same worker to make @pytest.mark.order work with xdist
@@ -169,28 +168,23 @@ runs:
EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS" EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
fi fi
if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then if [[ "${{ inputs.build_type }}" == "debug" ]]; then
cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run) cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
elif [[ "${{ inputs.build_type }}" == "release" ]]; then
cov_prefix=()
else else
cov_prefix=() cov_prefix=()
fi fi
# Wake up the cluster if we use remote neon instance # Wake up the cluster if we use remote neon instance
if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
QUERIES=("SELECT version()") ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERIES+=("SHOW neon.tenant_id")
QUERIES+=("SHOW neon.timeline_id")
fi
for q in "${QUERIES[@]}"; do
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "${q}"
done
fi fi
# Run the tests. # Run the tests.
# #
# --alluredir saves test results in Allure format (in a specified directory) # The junit.xml file allows CI tools to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are # --verbose prints name of each test (helpful when there are
# multiple tests in one file) # multiple tests in one file)
# -rA prints summary in the end # -rA prints summary in the end
@@ -199,6 +193,7 @@ runs:
# #
mkdir -p $TEST_OUTPUT/allure/results mkdir -p $TEST_OUTPUT/allure/results
"${cov_prefix[@]}" ./scripts/pytest \ "${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--alluredir=$TEST_OUTPUT/allure/results \ --alluredir=$TEST_OUTPUT/allure/results \
--tb=short \ --tb=short \
--verbose \ --verbose \

View File

@@ -1,36 +0,0 @@
name: "Set custom docker config directory"
description: "Create a directory for docker config and set DOCKER_CONFIG"
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
runs:
using: "composite"
steps:
- name: Show warning on GitHub-hosted runners
if: runner.environment == 'github-hosted'
shell: bash -euo pipefail {0}
run: |
# Using the following environment variables to find a path to the workflow file
# ${GITHUB_WORKFLOW_REF} - octocat/hello-world/.github/workflows/my-workflow.yml@refs/heads/my_branch
# ${GITHUB_REPOSITORY} - octocat/hello-world
# ${GITHUB_REF} - refs/heads/my_branch
# From https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/variables
filename_with_ref=${GITHUB_WORKFLOW_REF#"$GITHUB_REPOSITORY/"}
filename=${filename_with_ref%"@$GITHUB_REF"}
# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-a-warning-message
title='Unnecessary usage of `.github/actions/set-docker-config-dir`'
message='No need to use `.github/actions/set-docker-config-dir` action on GitHub-hosted runners'
echo "::warning file=${filename},title=${title}::${message}"
- uses: pyTooling/Actions/with-post-step@74afc5a42a17a046c90c68cb5cfa627e5c6c5b6b # v1.0.7
env:
DOCKER_CONFIG: .docker-custom-${{ github.run_id }}-${{ github.run_attempt }}
with:
main: |
mkdir -p "${DOCKER_CONFIG}"
echo DOCKER_CONFIG=${DOCKER_CONFIG} | tee -a $GITHUB_ENV
post: |
if [ -d "${DOCKER_CONFIG}" ]; then
rm -r "${DOCKER_CONFIG}"
fi

View File

@@ -8,7 +8,7 @@ inputs:
description: "A directory or file to upload" description: "A directory or file to upload"
required: true required: true
prefix: prefix:
description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'" description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false required: false
runs: runs:
@@ -45,7 +45,7 @@ runs:
env: env:
SOURCE: ${{ inputs.path }} SOURCE: ${{ inputs.path }}
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id , github.run_attempt) }} PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: | run: |
BUCKET=neon-github-public-dev BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE) FILENAME=$(basename $ARCHIVE)

View File

@@ -1,154 +0,0 @@
name: Prepare benchmarking databases by restoring dumps
on:
workflow_call:
# no inputs needed
defaults:
run:
shell: bash -euxo pipefail {0}
jobs:
setup-databases:
strategy:
fail-fast: false
matrix:
platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ]
database: [ clickbench, tpch, userexample ]
env:
LD_LIBRARY_PATH: /tmp/neon/pg_install/v16/lib
PLATFORM: ${{ matrix.platform }}
PG_BINARIES: /tmp/neon/pg_install/v16/bin
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init
steps:
- name: Set up Connection String
id: set-up-prep-connstr
run: |
case "${PLATFORM}" in
neon)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
aws-rds-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
aws-aurora-serverless-v2-postgres)
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
- uses: actions/checkout@v4
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
# we create a table that has one row for each database that we want to restore with the status whether the restore is done
- name: Create benchmark_restore_status table if it does not exist
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
DATABASE_NAME: ${{ matrix.database }}
# to avoid a race condition of multiple jobs trying to create the table at the same time,
# we use an advisory lock
run: |
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
SELECT pg_advisory_lock(4711);
CREATE TABLE IF NOT EXISTS benchmark_restore_status (
databasename text primary key,
restore_done boolean
);
SELECT pg_advisory_unlock(4711);
"
- name: Check if restore is already done
id: check-restore-done
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
DATABASE_NAME: ${{ matrix.database }}
run: |
skip=false
if ${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -tAc "SELECT 1 FROM benchmark_restore_status WHERE databasename='${{ env.DATABASE_NAME }}' AND restore_done=true;" | grep -q 1; then
echo "Restore already done for database ${{ env.DATABASE_NAME }} on platform ${{ env.PLATFORM }}. Skipping this database."
skip=true
fi
echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
- name: Check and create database if it does not exist
if: steps.check-restore-done.outputs.skip != 'true'
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
DATABASE_NAME: ${{ matrix.database }}
run: |
DB_EXISTS=$(${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -tAc "SELECT 1 FROM pg_database WHERE datname='${{ env.DATABASE_NAME }}'")
if [ "$DB_EXISTS" != "1" ]; then
echo "Database ${{ env.DATABASE_NAME }} does not exist. Creating it..."
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "CREATE DATABASE \"${{ env.DATABASE_NAME }}\";"
else
echo "Database ${{ env.DATABASE_NAME }} already exists."
fi
- name: Download dump from S3 to /tmp/dumps
if: steps.check-restore-done.outputs.skip != 'true'
env:
DATABASE_NAME: ${{ matrix.database }}
run: |
mkdir -p /tmp/dumps
aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/
- name: Replace database name in connection string
if: steps.check-restore-done.outputs.skip != 'true'
id: replace-dbname
env:
DATABASE_NAME: ${{ matrix.database }}
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
run: |
# Extract the part before the database name
base_connstr="${BENCHMARK_CONNSTR%/*}"
# Extract the query parameters (if any) after the database name
query_params="${BENCHMARK_CONNSTR#*\?}"
# Reconstruct the new connection string
if [ "$query_params" != "$BENCHMARK_CONNSTR" ]; then
new_connstr="${base_connstr}/${DATABASE_NAME}?${query_params}"
else
new_connstr="${base_connstr}/${DATABASE_NAME}"
fi
echo "database_connstr=${new_connstr}" >> $GITHUB_OUTPUT
- name: Restore dump
if: steps.check-restore-done.outputs.skip != 'true'
env:
DATABASE_NAME: ${{ matrix.database }}
DATABASE_CONNSTR: ${{ steps.replace-dbname.outputs.database_connstr }}
# the following works only with larger computes:
# PGOPTIONS: "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
# we add the || true because:
# the dumps were created with Neon and contain neon extensions that are not
# available in RDS, so we will always report an error, but we can ignore it
run: |
${PG_BINARIES}/pg_restore --clean --if-exists --no-owner --jobs=4 \
-d "${DATABASE_CONNSTR}" /tmp/dumps/${DATABASE_NAME}.pg_dump || true
- name: Update benchmark_restore_status table
if: steps.check-restore-done.outputs.skip != 'true'
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
DATABASE_NAME: ${{ matrix.database }}
run: |
${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
INSERT INTO benchmark_restore_status (databasename, restore_done) VALUES ('${{ env.DATABASE_NAME }}', true)
ON CONFLICT (databasename) DO UPDATE SET restore_done = true;
"

View File

@@ -1,297 +0,0 @@
name: Build and Test Locally
on:
workflow_call:
inputs:
arch:
description: 'x64 or arm64'
required: true
type: string
build-tag:
description: 'build tag'
required: true
type: string
build-tools-image:
description: 'build-tools image'
required: true
type: string
build-type:
description: 'debug or release'
required: true
type: string
pg-versions:
description: 'a json array of postgres versions to run regression tests on'
required: true
type: string
defaults:
run:
shell: bash -euxo pipefail {0}
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
jobs:
build-neon:
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
container:
image: ${{ inputs.build-tools-image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
# Raise locked memory limit for tokio-epoll-uring.
# On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
# io_uring will account the memory of the CQ and SQ as locked.
# More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
env:
BUILD_TYPE: ${{ inputs.build-type }}
GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
BUILD_TAG: ${{ inputs.build-tag }}
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- uses: actions/checkout@v4
with:
submodules: true
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
# It also includes --features, if any
#
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
# because "cargo metadata" doesn't accept --release or --debug options
#
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
# corresponding Cargo.toml files for their descriptions.
- name: Set env variables
env:
ARCH: ${{ inputs.arch }}
run: |
CARGO_FEATURES="--features testing"
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_FLAGS="--locked"
elif [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=""
CARGO_FLAGS="--locked"
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_FLAGS="--locked --release"
fi
{
echo "cov_prefix=${cov_prefix}"
echo "CARGO_FEATURES=${CARGO_FEATURES}"
echo "CARGO_FLAGS=${CARGO_FLAGS}"
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
} >> $GITHUB_ENV
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v4
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
- name: Build walproposer-lib
run: mold -run make walproposer-lib -j$(nproc)
- name: Run cargo build
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
# Do install *before* running rust tests because they might recompile the
# binaries with different features/flags.
- name: Install rust binaries
env:
ARCH: ${{ inputs.arch }}
run: |
# Install target binaries
mkdir -p /tmp/neon/bin/
binaries=$(
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
cp "$SRC" "$DST"
done
# Install test executables and write list of all binaries (for code coverage)
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
mkdir -p /tmp/neon/test_bin/
test_exe_paths=$(
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
# We don't need debug symbols for code coverage, so strip them out to make
# the artifact smaller.
strip "$SRC" -o "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
done
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
fi
- name: Run rust tests
env:
NEXTEST_RETRIES: 3
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
export LD_LIBRARY_PATH
#nextest does not yet support running doctests
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
done
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
- name: Install postgres binaries
run: cp -a pg_install /tmp/neon/pg_install
- name: Upload Neon artifact
uses: ./.github/actions/upload
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
path: /tmp/neon
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: inputs.build-type == 'debug'
uses: ./.github/actions/save-coverage-data
regress-tests:
# Don't run regression tests on debug arm64 builds
if: inputs.build-type != 'debug' || inputs.arch != 'arm64'
needs: [ build-neon ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
container:
image: ${{ inputs.build-tools-image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
strategy:
fail-fast: false
matrix:
pg_version: ${{ fromJson(inputs.pg-versions) }}
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Pytest regression tests
uses: ./.github/actions/run-python-test-set
timeout-minutes: 60
with:
build_type: ${{ inputs.build-type }}
test_selection: regress
needs_postgres_source: true
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
rerun_flaky: true
pg_version: ${{ matrix.pg_version }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540
- name: Merge and upload coverage data
if: |
false &&
inputs.build-type == 'debug' && matrix.pg_version == 'v16'
uses: ./.github/actions/save-coverage-data

View File

@@ -24,7 +24,7 @@ jobs:
actionlint: actionlint:
needs: [ check-permissions ] needs: [ check-permissions ]
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: reviewdog/action-actionlint@v1 - uses: reviewdog/action-actionlint@v1
@@ -36,16 +36,3 @@ jobs:
fail_on_error: true fail_on_error: true
filter_mode: nofilter filter_mode: nofilter
level: error level: error
- name: Disallow 'ubuntu-latest' runners
run: |
PAT='^\s*runs-on:.*-latest'
if grep -ERq $PAT .github/workflows; then
grep -ERl $PAT .github/workflows |\
while read -r f
do
l=$(grep -nE $PAT $f | awk -F: '{print $1}' | head -1)
echo "::error file=$f,line=$l::Please use 'ubuntu-22.04' instead of 'ubuntu-latest'"
done
exit 1
fi

View File

@@ -18,7 +18,6 @@ on:
concurrency: concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }} group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: false
env: env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -44,7 +43,7 @@ jobs:
contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) && contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) &&
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run') contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run" - run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
@@ -60,7 +59,7 @@ jobs:
github.event.action == 'labeled' && github.event.action == 'labeled' &&
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run') contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run" - run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
@@ -69,41 +68,15 @@ jobs:
with: with:
ref: main ref: main
token: ${{ secrets.CI_ACCESS_TOKEN }} token: ${{ secrets.CI_ACCESS_TOKEN }}
- name: Look for existing PR
id: get-pr
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT}
- name: Get changed labels
id: get-labels
if: steps.get-pr.outputs.ALREADY_CREATED != ''
env:
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
LABELS_TO_REMOVE=$(comm -23 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) | sort) \
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
( grep -v run-e2e-tests-in-draft || true ) | paste -sd , -)
LABELS_TO_ADD=$(comm -13 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) |sort) \
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
paste -sd , -)
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
- run: gh pr checkout "${PR_NUMBER}" - run: gh pr checkout "${PR_NUMBER}"
- run: git checkout -b "${BRANCH}" - run: git checkout -b "${BRANCH}"
- run: git push --force origin "${BRANCH}" - run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED == ''
- name: Create a Pull Request for CI run (if required) - name: Create a Pull Request for CI run (if required)
if: steps.get-pr.outputs.ALREADY_CREATED == '' env:
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: | run: |
cat << EOF > body.md cat << EOF > body.md
@@ -114,33 +87,16 @@ jobs:
Feel free to review/comment/discuss the original PR #${PR_NUMBER}. Feel free to review/comment/discuss the original PR #${PR_NUMBER}.
EOF EOF
LABELS=$( (gh pr --repo "${GITHUB_REPOSITORY}" view ${PR_NUMBER} --json labels --jq '.labels.[].name'; echo run-e2e-tests-in-draft )| \ ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
grep -E '^run' | paste -sd , -) if [ -z "${ALREADY_CREATED}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \ gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
--body-file "body.md" \ --body-file "body.md" \
--head "${BRANCH}" \ --head "${BRANCH}" \
--base "main" \ --base "main" \
--label ${LABELS} \ --label "run-e2e-tests-in-draft" \
--draft --draft
- name: Modify the existing pull request (if required)
if: steps.get-pr.outputs.ALREADY_CREATED != ''
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
LABELS_TO_ADD: ${{ steps.get-labels.outputs.LABELS_TO_ADD }}
LABELS_TO_REMOVE: ${{ steps.get-labels.outputs.LABELS_TO_REMOVE }}
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
run: |
ADD_CMD=
REMOVE_CMD=
[ -z "${LABELS_TO_ADD}" ] || ADD_CMD="--add-label ${LABELS_TO_ADD}"
[ -z "${LABELS_TO_REMOVE}" ] || REMOVE_CMD="--remove-label ${LABELS_TO_REMOVE}"
if [ -n "${ADD_CMD}" ] || [ -n "${REMOVE_CMD}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" edit ${ALREADY_CREATED} ${ADD_CMD} ${REMOVE_CMD}
fi fi
- run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED != ''
cleanup: cleanup:
# Close PRs and delete branchs if the original PR is closed. # Close PRs and delete branchs if the original PR is closed.
@@ -152,7 +108,7 @@ jobs:
github.event.action == 'closed' && github.event.action == 'closed' &&
github.event.pull_request.head.repo.full_name != github.repository github.event.pull_request.head.repo.full_name != github.repository
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch - name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch

View File

@@ -38,11 +38,6 @@ on:
description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch' description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
required: false required: false
default: false default: false
run_only_pgvector_tests:
type: boolean
description: 'Run pgvector tests but no other tests. If not set, all tests including pgvector tests will be run'
required: false
default: false
defaults: defaults:
run: run:
@@ -55,118 +50,11 @@ concurrency:
jobs: jobs:
bench: bench:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
permissions:
contents: write
statuses: write
id-token: write # Required for OIDC authentication in azure runners
strategy:
fail-fast: false
matrix:
include:
- DEFAULT_PG_VERSION: 16
PLATFORM: "neon-staging"
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
RUNNER: [ self-hosted, us-east-2, x64 ]
IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
- DEFAULT_PG_VERSION: 16
PLATFORM: "azure-staging"
region_id: 'azure-eastus2'
RUNNER: [ self-hosted, eastus2, x64 ]
IMAGE: neondatabase/build-tools:pinned
env: env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300" TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100" TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }} DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.PLATFORM }}
runs-on: ${{ matrix.RUNNER }}
container:
image: ${{ matrix.IMAGE }}
options: --init
steps:
- uses: actions/checkout@v4
- name: Configure AWS credentials # necessary on Azure runners
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Run benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
extra_params:
-m remote_cluster
--sparse-ordering
--timeout 14400
--ignore test_runner/performance/test_perf_olap.py
--ignore test_runner/performance/test_perf_pgvector_queries.py
--ignore test_runner/performance/test_logical_replication.py
--ignore test_runner/performance/test_physical_replication.py
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic perf testing: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
replication-tests:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -180,81 +68,69 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Download Neon artifact - name: Download Neon artifact
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/ path: /tmp/neon/
prefix: latest prefix: latest
- name: Run Logical Replication benchmarks - name: Create Neon Project
uses: ./.github/actions/run-python-test-set id: create-neon-project
uses: ./.github/actions/neon-project-create
with: with:
build_type: ${{ env.BUILD_TYPE }} region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
test_selection: performance/test_logical_replication.py postgres_version: ${{ env.DEFAULT_PG_VERSION }}
run_in_parallel: false api_key: ${{ secrets.NEON_STAGING_API_KEY }}
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
BENCHMARK_PROJECT_ID_PUB: ${{ vars.BENCHMARK_PROJECT_ID_PUB }}
BENCHMARK_PROJECT_ID_SUB: ${{ vars.BENCHMARK_PROJECT_ID_SUB }}
- name: Run Physical Replication benchmarks - name: Run benchmark
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:
build_type: ${{ env.BUILD_TYPE }} build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_physical_replication.py test_selection: performance
run_in_parallel: false run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }} save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400 # Set --sparse-ordering option of pytest-order plugin
pg_version: ${{ env.DEFAULT_PG_VERSION }} # to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
env: env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report - name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }} if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
- name: Post to a Slack channel - name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }} if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1 uses: slackapi/slack-github-action@v1
with: with:
channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: | slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
Periodic replication testing: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env: env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
generate-matrices: generate-matrices:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday) # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
# #
# Available platforms: # Available platforms:
# - neonvm-captest-new: Freshly created project (1 CU) # - neon-captest-new: Freshly created project (1 CU)
# - neonvm-captest-freetier: Use freetier-sized compute (0.25 CU) # - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
# - neonvm-captest-azure-new: Freshly created project (1 CU) in azure region # - neon-captest-reuse: Reusing existing project
# - neonvm-captest-azure-freetier: Use freetier-sized compute (0.25 CU) in azure region
# - neonvm-captest-reuse: Reusing existing project
# - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs # - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage # - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
env: env:
RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }} RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
DEFAULT_REGION_ID: ${{ github.event.inputs.region_id || 'aws-us-east-2' }} runs-on: ubuntu-latest
runs-on: ubuntu-22.04
outputs: outputs:
pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }} pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }} olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
@@ -264,37 +140,22 @@ jobs:
- name: Generate matrix for pgbench benchmark - name: Generate matrix for pgbench benchmark
id: pgbench-compare-matrix id: pgbench-compare-matrix
run: | run: |
region_id_default=${{ env.DEFAULT_REGION_ID }}
runner_default='["self-hosted", "us-east-2", "x64"]'
runner_azure='["self-hosted", "eastus2", "x64"]'
image_default="369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned"
matrix='{ matrix='{
"pg_version" : [
16
],
"region_id" : [
"'"$region_id_default"'"
],
"platform": [ "platform": [
"neonvm-captest-new", "neon-captest-new",
"neonvm-captest-reuse", "neon-captest-reuse",
"neonvm-captest-new" "neonvm-captest-new"
], ],
"db_size": [ "10gb" ], "db_size": [ "10gb" ],
"runner": ['"$runner_default"'], "include": [{ "platform": "neon-captest-freetier", "db_size": "3gb" },
"image": [ "'"$image_default"'" ], { "platform": "neon-captest-new", "db_size": "50gb" },
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" }, { "platform": "neonvm-captest-freetier", "db_size": "3gb" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, { "platform": "neonvm-captest-new", "db_size": "50gb" }]
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
}' }'
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }, matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "rds-aurora", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]') { "platform": "rds-aurora", "db_size": "50gb"}]')
fi fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -304,13 +165,13 @@ jobs:
run: | run: |
matrix='{ matrix='{
"platform": [ "platform": [
"neonvm-captest-reuse" "neon-captest-reuse"
] ]
}' }'
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" }, matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" },
{ "platform": "rds-aurora" }]') { "platform": "rds-aurora" }]')
fi fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -320,7 +181,7 @@ jobs:
run: | run: |
matrix='{ matrix='{
"platform": [ "platform": [
"neonvm-captest-reuse" "neon-captest-reuse"
], ],
"scale": [ "scale": [
"10" "10"
@@ -329,22 +190,13 @@ jobs:
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" }, matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
{ "platform": "rds-aurora", "scale": "10" }]') { "platform": "rds-aurora", "scale": "10" }]')
fi fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
prepare_AWS_RDS_databases:
uses: ./.github/workflows/_benchmarking_preparation.yml
secrets: inherit
pgbench-compare: pgbench-compare:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} needs: [ generate-matrices ]
needs: [ generate-matrices, prepare_AWS_RDS_databases ]
permissions:
contents: write
statuses: write
id-token: write # Required for OIDC authentication in azure runners
strategy: strategy:
fail-fast: false fail-fast: false
@@ -354,15 +206,15 @@ jobs:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m" TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }} TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: ${{ matrix.pg_version }} DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }} PLATFORM: ${{ matrix.platform }}
runs-on: ${{ matrix.runner }} runs-on: [ self-hosted, us-east-2, x64 ]
container: container:
image: ${{ matrix.image }} image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init options: --init
# Increase timeout to 8h, default timeout is 6h # Increase timeout to 8h, default timeout is 6h
@@ -371,41 +223,37 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Configure AWS credentials # necessary on Azure runners
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Download Neon artifact - name: Download Neon artifact
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/ path: /tmp/neon/
prefix: latest prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project - name: Create Neon Project
if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform) if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
id: create-neon-project id: create-neon-project
uses: ./.github/actions/neon-project-create uses: ./.github/actions/neon-project-create
with: with:
region_id: ${{ matrix.region_id }} region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }} postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }} api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }} compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}
- name: Set up Connection String - name: Set up Connection String
id: set-up-connstr id: set-up-connstr
run: | run: |
case "${PLATFORM}" in case "${PLATFORM}" in
neonvm-captest-reuse) neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }} CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;; ;;
neonvm-captest-sharding-reuse) neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
;;
neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }} CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;; ;;
rds-aurora) rds-aurora)
@@ -422,6 +270,12 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Benchmark init - name: Benchmark init
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:
@@ -430,7 +284,6 @@ jobs:
run_in_parallel: false run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }} save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env: env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -444,7 +297,6 @@ jobs:
run_in_parallel: false run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }} save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env: env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -458,7 +310,6 @@ jobs:
run_in_parallel: false run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }} save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env: env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -472,7 +323,6 @@ jobs:
api_key: ${{ secrets.NEON_STAGING_API_KEY }} api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report - name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }} if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate uses: ./.github/actions/allure-report-generate
@@ -481,133 +331,7 @@ jobs:
uses: slackapi/slack-github-action@v1 uses: slackapi/slack-github-action@v1
with: with:
channel-id: "C033QLM5P7D" # dev-staging-stream channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: | slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
pgbench-pgvector:
permissions:
contents: write
statuses: write
id-token: write # Required for OIDC authentication in azure runners
strategy:
fail-fast: false
matrix:
include:
- PLATFORM: "neonvm-captest-pgvector"
RUNNER: [ self-hosted, us-east-2, x64 ]
IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
- PLATFORM: "azure-captest-pgvector"
RUNNER: [ self-hosted, eastus2, x64 ]
IMAGE: neondatabase/build-tools:pinned
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
TEST_PG_BENCH_SCALES_MATRIX: "1"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
LD_LIBRARY_PATH: /home/nonroot/pg/usr/lib/x86_64-linux-gnu
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.PLATFORM }}
runs-on: ${{ matrix.RUNNER }}
container:
image: ${{ matrix.IMAGE }}
options: --init
steps:
- uses: actions/checkout@v4
# until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
# instead of using Neon artifacts containing pgbench
- name: Install postgresql-16 where pytest expects it
run: |
cd /home/nonroot
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.4-1.pgdg110%2B1_amd64.deb
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110%2B1_amd64.deb
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110%2B1_amd64.deb
dpkg -x libpq5_16.4-1.pgdg110+1_amd64.deb pg
dpkg -x postgresql-client-16_16.4-1.pgdg110+1_amd64.deb pg
dpkg -x postgresql-16_16.4-1.pgdg110+1_amd64.deb pg
mkdir -p /tmp/neon/pg_install/v16/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
ln -s /home/nonroot/pg/usr/lib/x86_64-linux-gnu /tmp/neon/pg_install/v16/lib
/tmp/neon/pg_install/v16/bin/pgbench --version
/tmp/neon/pg_install/v16/bin/psql --version
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neonvm-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
;;
azure-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
- name: Configure AWS credentials # necessary on Azure runners to read/write from/to S3
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours
- name: Benchmark pgvector hnsw indexing
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Benchmark pgvector queries
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_pgvector_queries.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: |
Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env: env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -619,8 +343,8 @@ jobs:
# #
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows # *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB # *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }} if: ${{ !cancelled() }}
needs: [ generate-matrices, pgbench-compare, prepare_AWS_RDS_databases ] needs: [ generate-matrices, pgbench-compare ]
strategy: strategy:
fail-fast: false fail-fast: false
@@ -628,7 +352,7 @@ jobs:
env: env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16 DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output TEST_OUTPUT: /tmp/test_output
TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }} TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }}
TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }} TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }}
@@ -647,15 +371,20 @@ jobs:
- name: Download Neon artifact - name: Download Neon artifact
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/ path: /tmp/neon/
prefix: latest prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String - name: Set up Connection String
id: set-up-connstr id: set-up-connstr
run: | run: |
case "${PLATFORM}" in case "${PLATFORM}" in
neonvm-captest-reuse) neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }} CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
;; ;;
rds-aurora) rds-aurora)
@@ -665,13 +394,19 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }} CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
;; ;;
*) *)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'" echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1 exit 1
;; ;;
esac esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: ClickBench benchmark - name: ClickBench benchmark
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:
@@ -680,7 +415,6 @@ jobs:
run_in_parallel: false run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }} save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_clickbench extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env: env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -690,7 +424,6 @@ jobs:
TEST_OLAP_SCALE: 10 TEST_OLAP_SCALE: 10
- name: Create Allure report - name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }} if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate uses: ./.github/actions/allure-report-generate
@@ -699,10 +432,7 @@ jobs:
uses: slackapi/slack-github-action@v1 uses: slackapi/slack-github-action@v1
with: with:
channel-id: "C033QLM5P7D" # dev-staging-stream channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: | slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env: env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -713,8 +443,8 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900. # We might change it after https://github.com/neondatabase/neon/issues/2900.
# #
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB) # *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }} if: ${{ !cancelled() }}
needs: [ generate-matrices, clickbench-compare, prepare_AWS_RDS_databases ] needs: [ generate-matrices, clickbench-compare ]
strategy: strategy:
fail-fast: false fail-fast: false
@@ -722,7 +452,7 @@ jobs:
env: env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16 DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -740,24 +470,29 @@ jobs:
- name: Download Neon artifact - name: Download Neon artifact
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/ path: /tmp/neon/
prefix: latest prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Get Connstring Secret Name - name: Get Connstring Secret Name
run: | run: |
case "${PLATFORM}" in case "${PLATFORM}" in
neonvm-captest-reuse) neon-captest-reuse)
ENV_PLATFORM=CAPTEST_TPCH ENV_PLATFORM=CAPTEST_TPCH
;; ;;
rds-aurora) rds-aurora)
ENV_PLATFORM=RDS_AURORA_TPCH ENV_PLATFORM=RDS_AURORA_TPCH
;; ;;
rds-postgres) rds-postgres)
ENV_PLATFORM=RDS_POSTGRES_TPCH ENV_PLATFORM=RDS_AURORA_TPCH
;; ;;
*) *)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'" echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1 exit 1
;; ;;
esac esac
@@ -772,6 +507,12 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Run TPC-H benchmark - name: Run TPC-H benchmark
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:
@@ -780,7 +521,6 @@ jobs:
run_in_parallel: false run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }} save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch extra_params: -m remote_cluster --timeout 21600 -k test_tpch
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env: env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -788,7 +528,6 @@ jobs:
TEST_OLAP_SCALE: ${{ matrix.scale }} TEST_OLAP_SCALE: ${{ matrix.scale }}
- name: Create Allure report - name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }} if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate uses: ./.github/actions/allure-report-generate
@@ -797,16 +536,13 @@ jobs:
uses: slackapi/slack-github-action@v1 uses: slackapi/slack-github-action@v1
with: with:
channel-id: "C033QLM5P7D" # dev-staging-stream channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: | slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env: env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare: user-examples-compare:
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }} if: ${{ !cancelled() }}
needs: [ generate-matrices, tpch-compare, prepare_AWS_RDS_databases ] needs: [ generate-matrices, tpch-compare ]
strategy: strategy:
fail-fast: false fail-fast: false
@@ -814,7 +550,7 @@ jobs:
env: env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16 DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -831,15 +567,20 @@ jobs:
- name: Download Neon artifact - name: Download Neon artifact
uses: ./.github/actions/download uses: ./.github/actions/download
with: with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/ path: /tmp/neon/
prefix: latest prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String - name: Set up Connection String
id: set-up-connstr id: set-up-connstr
run: | run: |
case "${PLATFORM}" in case "${PLATFORM}" in
neonvm-captest-reuse) neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }} CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
;; ;;
rds-aurora) rds-aurora)
@@ -849,13 +590,19 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }} CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
;; ;;
*) *)
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'" echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1 exit 1
;; ;;
esac esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Run user examples - name: Run user examples
uses: ./.github/actions/run-python-test-set uses: ./.github/actions/run-python-test-set
with: with:
@@ -864,14 +611,12 @@ jobs:
run_in_parallel: false run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }} save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env: env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report - name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }} if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate uses: ./.github/actions/allure-report-generate
@@ -880,10 +625,6 @@ jobs:
uses: slackapi/slack-github-action@v1 uses: slackapi/slack-github-action@v1
with: with:
channel-id: "C033QLM5P7D" # dev-staging-stream channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: | slack-message: "Periodic User example perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env: env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -21,7 +21,6 @@ defaults:
concurrency: concurrency:
group: build-build-tools-image-${{ inputs.image-tag }} group: build-build-tools-image-${{ inputs.image-tag }}
cancel-in-progress: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job. # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {} permissions: {}
@@ -30,6 +29,7 @@ jobs:
check-image: check-image:
uses: ./.github/workflows/check-build-tools-image.yml uses: ./.github/workflows/check-build-tools-image.yml
# This job uses older version of GitHub Actions because it's run on gen2 runners, which don't support node 20 (for newer versions)
build-image: build-image:
needs: [ check-image ] needs: [ check-image ]
if: needs.check-image.outputs.found == 'false' if: needs.check-image.outputs.found == 'false'
@@ -38,7 +38,7 @@ jobs:
matrix: matrix:
arch: [ x64, arm64 ] arch: [ x64, arm64 ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} runs-on: ${{ fromJson(format('["self-hosted", "dev", "{0}"]', matrix.arch)) }}
env: env:
IMAGE_TAG: ${{ inputs.image-tag }} IMAGE_TAG: ${{ inputs.image-tag }}
@@ -54,38 +54,40 @@ jobs:
exit 1 exit 1
fi fi
- uses: actions/checkout@v4 - uses: actions/checkout@v3
- uses: ./.github/actions/set-docker-config-dir # Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
- uses: docker/setup-buildx-action@v3 # The default value is ~/.docker
with: - name: Set custom docker config directory
cache-binary: false run: |
mkdir -p /tmp/.docker-custom
echo DOCKER_CONFIG=/tmp/.docker-custom >> $GITHUB_ENV
- uses: docker/login-action@v3 - uses: docker/setup-buildx-action@v2
- uses: docker/login-action@v2
with: with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- uses: docker/login-action@v3 - uses: docker/build-push-action@v4
with:
registry: cache.neon.build
username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}
password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}
- uses: docker/build-push-action@v6
with: with:
context: . context: .
provenance: false provenance: false
push: true push: true
pull: true pull: true
file: Dockerfile.build-tools file: Dockerfile.build-tools
cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.arch }} cache-from: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0},mode=max', matrix.arch) || '' }} cache-to: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }},mode=max
tags: neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.arch }} tags: neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.arch }}
- name: Remove custom docker config directory
run: |
rm -rf /tmp/.docker-custom
merge-images: merge-images:
needs: [ build-image ] needs: [ build-image ]
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
env: env:
IMAGE_TAG: ${{ inputs.image-tag }} IMAGE_TAG: ${{ inputs.image-tag }}

File diff suppressed because it is too large Load Diff

View File

@@ -19,23 +19,30 @@ permissions: {}
jobs: jobs:
check-image: check-image:
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
outputs: outputs:
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }} tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
found: ${{ steps.check-image.outputs.found }} found: ${{ steps.check-image.outputs.found }}
steps: steps:
- uses: actions/checkout@v4
- name: Get build-tools image tag for the current commit - name: Get build-tools image tag for the current commit
id: get-build-tools-tag id: get-build-tools-tag
env: env:
IMAGE_TAG: | COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
${{ hashFiles('Dockerfile.build-tools', GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
'.github/workflows/check-build-tools-image.yml',
'.github/workflows/build-build-tools-image.yml') }}
run: | run: |
echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT LAST_BUILD_TOOLS_SHA=$(
gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
--method GET \
--field path=Dockerfile.build-tools \
--field sha=${COMMIT_SHA} \
--field per_page=1 \
--jq ".[0].sha" \
"/repos/${GITHUB_REPOSITORY}/commits"
)
echo "image-tag=${LAST_BUILD_TOOLS_SHA}" | tee -a $GITHUB_OUTPUT
- name: Check if such tag found in the registry - name: Check if such tag found in the registry
id: check-image id: check-image

View File

@@ -16,7 +16,7 @@ permissions: {}
jobs: jobs:
check-permissions: check-permissions:
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- name: Disallow CI runs on PRs from forks - name: Disallow CI runs on PRs from forks
if: | if: |

View File

@@ -9,7 +9,7 @@ on:
jobs: jobs:
cleanup: cleanup:
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- name: Cleanup - name: Cleanup
run: | run: |

View File

@@ -1,54 +0,0 @@
name: Add `external` label to issues and PRs created by external users
on:
issues:
types:
- opened
pull_request_target:
types:
- opened
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
env:
LABEL: external
jobs:
check-user:
runs-on: ubuntu-22.04
outputs:
is-member: ${{ steps.check-user.outputs.is-member }}
steps:
- name: Check whether `${{ github.actor }}` is a member of `${{ github.repository_owner }}`
id: check-user
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
if gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${GITHUB_ACTOR}"; then
is_member=true
else
is_member=false
fi
echo "is-member=${is_member}" | tee -a ${GITHUB_OUTPUT}
add-label:
if: needs.check-user.outputs.is-member == 'false'
needs: [ check-user ]
runs-on: ubuntu-22.04
permissions:
pull-requests: write # for `gh pr edit`
issues: write # for `gh issue edit`
steps:
- name: Add `${{ env.LABEL }}` label
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ITEM_NUMBER: ${{ github.event[github.event_name == 'pull_request_target' && 'pull_request' || 'issue'].number }}
GH_CLI_COMMAND: ${{ github.event_name == 'pull_request_target' && 'pr' || 'issue' }}
run: |
gh ${GH_CLI_COMMAND} --repo ${GITHUB_REPOSITORY} edit --add-label=${LABEL} ${ITEM_NUMBER}

View File

@@ -56,6 +56,7 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
fetch-depth: 1
- name: Install macOS postgres dependencies - name: Install macOS postgres dependencies
run: brew install flex bison openssl protobuf icu4c pkg-config run: brew install flex bison openssl protobuf icu4c pkg-config
@@ -132,13 +133,212 @@ jobs:
- name: Check that no warnings are produced - name: Check that no warnings are produced
run: ./run_clippy.sh run: ./run_clippy.sh
check-linux-arm-build:
needs: [ check-permissions, build-build-tools-image ]
timeout-minutes: 90
runs-on: [ self-hosted, dev, arm64 ]
env:
# Use release build only, to have less debug info around
# Hence keeping target/ (and general cache size) smaller
BUILD_TYPE: release
CARGO_FEATURES: --features testing
CARGO_FLAGS: --release
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
- name: Set env variables
run: |
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" >> $GITHUB_ENV
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v4
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
- name: Build walproposer-lib
run: mold -run make walproposer-lib -j$(nproc)
- name: Run cargo build
run: |
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests
- name: Run cargo test
env:
NEXTEST_RETRIES: 3
run: |
cargo nextest run $CARGO_FEATURES
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_s3
# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_azure
check-codestyle-rust-arm:
needs: [ check-permissions, build-build-tools-image ]
timeout-minutes: 90
runs-on: [ self-hosted, dev, arm64 ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
# This will catch compiler & clippy warnings in all feature combinations.
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
# NB: keep clippy args in sync with ./run_clippy.sh
- run: |
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
echo "No clippy args found in .neon_clippy_args"
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
- name: Run cargo clippy (debug)
run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
- name: Run cargo clippy (release)
run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS
- name: Check documentation generation
run: cargo doc --workspace --no-deps --document-private-items
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() }}
run: cargo deny check
gather-rust-build-stats: gather-rust-build-stats:
needs: [ check-permissions, build-build-tools-image ] needs: [ check-permissions, build-build-tools-image ]
if: | if: |
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') || contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') || contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main' github.ref_name == 'main'
runs-on: [ self-hosted, large ] runs-on: [ self-hosted, gen3, large ]
container: container:
image: ${{ needs.build-build-tools-image.outputs.image }} image: ${{ needs.build-build-tools-image.outputs.image }}
credentials: credentials:
@@ -148,6 +348,8 @@ jobs:
env: env:
BUILD_TYPE: release BUILD_TYPE: release
# remove the cachepot wrapper and build without crate caches
RUSTC_WRAPPER: ""
# build with incremental compilation produce partial results # build with incremental compilation produce partial results
# so do not attempt to cache this build, also disable the incremental compilation # so do not attempt to cache this build, also disable the incremental compilation
CARGO_INCREMENTAL: 0 CARGO_INCREMENTAL: 0
@@ -157,6 +359,7 @@ jobs:
uses: actions/checkout@v4 uses: actions/checkout@v4
with: with:
submodules: true submodules: true
fetch-depth: 1
# Some of our rust modules use FFI and need those to be checked # Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers - name: Get postgres headers
@@ -166,7 +369,7 @@ jobs:
run: make walproposer-lib -j$(nproc) run: make walproposer-lib -j$(nproc)
- name: Produce the build stats - name: Produce the build stats
run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release --timings -j$(nproc) run: cargo build --all --release --timings
- name: Upload the build stats - name: Upload the build stats
id: upload-stats id: upload-stats

View File

@@ -1,155 +0,0 @@
name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 18 * * *' # Runs at 6 PM UTC every day
workflow_dispatch: # Allows manual triggering of the workflow
inputs:
commit_hash:
type: string
description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'
required: false
default: ''
defaults:
run:
shell: bash -euo pipefail {0}
concurrency:
group: ${{ github.workflow }}
cancel-in-progress: false
jobs:
trigger_bench_on_ec2_machine_in_eu_central_1:
runs-on: [ self-hosted, small ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
timeout-minutes: 360 # Set the timeout to 6 hours
env:
API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
RUN_ID: ${{ github.run_id }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_SECRET }}
AWS_DEFAULT_REGION : "eu-central-1"
AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
steps:
# we don't need the neon source code because we run everything remotely
# however we still need the local github actions to run the allure step below
- uses: actions/checkout@v4
- name: Show my own (github runner) external IP address - usefull for IP allowlisting
run: curl https://ifconfig.me
- name: Start EC2 instance and wait for the instance to boot up
run: |
aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-running --instance-ids $AWS_INSTANCE_ID
sleep 60 # sleep some time to allow cloudinit and our API server to start up
- name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
run: |
public_ip=$(aws ec2 describe-instances --instance-ids $AWS_INSTANCE_ID --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
echo "Public IP of the EC2 instance: $public_ip"
echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> $GITHUB_ENV
- name: Determine commit hash
env:
INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
run: |
if [ -z "$INPUT_COMMIT_HASH" ]; then
echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
else
echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
fi
- name: Start Bench with run_id
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\"}"
- name: Poll Test Status
id: poll_step
run: |
status=""
while [[ "$status" != "failure" && "$status" != "success" ]]; do
response=$(curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY")
echo "Response: $response"
set +x
status=$(echo $response | jq -r '.status')
echo "Test status: $status"
if [[ "$status" == "failure" ]]; then
echo "Test failed"
exit 1 # Fail the job step if status is failure
elif [[ "$status" == "success" || "$status" == "null" ]]; then
break
elif [[ "$status" == "too_many_runs" ]]; then
echo "Too many runs already running"
echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
exit 1
fi
sleep 60 # Poll every 60 seconds
done
- name: Retrieve Test Logs
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
-H 'accept: application/gzip' \
-H "Authorization: Bearer $API_KEY" \
--output "test_log_${GITHUB_RUN_ID}.gz"
- name: Unzip Test Log and Print it into this job's log
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
gzip -d "test_log_${GITHUB_RUN_ID}.gz"
cat "test_log_${GITHUB_RUN_ID}"
- name: Create Allure report
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
- name: Cleanup Test Resources
if: always()
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d ''
- name: Stop EC2 instance and wait for the instance to be stopped
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
aws ec2 stop-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-stopped --instance-ids $AWS_INSTANCE_ID

View File

@@ -1,211 +0,0 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
pull_request:
paths:
- '.github/workflows/pg-clients.yml'
- 'test_runner/pg_clients/**'
- 'test_runner/logical_repl/**'
- 'poetry.lock'
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
defaults:
run:
shell: bash -euxo pipefail {0}
env:
DEFAULT_PG_VERSION: 16
PLATFORM: neon-captest-new
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
AWS_DEFAULT_REGION: eu-central-1
jobs:
check-permissions:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
uses: ./.github/workflows/check-permissions.yml
with:
github-event-name: ${{ github.event_name }}
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml
build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit
test-logical-replication:
needs: [ build-build-tools-image ]
runs-on: ubuntu-22.04
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init --user root
services:
clickhouse:
image: clickhouse/clickhouse-server:24.6.3.64
ports:
- 9000:9000
- 8123:8123
zookeeper:
image: quay.io/debezium/zookeeper:2.7
ports:
- 2181:2181
kafka:
image: quay.io/debezium/kafka:2.7
env:
ZOOKEEPER_CONNECT: "zookeeper:2181"
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
KAFKA_BROKER_ID: 1
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_JMX_PORT: 9991
ports:
- 9092:9092
debezium:
image: quay.io/debezium/connect:2.7
env:
BOOTSTRAP_SERVERS: kafka:9092
GROUP_ID: 1
CONFIG_STORAGE_TOPIC: debezium-config
OFFSET_STORAGE_TOPIC: debezium-offset
STATUS_STORAGE_TOPIC: debezium-status
DEBEZIUM_CONFIG_CONNECTOR_CLASS: io.debezium.connector.postgresql.PostgresConnector
ports:
- 8083:8083
steps:
- uses: actions/checkout@v4
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run tests
uses: ./.github/actions/run-python-test-set
with:
build_type: remote
test_selection: logical_repl
run_in_parallel: false
extra_params: -m remote_cluster
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
- name: Delete Neon Project
if: always()
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
- name: Post to a Slack channel
if: github.event.schedule && failure()
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Testing the logical replication: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
test-postgres-client-libs:
needs: [ build-build-tools-image ]
runs-on: ubuntu-22.04
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init --user root
steps:
- uses: actions/checkout@v4
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run tests
uses: ./.github/actions/run-python-test-set
with:
build_type: remote
test_selection: pg_clients
run_in_parallel: false
extra_params: -m remote_cluster
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
- name: Delete Neon Project
if: always()
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
- name: Post to a Slack channel
if: github.event.schedule && failure()
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Testing Postgres clients: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

98
.github/workflows/pg_clients.yml vendored Normal file
View File

@@ -0,0 +1,98 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
# TODO: switch to gen2 runner, requires docker
runs-on: [ ubuntu-latest ]
env:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-python-deps-ubunutu-latest-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
run: |
# Test framework expects we have psql binary;
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
# We use GitHub's action upload-artifact because `ubuntu-latest` doesn't have configured AWS CLI.
# It will be fixed after switching to gen2 runner
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v4
with:
retention-days: 7
name: python-test-pg_clients-${{ runner.os }}-stage-logs
path: ${{ env.TEST_OUTPUT }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -7,20 +7,12 @@ on:
description: 'Source tag' description: 'Source tag'
required: true required: true
type: string type: string
force:
description: 'Force the image to be pinned'
default: false
type: boolean
workflow_call: workflow_call:
inputs: inputs:
from-tag: from-tag:
description: 'Source tag' description: 'Source tag'
required: true required: true
type: string type: string
force:
description: 'Force the image to be pinned'
default: false
type: boolean
defaults: defaults:
run: run:
@@ -28,20 +20,16 @@ defaults:
concurrency: concurrency:
group: pin-build-tools-image-${{ inputs.from-tag }} group: pin-build-tools-image-${{ inputs.from-tag }}
cancel-in-progress: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {} permissions: {}
env:
FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: pinned
jobs: jobs:
check-manifests: tag-image:
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
outputs:
skip: ${{ steps.check-manifests.outputs.skip }} env:
FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: pinned
steps: steps:
- name: Check if we really need to pin the image - name: Check if we really need to pin the image
@@ -58,44 +46,27 @@ jobs:
echo "skip=${skip}" | tee -a $GITHUB_OUTPUT echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
tag-image:
needs: check-manifests
# use format(..) to catch both inputs.force = true AND inputs.force = 'true'
if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'
runs-on: ubuntu-22.04
permissions:
id-token: write # for `azure/login`
steps:
- uses: docker/login-action@v3 - uses: docker/login-action@v3
if: steps.check-manifests.outputs.skip == 'false'
with: with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub
if: steps.check-manifests.outputs.skip == 'false'
run: |
docker buildx imagetools create -t neondatabase/build-tools:${TO_TAG} \
neondatabase/build-tools:${FROM_TAG}
- uses: docker/login-action@v3 - uses: docker/login-action@v3
if: steps.check-manifests.outputs.skip == 'false'
with: with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }} username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }} password: ${{ secrets.AWS_SECRET_KEY_DEV }}
- name: Azure login - name: Tag build-tools with `${{ env.TO_TAG }}` in ECR
uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1 if: steps.check-manifests.outputs.skip == 'false'
with:
client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
tenant-id: ${{ secrets.AZURE_TENANT_ID }}
subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
- name: Login to ACR
run: |
az acr login --name=neoneastus2
- name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
run: | run: |
docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \ docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \
-t neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG} \
-t neondatabase/build-tools:${TO_TAG} \
neondatabase/build-tools:${FROM_TAG} neondatabase/build-tools:${FROM_TAG}

View File

@@ -19,7 +19,7 @@ on:
jobs: jobs:
notify: notify:
runs-on: ubuntu-22.04 runs-on: [ ubuntu-latest ]
steps: steps:
- uses: neondatabase/dev-actions/release-pr-notify@main - uses: neondatabase/dev-actions/release-pr-notify@main

View File

@@ -26,7 +26,7 @@ defaults:
jobs: jobs:
create-storage-release-branch: create-storage-release-branch:
if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }} if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
permissions: permissions:
contents: write # for `git push` contents: write # for `git push`
@@ -52,22 +52,20 @@ jobs:
env: env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: | run: |
TITLE="Storage & Compute release ${RELEASE_DATE}"
cat << EOF > body.md cat << EOF > body.md
## ${TITLE} ## Release ${RELEASE_DATE}
**Please merge this Pull Request using 'Create a merge commit' button** **Please merge this Pull Request using 'Create a merge commit' button**
EOF EOF
gh pr create --title "${TITLE}" \ gh pr create --title "Release ${RELEASE_DATE}" \
--body-file "body.md" \ --body-file "body.md" \
--head "${RELEASE_BRANCH}" \ --head "${RELEASE_BRANCH}" \
--base "release" --base "release"
create-proxy-release-branch: create-proxy-release-branch:
if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }} if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
permissions: permissions:
contents: write # for `git push` contents: write # for `git push`
@@ -93,15 +91,13 @@ jobs:
env: env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: | run: |
TITLE="Proxy release ${RELEASE_DATE}"
cat << EOF > body.md cat << EOF > body.md
## ${TITLE} ## Proxy release ${RELEASE_DATE}
**Please merge this Pull Request using 'Create a merge commit' button** **Please merge this Pull Request using 'Create a merge commit' button**
EOF EOF
gh pr create --title "${TITLE}" \ gh pr create --title "Proxy release ${RELEASE_DATE}" \
--body-file "body.md" \ --body-file "body.md" \
--head "${RELEASE_BRANCH}" \ --head "${RELEASE_BRANCH}" \
--base "release-proxy" --base "release-proxy"

View File

@@ -13,11 +13,13 @@ defaults:
env: env:
# A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
jobs: jobs:
cancel-previous-e2e-tests: cancel-previous-e2e-tests:
if: github.event_name == 'pull_request' if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04 runs-on: ubuntu-latest
steps: steps:
- name: Cancel previous e2e-tests runs for this PR - name: Cancel previous e2e-tests runs for this PR
@@ -29,7 +31,7 @@ jobs:
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}" --field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
tag: tag:
runs-on: ubuntu-22.04 runs-on: [ ubuntu-latest ]
outputs: outputs:
build-tag: ${{ steps.build-tag.outputs.tag }} build-tag: ${{ steps.build-tag.outputs.tag }}
@@ -60,88 +62,58 @@ jobs:
trigger-e2e-tests: trigger-e2e-tests:
needs: [ tag ] needs: [ tag ]
runs-on: ubuntu-22.04 runs-on: [ self-hosted, gen3, small ]
env: env:
EVENT_ACTION: ${{ github.event.action }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
TAG: ${{ needs.tag.outputs.build-tag }} TAG: ${{ needs.tag.outputs.build-tag }}
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
steps: steps:
- name: Wait for `promote-images` job to finish - name: check if ecr image are present
# It's important to have a timeout here, the script in the step can run infinitely
timeout-minutes: 60
run: | run: |
if [ "${GITHUB_EVENT_NAME}" != "pull_request" ] || [ "${EVENT_ACTION}" != "ready_for_review" ]; then for REPO in neon compute-tools compute-node-v14 vm-compute-node-v14 compute-node-v15 vm-compute-node-v15 compute-node-v16 vm-compute-node-v16; do
exit 0 OUTPUT=$(aws ecr describe-images --repository-name ${REPO} --region eu-central-1 --query "imageDetails[?imageTags[?contains(@, '${TAG}')]]" --output text)
fi if [ "$OUTPUT" == "" ]; then
echo "$REPO with image tag $TAG not found" >> $GITHUB_OUTPUT
# For PRs we use the run id as the tag exit 1
BUILD_AND_TEST_RUN_ID=${TAG} fi
while true; do
conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
case "$conclusion" in
success)
break
;;
failure | cancelled | skipped)
echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
exit 1
;;
*)
echo "The 'promote-images' hasn't succeed yet. Waiting..."
sleep 60
;;
esac
done done
- name: Set e2e-platforms
id: e2e-platforms
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Default set of platforms to run e2e tests on
platforms='["docker", "k8s"]'
# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
# If the workflow run is not a pull request, add k8s-neonvm to the list.
if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
case "$f" in
vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
;;
*)
# no-op
;;
esac
done
else
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
fi
echo "e2e-platforms=${platforms}" | tee -a $GITHUB_OUTPUT
- name: Set PR's status to pending and request a remote CI test - name: Set PR's status to pending and request a remote CI test
env:
E2E_PLATFORMS: ${{ steps.e2e-platforms.outputs.e2e-platforms }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: | run: |
REMOTE_REPO="${GITHUB_REPOSITORY_OWNER}/cloud" # For pull requests, GH Actions set "github.sha" variable to point at a fake merge commit
# but we need to use a real sha of a latest commit in the PR's branch for the e2e job,
# to place a job run status update later.
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
# For non-PR kinds of runs, the above will produce an empty variable, pick the original sha value for those
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
gh api "/repos/${GITHUB_REPOSITORY}/statuses/${COMMIT_SHA}" \ REMOTE_REPO="${{ github.repository_owner }}/cloud"
--method POST \
--raw-field "state=pending" \
--raw-field "description=[$REMOTE_REPO] Remote CI job is about to start" \
--raw-field "context=neon-cloud-e2e"
gh workflow --repo ${REMOTE_REPO} \ curl -f -X POST \
run testing.yml \ https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
--ref "main" \ -H "Accept: application/vnd.github.v3+json" \
--raw-field "ci_job_name=neon-cloud-e2e" \ --user "${{ secrets.CI_ACCESS_TOKEN }}" \
--raw-field "commit_hash=$COMMIT_SHA" \ --data \
--raw-field "remote_repo=${GITHUB_REPOSITORY}" \ "{
--raw-field "storage_image_tag=${TAG}" \ \"state\": \"pending\",
--raw-field "compute_image_tag=${TAG}" \ \"context\": \"neon-cloud-e2e\",
--raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \ \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
--raw-field "e2e-platforms=${E2E_PLATFORMS}" }"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\",
\"storage_image_tag\": \"${TAG}\",
\"compute_image_tag\": \"${TAG}\",
\"concurrency_group\": \"${{ env.E2E_CONCURRENCY_GROUP }}\"
}
}"

View File

@@ -1,5 +1,4 @@
# * `-A unknown_lints` do not warn about unknown lint suppressions # * `-A unknown_lints` do not warn about unknown lint suppressions
# that people with newer toolchains might use # that people with newer toolchains might use
# * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status) # * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status)
# * `-D clippy::todo` - don't let `todo!()` slip into `main` export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings"
export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings -D clippy::todo"

View File

@@ -1,13 +1,13 @@
/compute_tools/ @neondatabase/control-plane @neondatabase/compute /compute_tools/ @neondatabase/control-plane @neondatabase/compute
/storage_controller @neondatabase/storage /control_plane/attachment_service @neondatabase/storage
/libs/pageserver_api/ @neondatabase/storage /libs/pageserver_api/ @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage /libs/postgres_ffi/ @neondatabase/compute @neondatabase/safekeepers
/libs/remote_storage/ @neondatabase/storage /libs/remote_storage/ @neondatabase/storage
/libs/safekeeper_api/ @neondatabase/storage /libs/safekeeper_api/ @neondatabase/safekeepers
/libs/vm_monitor/ @neondatabase/autoscaling /libs/vm_monitor/ @neondatabase/autoscaling
/pageserver/ @neondatabase/storage /pageserver/ @neondatabase/storage
/pgxn/ @neondatabase/compute /pgxn/ @neondatabase/compute
/pgxn/neon/ @neondatabase/compute @neondatabase/storage /pgxn/neon/ @neondatabase/compute @neondatabase/safekeepers
/proxy/ @neondatabase/proxy /proxy/ @neondatabase/proxy
/safekeeper/ @neondatabase/storage /safekeeper/ @neondatabase/safekeepers
/vendor/ @neondatabase/compute /vendor/ @neondatabase/compute

1968
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@ resolver = "2"
members = [ members = [
"compute_tools", "compute_tools",
"control_plane", "control_plane",
"control_plane/storcon_cli", "control_plane/attachment_service",
"pageserver", "pageserver",
"pageserver/compaction", "pageserver/compaction",
"pageserver/ctl", "pageserver/ctl",
@@ -12,10 +12,9 @@ members = [
"proxy", "proxy",
"safekeeper", "safekeeper",
"storage_broker", "storage_broker",
"storage_controller", "s3_scrubber",
"storage_controller/client",
"storage_scrubber",
"workspace_hack", "workspace_hack",
"trace",
"libs/compute_api", "libs/compute_api",
"libs/pageserver_api", "libs/pageserver_api",
"libs/postgres_ffi", "libs/postgres_ffi",
@@ -41,26 +40,24 @@ license = "Apache-2.0"
## All dependency versions, used in the project ## All dependency versions, used in the project
[workspace.dependencies] [workspace.dependencies]
ahash = "0.8"
anyhow = { version = "1.0", features = ["backtrace"] } anyhow = { version = "1.0", features = ["backtrace"] }
arc-swap = "1.6" arc-swap = "1.6"
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] } async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
atomic-take = "1.1.0" azure_core = "0.18"
azure_core = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] } azure_identity = "0.18"
azure_identity = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] } azure_storage = "0.18"
azure_storage = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] } azure_storage_blobs = "0.18"
azure_storage_blobs = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
flate2 = "1.0.26" flate2 = "1.0.26"
async-stream = "0.3" async-stream = "0.3"
async-trait = "0.1" async-trait = "0.1"
aws-config = { version = "1.3", default-features = false, features=["rustls"] } aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.26" aws-sdk-s3 = "1.14"
aws-sdk-iam = "1.15.0" aws-sdk-iam = "1.15.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] } aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.9" aws-smithy-types = "1.1.4"
aws-credential-types = "1.2.0" aws-credential-types = "1.1.4"
aws-sigv4 = { version = "1.2.1", features = ["sign-http"] } aws-sigv4 = { version = "1.2.0", features = ["sign-http"] }
aws-types = "1.2.0" aws-types = "1.1.7"
axum = { version = "0.6.20", features = ["ws"] } axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0" base64 = "0.13.0"
bincode = "1.3" bincode = "1.3"
@@ -75,7 +72,6 @@ clap = { version = "4.0", features = ["derive"] }
comfy-table = "6.1" comfy-table = "6.1"
const_format = "0.2" const_format = "0.2"
crc32c = "0.6" crc32c = "0.6"
crossbeam-deque = "0.8.5"
crossbeam-utils = "0.8.5" crossbeam-utils = "0.8.5"
dashmap = { version = "5.5.0", features = ["raw-api"] } dashmap = { version = "5.5.0", features = ["raw-api"] }
either = "1.8" either = "1.8"
@@ -83,13 +79,13 @@ enum-map = "2.4.2"
enumset = "1.0.12" enumset = "1.0.12"
fail = "0.5.0" fail = "0.5.0"
fallible-iterator = "0.2" fallible-iterator = "0.2"
framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" } fs2 = "0.4.3"
futures = "0.3" futures = "0.3"
futures-core = "0.3" futures-core = "0.3"
futures-util = "0.3" futures-util = "0.3"
git-version = "0.3" git-version = "0.3"
hashbrown = "0.14" hashbrown = "0.13"
hashlink = "0.9.1" hashlink = "0.8.4"
hdrhistogram = "7.5.2" hdrhistogram = "7.5.2"
hex = "0.4" hex = "0.4"
hex-literal = "0.4" hex-literal = "0.4"
@@ -100,8 +96,7 @@ http-types = { version = "2", default-features = false }
humantime = "2.1" humantime = "2.1"
humantime-serde = "1.1.1" humantime-serde = "1.1.1"
hyper = "0.14" hyper = "0.14"
tokio-tungstenite = "0.20.0" hyper-tungstenite = "0.11"
indexmap = "2"
inotify = "0.10.2" inotify = "0.10.2"
ipnet = "2.9.0" ipnet = "2.9.0"
itertools = "0.10" itertools = "0.10"
@@ -110,32 +105,32 @@ lasso = "0.7"
leaky-bucket = "1.0.1" leaky-bucket = "1.0.1"
libc = "0.2" libc = "0.2"
md5 = "0.7.0" md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] } measured = { version = "0.0.13", features=["default", "lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.8" memoffset = "0.8"
native-tls = "0.2"
nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] } nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
notify = "6.0.0" notify = "6.0.0"
num_cpus = "1.15" num_cpus = "1.15"
num-traits = "0.2.15" num-traits = "0.2.15"
once_cell = "1.13" once_cell = "1.13"
opentelemetry = "0.20.0" opentelemetry = "0.20.0"
opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] } opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.12.0" opentelemetry-semantic-conventions = "0.12.0"
parking_lot = "0.12" parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["zstd"] } parquet = { version = "49.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "51.0.0" parquet_derive = "49.0.0"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] } pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2" pin-project-lite = "0.2"
procfs = "0.16" procfs = "0.14"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11" prost = "0.11"
rand = "0.8" rand = "0.8"
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] } redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2" regex = "1.10.2"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_20"] } reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
reqwest-middleware = "0.3.0" reqwest-middleware = "0.2.0"
reqwest-retry = "0.5" reqwest-retry = "0.2.2"
routerify = "3" routerify = "3"
rpds = "0.13" rpds = "0.13"
rustc-hash = "1.1.0" rustc-hash = "1.1.0"
@@ -145,7 +140,7 @@ rustls-split = "0.3"
scopeguard = "1.1" scopeguard = "1.1"
sysinfo = "0.29.2" sysinfo = "0.29.2"
sd-notify = "0.4.1" sd-notify = "0.4.1"
sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1" serde_json = "1"
serde_path_to_error = "0.1" serde_path_to_error = "0.1"
@@ -159,12 +154,11 @@ socket2 = "0.5"
strum = "0.24" strum = "0.24"
strum_macros = "0.24" strum_macros = "0.24"
"subtle" = "2.5.0" "subtle" = "2.5.0"
# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet svg_fmt = "0.4.1"
svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
sync_wrapper = "0.1.2" sync_wrapper = "0.1.2"
tar = "0.4" tar = "0.4"
task-local-extensions = "0.1.4" task-local-extensions = "0.1.4"
test-context = "0.3" test-context = "0.1"
thiserror = "1.0" thiserror = "1.0"
tikv-jemallocator = "0.5" tikv-jemallocator = "0.5"
tikv-jemalloc-ctl = "0.5" tikv-jemalloc-ctl = "0.5"
@@ -179,21 +173,17 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.7" toml = "0.7"
toml_edit = "0.19" toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]} tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tower-service = "0.3.2"
tracing = "0.1" tracing = "0.1"
tracing-error = "0.2.0" tracing-error = "0.2.0"
tracing-opentelemetry = "0.21.0" tracing-opentelemetry = "0.20.0"
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] } tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
try-lock = "0.2.5"
twox-hash = { version = "1.6.3", default-features = false } twox-hash = { version = "1.6.3", default-features = false }
typed-json = "0.1"
url = "2.2" url = "2.2"
urlencoding = "2.1" urlencoding = "2.1"
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] } uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
walkdir = "2.3.2" walkdir = "2.3.2"
rustls-native-certs = "0.7" webpki-roots = "0.25"
x509-parser = "0.15" x509-parser = "0.15"
whoami = "1.5.1"
## TODO replace this with tracing ## TODO replace this with tracing
env_logger = "0.10" env_logger = "0.10"
@@ -201,10 +191,14 @@ log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" } postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" } postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" } postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" } tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
## Other git libraries
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
## Local libraries ## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" } compute_api = { version = "0.1", path = "./libs/compute_api/" }
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" } consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
@@ -220,7 +214,6 @@ remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" } safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
desim = { version = "0.1", path = "./libs/desim" } desim = { version = "0.1", path = "./libs/desim" }
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy. storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
storage_controller_client = { path = "./storage_controller/client" }
tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" } tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" } tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
utils = { version = "0.1", path = "./libs/utils/" } utils = { version = "0.1", path = "./libs/utils/" }
@@ -239,12 +232,13 @@ tonic-build = "0.9"
[patch.crates-io] [patch.crates-io]
# Needed to get `tokio-postgres-rustls` to depend on our fork. # This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" } tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
# bug fixes for UUID # bug fixes for UUID
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" } parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" } parquet_derive = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
################# Binary contents sections ################# Binary contents sections

View File

@@ -17,7 +17,7 @@ COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
ENV BUILD_TYPE=release ENV BUILD_TYPE release
RUN set -e \ RUN set -e \
&& mold -run make -j $(nproc) -s neon-pg-ext \ && mold -run make -j $(nproc) -s neon-pg-ext \
&& rm -rf pg_install/build \ && rm -rf pg_install/build \
@@ -29,15 +29,25 @@ WORKDIR /home/nonroot
ARG GIT_VERSION=local ARG GIT_VERSION=local
ARG BUILD_TAG ARG BUILD_TAG
# Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
# Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
# cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
ARG RUSTC_WRAPPER=cachepot
ENV AWS_REGION=eu-central-1
ENV CACHEPOT_S3_KEY_PREFIX=cachepot
ARG CACHEPOT_BUCKET=neon-github-dev
#ARG AWS_ACCESS_KEY_ID
#ARG AWS_SECRET_ACCESS_KEY
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
COPY --chown=nonroot . . COPY --chown=nonroot . .
ARG ADDITIONAL_RUSTFLAGS # Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \ RUN set -e \
&& PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \ && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build \
--bin pg_sni_router \ --bin pg_sni_router \
--bin pageserver \ --bin pageserver \
--bin pagectl \ --bin pagectl \
@@ -46,8 +56,8 @@ RUN set -e \
--bin storage_controller \ --bin storage_controller \
--bin proxy \ --bin proxy \
--bin neon_local \ --bin neon_local \
--bin storage_scrubber \ --locked --release \
--locked --release && cachepot -s
# Build final image # Build final image
# #
@@ -59,6 +69,8 @@ RUN set -e \
&& apt install -y \ && apt install -y \
libreadline-dev \ libreadline-dev \
libseccomp-dev \ libseccomp-dev \
libicu67 \
openssl \
ca-certificates \ ca-certificates \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \ && useradd -d /data neon \
@@ -72,7 +84,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_controller /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_controller /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber /usr/local/bin
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/ COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/ COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
@@ -81,24 +92,20 @@ COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config. # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values. # Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
RUN mkdir -p /data/.neon/ && \ RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
echo "id=1234" > "/data/.neon/identity.toml" && \ && /usr/local/bin/pageserver -D /data/.neon/ --init \
echo "broker_endpoint='http://storage_broker:50051'\n" \ -c "id=1234" \
"pg_distrib_dir='/usr/local/'\n" \ -c "broker_endpoint='http://storage_broker:50051'" \
"listen_pg_addr='0.0.0.0:6400'\n" \ -c "pg_distrib_dir='/usr/local/'" \
"listen_http_addr='0.0.0.0:9898'\n" \ -c "listen_pg_addr='0.0.0.0:6400'" \
> /data/.neon/pageserver.toml && \ -c "listen_http_addr='0.0.0.0:9898'"
chown -R neon:neon /data/.neon
# When running a binary that links with libpq, default to using our most recent postgres version. Binaries # When running a binary that links with libpq, default to using our most recent postgres version. Binaries
# that want a particular postgres version will select it explicitly: this is just a default. # that want a particular postgres version will select it explicitly: this is just a default.
ENV LD_LIBRARY_PATH=/usr/local/v16/lib ENV LD_LIBRARY_PATH /usr/local/v16/lib
VOLUME ["/data"] VOLUME ["/data"]
USER neon USER neon
EXPOSE 6400 EXPOSE 6400
EXPOSE 9898 EXPOSE 9898
CMD ["/usr/local/bin/pageserver", "-D", "/data/.neon"]

View File

@@ -1,13 +1,5 @@
FROM debian:bullseye-slim FROM debian:bullseye-slim
# Use ARG as a build-time environment variable here to allow.
# It's not supposed to be set outside.
# Alternatively it can be obtained using the following command
# ```
# . /etc/os-release && echo "${VERSION_CODENAME}"
# ```
ARG DEBIAN_VERSION_CODENAME=bullseye
# Add nonroot user # Add nonroot user
RUN useradd -ms /bin/bash nonroot -b /home RUN useradd -ms /bin/bash nonroot -b /home
SHELL ["/bin/bash", "-c"] SHELL ["/bin/bash", "-c"]
@@ -34,6 +26,7 @@ RUN set -e \
liblzma-dev \ liblzma-dev \
libncurses5-dev \ libncurses5-dev \
libncursesw5-dev \ libncursesw5-dev \
libpq-dev \
libreadline-dev \ libreadline-dev \
libseccomp-dev \ libseccomp-dev \
libsqlite3-dev \ libsqlite3-dev \
@@ -58,40 +51,29 @@ RUN set -e \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# protobuf-compiler (protoc) # protobuf-compiler (protoc)
ENV PROTOC_VERSION=25.1 ENV PROTOC_VERSION 25.1
RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \ RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
&& unzip -q protoc.zip -d protoc \ && unzip -q protoc.zip -d protoc \
&& mv protoc/bin/protoc /usr/local/bin/protoc \ && mv protoc/bin/protoc /usr/local/bin/protoc \
&& mv protoc/include/google /usr/local/include/google \ && mv protoc/include/google /usr/local/include/google \
&& rm -rf protoc.zip protoc && rm -rf protoc.zip protoc
# s5cmd
ENV S5CMD_VERSION=2.2.2
RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
&& chmod +x s5cmd \
&& mv s5cmd /usr/local/bin/s5cmd
# LLVM # LLVM
ENV LLVM_VERSION=18 ENV LLVM_VERSION=17
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \ RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION_CODENAME}/ llvm-toolchain-${DEBIAN_VERSION_CODENAME}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \ && echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
&& apt update \ && apt update \
&& apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \ && apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \ && bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install docker # PostgreSQL 14
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \ RUN curl -fsSL 'https://www.postgresql.org/media/keys/ACCC4CF8.asc' | apt-key add - \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION_CODENAME} stable" > /etc/apt/sources.list.d/docker.list \ && echo 'deb http://apt.postgresql.org/pub/repos/apt bullseye-pgdg main' > /etc/apt/sources.list.d/pgdg.list \
&& apt update \ && apt update \
&& apt install -y docker-ce docker-ce-cli \ && apt install -y postgresql-client-14 \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Configure sudo & docker
RUN usermod -aG sudo nonroot && \
echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
usermod -aG docker nonroot
# AWS CLI # AWS CLI
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" \ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" \
&& unzip -q awscliv2.zip \ && unzip -q awscliv2.zip \
@@ -99,7 +81,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
&& rm awscliv2.zip && rm awscliv2.zip
# Mold: A Modern Linker # Mold: A Modern Linker
ENV MOLD_VERSION=v2.33.0 ENV MOLD_VERSION v2.4.0
RUN set -e \ RUN set -e \
&& git clone https://github.com/rui314/mold.git \ && git clone https://github.com/rui314/mold.git \
&& mkdir mold/build \ && mkdir mold/build \
@@ -124,51 +106,12 @@ RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JS
&& make install \ && make install \
&& rm -rf ../lcov.tar.gz && rm -rf ../lcov.tar.gz
# Compile and install the static OpenSSL library
ENV OPENSSL_VERSION=1.1.1w
ENV OPENSSL_PREFIX=/usr/local/openssl
RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz && \
echo "cf3098950cb4d853ad95c0841f1f9c6d3dc102dccfcacd521d93925208b76ac8 /tmp/openssl-${OPENSSL_VERSION}.tar.gz" | sha256sum --check && \
cd /tmp && \
tar xzvf /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
rm /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
cd /tmp/openssl-${OPENSSL_VERSION} && \
./config --prefix=${OPENSSL_PREFIX} -static --static no-shared -fPIC && \
make -j "$(nproc)" && \
make install && \
cd /tmp && \
rm -rf /tmp/openssl-${OPENSSL_VERSION}
# Use the same version of libicu as the compute nodes so that
# clusters created using inidb on pageserver can be used by computes.
#
# TODO: at this time, Dockerfile.compute-node uses the debian bullseye libicu
# package, which is 67.1. We're duplicating that knowledge here, and also, technically,
# Debian has a few patches on top of 67.1 that we're not adding here.
ENV ICU_VERSION=67.1
ENV ICU_PREFIX=/usr/local/icu
# Download and build static ICU
RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
echo "94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz" | sha256sum --check && \
mkdir /tmp/icu && \
pushd /tmp/icu && \
tar -xzf /tmp/libicu-${ICU_VERSION}.tgz && \
pushd icu/source && \
./configure --prefix=${ICU_PREFIX} --enable-static --enable-shared=no CXXFLAGS="-fPIC" CFLAGS="-fPIC" && \
make -j "$(nproc)" && \
make install && \
popd && \
rm -rf icu && \
rm -f /tmp/libicu-${ICU_VERSION}.tgz && \
popd
# Switch to nonroot user # Switch to nonroot user
USER nonroot:nonroot USER nonroot:nonroot
WORKDIR /home/nonroot WORKDIR /home/nonroot
# Python # Python
ENV PYTHON_VERSION=3.9.19 \ ENV PYTHON_VERSION=3.9.18 \
PYENV_ROOT=/home/nonroot/.pyenv \ PYENV_ROOT=/home/nonroot/.pyenv \
PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
RUN set -e \ RUN set -e \
@@ -192,14 +135,9 @@ WORKDIR /home/nonroot
# Rust # Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`) # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.80.1 ENV RUSTC_VERSION=1.77.0
ENV RUSTUP_HOME="/home/nonroot/.rustup" ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}" ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
ARG CARGO_HAKARI_VERSION=0.9.30
ARG CARGO_DENY_VERSION=0.16.1
ARG CARGO_HACK_VERSION=0.6.31
ARG CARGO_NEXTEST_VERSION=0.9.72
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
chmod +x rustup-init && \ chmod +x rustup-init && \
./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \ ./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
@@ -208,13 +146,15 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
. "$HOME/.cargo/env" && \ . "$HOME/.cargo/env" && \
cargo --version && rustup --version && \ cargo --version && rustup --version && \
rustup component add llvm-tools-preview rustfmt clippy && \ rustup component add llvm-tools-preview rustfmt clippy && \
cargo install rustfilt --version ${RUSTFILT_VERSION} && \ cargo install --git https://github.com/paritytech/cachepot && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \ cargo install rustfilt && \
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \ cargo install cargo-hakari && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \ cargo install cargo-deny --locked && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \ cargo install cargo-hack && \
cargo install cargo-nextest && \
rm -rf /home/nonroot/.cargo/registry && \ rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git rm -rf /home/nonroot/.cargo/git
ENV RUSTC_WRAPPER=cachepot
# Show versions # Show versions
RUN whoami \ RUN whoami \
@@ -224,6 +164,3 @@ RUN whoami \
&& rustup --version --verbose \ && rustup --version --verbose \
&& rustc --version --verbose \ && rustc --version --verbose \
&& clang --version && clang --version
# Set following flag to check in Makefile if its running in Docker
RUN touch /home/nonroot/.docker_build

View File

@@ -89,16 +89,16 @@ RUN apt update && \
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2 # SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \ echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \ mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \ cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \ DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
make clean && cp -R /sfcgal/* / make clean && cp -R /sfcgal/* /
ENV PATH="/usr/local/pgsql/bin:$PATH" ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \ echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \ mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\ find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
./autogen.sh && \ ./autogen.sh && \
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \ ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
@@ -124,7 +124,7 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postg
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \ RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \ echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \ mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \ mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release .. && \ cmake -DCMAKE_BUILD_TYPE=Release .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -149,7 +149,7 @@ RUN apt update && \
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \ RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \ echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \ mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
# generate and copy upgrade scripts # generate and copy upgrade scripts
mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \ mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
cp upgrade/* /usr/local/pgsql/share/extension/ && \ cp upgrade/* /usr/local/pgsql/share/extension/ && \
@@ -194,7 +194,7 @@ RUN case "$(uname -m)" in \
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \ echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \ mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \ mkdir build && cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \ cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -204,7 +204,7 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \ echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \ mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \ export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \ make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -222,7 +222,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \ echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \ mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
# unit extension's "create extension" script relies on absolute install path to fill some reference tables. # unit extension's "create extension" script relies on absolute install path to fill some reference tables.
@@ -241,17 +241,11 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
FROM build-deps AS vector-pg-build FROM build-deps AS vector-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY patches/pgvector.patch /pgvector.patch RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.5.1.tar.gz -O pgvector.tar.gz && \
echo "cc7a8e034a96e30a819911ac79d32f6bc47bdd1aa2de4d7d4904e26b83209dc8 pgvector.tar.gz" | sha256sum --check && \
# By default, pgvector Makefile uses `-march=native`. We don't want that, mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
# because we build the images on different machines than where we run them. make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
# Pass OPTFLAGS="" to remove it. make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
patch -p1 < /pgvector.patch && \
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
######################################################################################### #########################################################################################
@@ -266,7 +260,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021 # 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \ RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \ echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \ mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
@@ -281,7 +275,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \ RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \ echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \ mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
@@ -297,7 +291,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \ echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \ mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
@@ -311,12 +305,9 @@ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz
FROM build-deps AS rum-pg-build FROM build-deps AS rum-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY patches/rum.patch /rum.patch
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \ RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \ echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \ mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
patch -p1 < /rum.patch && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
@@ -332,7 +323,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \ echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \ mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
@@ -348,7 +339,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \ RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \ echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \ mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
@@ -364,7 +355,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \ RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \ echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \ mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
@@ -380,7 +371,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \ RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \ echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \ mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
@@ -396,7 +387,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \ RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \ echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \ mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
@@ -411,7 +402,7 @@ FROM build-deps AS timescaledb-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH" ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \ RUN case "${PG_VERSION}" in \
"v14" | "v15") \ "v14" | "v15") \
@@ -427,7 +418,7 @@ RUN case "${PG_VERSION}" in \
apt-get install -y cmake && \ apt-get install -y cmake && \
wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \ wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \ echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
mkdir timescaledb-src && cd timescaledb-src && tar xzf ../timescaledb.tar.gz --strip-components=1 -C . && \ mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \ ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
cd build && \ cd build && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -444,7 +435,7 @@ FROM build-deps AS pg-hint-plan-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH" ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \ RUN case "${PG_VERSION}" in \
"v14") \ "v14") \
@@ -465,11 +456,36 @@ RUN case "${PG_VERSION}" in \
esac && \ esac && \
wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \ wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \ echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \ mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \ make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
#########################################################################################
#
# Layer "kq-imcx-pg-build"
# compile kq_imcx extension
#
#########################################################################################
FROM build-deps AS kq-imcx-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN apt-get update && \
apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
mkdir -p /extensions/kq_imcx && cp /usr/local/pgsql/share/extension/kq_imcx.control /extensions/kq_imcx && \
sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \
comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/kq_imcx.tar.zst -T -
######################################################################################### #########################################################################################
# #
@@ -480,10 +496,10 @@ RUN case "${PG_VERSION}" in \
FROM build-deps AS pg-cron-pg-build FROM build-deps AS pg-cron-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \ RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \ echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \ mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \ make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
@@ -506,10 +522,10 @@ RUN apt-get update && \
libboost-system1.74-dev \ libboost-system1.74-dev \
libeigen3-dev libeigen3-dev
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH" ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \ echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \ mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
cmake \ cmake \
-D RDK_BUILD_CAIRO_SUPPORT=OFF \ -D RDK_BUILD_CAIRO_SUPPORT=OFF \
-D RDK_BUILD_INCHI_SUPPORT=ON \ -D RDK_BUILD_INCHI_SUPPORT=ON \
@@ -546,10 +562,10 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.
FROM build-deps AS pg-uuidv7-pg-build FROM build-deps AS pg-uuidv7-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \ echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \ mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \ make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
@@ -563,10 +579,10 @@ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz
FROM build-deps AS pg-roaringbitmap-pg-build FROM build-deps AS pg-roaringbitmap-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \ echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \ mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \ make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
@@ -580,10 +596,10 @@ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4
FROM build-deps AS pg-semver-pg-build FROM build-deps AS pg-semver-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \ RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \ echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \ mkdir pg_semver-src && cd pg_semver-src && tar xvzf ../pg_semver.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \ make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control
@@ -598,7 +614,7 @@ FROM build-deps AS pg-embedding-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \ RUN case "${PG_VERSION}" in \
"v14" | "v15") \ "v14" | "v15") \
export PG_EMBEDDING_VERSION=0.3.5 \ export PG_EMBEDDING_VERSION=0.3.5 \
@@ -609,7 +625,7 @@ RUN case "${PG_VERSION}" in \
esac && \ esac && \
wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \ wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \
echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \ echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \
mkdir pg_embedding-src && cd pg_embedding-src && tar xzf ../pg_embedding.tar.gz --strip-components=1 -C . && \ mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install make -j $(getconf _NPROCESSORS_ONLN) install
@@ -622,10 +638,10 @@ RUN case "${PG_VERSION}" in \
FROM build-deps AS pg-anon-pg-build FROM build-deps AS pg-anon-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \ RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \ echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \ mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\ find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
@@ -657,7 +673,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
chmod +x rustup-init && \ chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \ ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init && \ rm rustup-init && \
cargo install --locked --version 0.11.3 cargo-pgrx && \ cargo install --locked --version 0.10.2 cargo-pgrx && \
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config' /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
USER root USER root
@@ -672,15 +688,10 @@ USER root
FROM rust-extensions-build AS pg-jsonschema-pg-build FROM rust-extensions-build AS pg-jsonschema-pg-build
ARG PG_VERSION ARG PG_VERSION
RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \ RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.2.0.tar.gz -O pg_jsonschema.tar.gz && \
echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \ echo "9118fc508a6e231e7a39acaa6f066fcd79af17a5db757b47d2eefbe14f7794f0 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \ mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
# see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8 sed -i 's/pgrx = "0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
# `unsafe-postgres` feature allows to build pgx extensions
# against postgres forks that decided to change their ABI name (like us).
# With that we can build extensions without forking them and using stock
# pgx. As this feature is new few manual version bumps were required.
sed -i 's/pgrx = "0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \ cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
@@ -694,10 +705,10 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.
FROM rust-extensions-build AS pg-graphql-pg-build FROM rust-extensions-build AS pg-graphql-pg-build
ARG PG_VERSION ARG PG_VERSION
RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \ RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.4.0.tar.gz -O pg_graphql.tar.gz && \
echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \ echo "bd8dc7230282b3efa9ae5baf053a54151ed0e66881c7c53750e2d0c765776edc pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \ mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \ cargo pgrx install --release && \
# it's needed to enable extension because it uses untrusted C language # it's needed to enable extension because it uses untrusted C language
sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \ sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
@@ -716,10 +727,7 @@ ARG PG_VERSION
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023 # 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \ RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \ echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \ mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
# TODO update pgrx version in the pg_tiktoken repo and remove this line
sed -i 's/pgrx = { version = "=0.10.2",/pgrx = { version = "0.11.3",/g' Cargo.toml && \
sed -i 's/pgrx-tests = "=0.10.2"/pgrx-tests = "0.11.3"/g' Cargo.toml && \
cargo pgrx install --release && \ cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
@@ -733,10 +741,14 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6
FROM rust-extensions-build AS pg-pgx-ulid-build FROM rust-extensions-build AS pg-pgx-ulid-build
ARG PG_VERSION ARG PG_VERSION
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -O pgx_ulid.tar.gz && \
echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \ echo "ee5db82945d2d9f2d15597a80cf32de9dca67b897f605beb830561705f12683c pgx_ulid.tar.gz" | sha256sum --check && \
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \ mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ echo "******************* Apply a patch for Postgres 16 support; delete in the next release ******************" && \
wget https://github.com/pksunkara/pgx_ulid/commit/f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
patch -p1 < f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
echo "********************************************************************************************************" && \
sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "=0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \ cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control
@@ -750,10 +762,10 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -
FROM build-deps AS wal2json-pg-build FROM build-deps AS wal2json-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \ echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \ mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install make -j $(getconf _NPROCESSORS_ONLN) install
@@ -766,10 +778,10 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.
FROM build-deps AS pg-ivm-build FROM build-deps AS pg-ivm-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \ echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \ mkdir pg_ivm-src && cd pg_ivm-src && tar xvzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \ make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control
@@ -783,10 +795,10 @@ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_iv
FROM build-deps AS pg-partman-build FROM build-deps AS pg-partman-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \ RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \ echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \ mkdir pg_partman-src && cd pg_partman-src && tar xvzf ../pg_partman.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \ make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control
@@ -822,6 +834,7 @@ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -909,70 +922,6 @@ RUN rm -r /usr/local/pgsql/include
# if they were to be used by other libraries. # if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Layer neon-pg-ext-test
#
#########################################################################################
FROM neon-pg-ext-build AS neon-pg-ext-test
ARG PG_VERSION
RUN mkdir /ext-src
#COPY --from=postgis-build /postgis.tar.gz /ext-src/
#COPY --from=postgis-build /sfcgal/* /usr
COPY --from=plv8-build /plv8.tar.gz /ext-src/
COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/
COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/
COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/
COPY --from=vector-pg-build /pgvector.patch /ext-src/
COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
#COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src
#COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src
#COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
COPY --from=rum-pg-build /rum.tar.gz /ext-src
COPY patches/rum.patch /ext-src
#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
COPY --from=hll-pg-build /hll.tar.gz /ext-src
COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
COPY patches/pg_hintplan.patch /ext-src
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
COPY patches/pg_cron.patch /ext-src
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
#COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
COPY patches/pg_anon.patch /ext-src
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
RUN cd /ext-src/ && for f in *.tar.gz; \
do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|| exit 1; rm -f $f; done
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
# cmake is required for the h3 test
RUN apt-get update && apt-get install -y cmake
RUN patch -p1 < /ext-src/pg_hintplan.patch
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
RUN patch -p1 </ext-src/pg_anon.patch
RUN patch -p1 </ext-src/pg_cron.patch
ENV PATH=/usr/local/pgsql/bin:$PATH
ENV PGHOST=compute
ENV PGPORT=55433
ENV PGUSER=cloud_admin
ENV PGDATABASE=postgres
######################################################################################### #########################################################################################
# #
# Final layer # Final layer
@@ -995,9 +944,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Create remote extension download directory
RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
# Install: # Install:
# libreadline8 for psql # libreadline8 for psql
# libicu67, locales for collations (including ICU and plpgsql_check) # libicu67, locales for collations (including ICU and plpgsql_check)
@@ -1034,6 +980,6 @@ RUN apt update && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
ENV LANG=en_US.utf8 ENV LANG en_US.utf8
USER postgres USER postgres
ENTRYPOINT ["/usr/local/bin/compute_ctl"] ENTRYPOINT ["/usr/local/bin/compute_ctl"]

View File

@@ -3,9 +3,6 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package managers # Where to install Postgres, default is ./pg_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/ POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
OPENSSL_PREFIX_DIR := /usr/local/openssl
ICU_PREFIX_DIR := /usr/local/icu
# #
# We differentiate between release / debug build types using the BUILD_TYPE # We differentiate between release / debug build types using the BUILD_TYPE
# environment variable. # environment variable.
@@ -23,31 +20,19 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options) $(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif endif
ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
# Exclude static build openssl, icu for local build (MacOS, Linux)
# Only keep for build type release and debug
PG_CFLAGS += -I$(OPENSSL_PREFIX_DIR)/include
PG_CONFIGURE_OPTS += --with-icu
PG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'
PG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'
PG_CONFIGURE_OPTS += LDFLAGS='-L$(OPENSSL_PREFIX_DIR)/lib -L$(OPENSSL_PREFIX_DIR)/lib64 -L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -Wl,-Bstatic -lssl -lcrypto -Wl,-Bdynamic -lrt -lm -ldl -lpthread'
endif
UNAME_S := $(shell uname -s) UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux) ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux # Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin) else ifeq ($(UNAME_S),Darwin)
ifndef DISABLE_HOMEBREW # macOS with brew-installed openssl requires explicit paths
# macOS with brew-installed openssl requires explicit paths # It can be configured with OPENSSL_PREFIX variable
# It can be configured with OPENSSL_PREFIX variable OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
OPENSSL_PREFIX := $(shell brew --prefix openssl@3) PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig # macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure # brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
endif endif
# Use -C option so that when PostgreSQL "make install" installs the # Use -C option so that when PostgreSQL "make install" installs the
@@ -69,8 +54,6 @@ CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel) # Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
# #
# Top level Makefile to build Neon and PostgreSQL # Top level Makefile to build Neon and PostgreSQL
# #
@@ -81,38 +64,26 @@ all: neon postgres neon-pg-ext
# #
# The 'postgres_ffi' depends on the Postgres headers. # The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon .PHONY: neon
neon: postgres-headers walproposer-lib cargo-target-dir neon: postgres-headers walproposer-lib
+@echo "Compiling Neon" +@echo "Compiling Neon"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS) $(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
.PHONY: cargo-target-dir
cargo-target-dir:
# https://github.com/rust-lang/cargo/issues/14281
mkdir -p target
test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG
### PostgreSQL parts ### PostgreSQL parts
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor # Some rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now. # to avoid the duplication in the future, but it's tolerable for now.
# #
$(POSTGRES_INSTALL_DIR)/build/%/config.status: $(POSTGRES_INSTALL_DIR)/build/%/config.status:
mkdir -p $(POSTGRES_INSTALL_DIR)
test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
+@echo "Configuring Postgres $* build" +@echo "Configuring Postgres $* build"
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \ @test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \ echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \ echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
exit 1; } exit 1; }
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
VERSION=$*; \ env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
CFLAGS='$(PG_CFLAGS)' \ CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \ $(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log) --prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)
# nicer alias to run 'configure' # nicer alias to run 'configure'
# Note: I've been unable to use templates for this part of our configuration. # Note: I've been unable to use templates for this part of our configuration.
@@ -148,8 +119,6 @@ postgres-%: postgres-configure-% \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
+@echo "Compiling amcheck $*" +@echo "Compiling amcheck $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
+@echo "Compiling test_decoding $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
.PHONY: postgres-clean-% .PHONY: postgres-clean-%
postgres-clean-%: postgres-clean-%:

View File

@@ -1,6 +1,4 @@
[![Neon](https://github.com/neondatabase/neon/assets/11527560/f15a17f0-836e-40c5-b35d-030606a6b660)](https://neon.tech) [![Neon](https://user-images.githubusercontent.com/13738772/236813940-dcfdcb5b-69d3-449b-a686-013febe834d4.png)](https://neon.tech)
# Neon # Neon
@@ -126,7 +124,7 @@ make -j`sysctl -n hw.logicalcpu` -s
To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively. To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
To run the integration tests or Python scripts (not required to use the code), install To run the integration tests or Python scripts (not required to use the code), install
Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory. Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.3](https://python-poetry.org/)) in the project directory.
#### Running neon database #### Running neon database
@@ -262,7 +260,7 @@ By default, this runs both debug and release modes, and all supported postgres v
testing locally, it is convenient to run just one set of permutations, like this: testing locally, it is convenient to run just one set of permutations, like this:
```sh ```sh
DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest
``` ```
## Flamegraphs ## Flamegraphs

View File

@@ -4,11 +4,6 @@ version = "0.1.0"
edition.workspace = true edition.workspace = true
license.workspace = true license.workspace = true
[features]
default = []
# Enables test specific features.
testing = []
[dependencies] [dependencies]
anyhow.workspace = true anyhow.workspace = true
async-compression.workspace = true async-compression.workspace = true
@@ -32,12 +27,10 @@ reqwest = { workspace = true, features = ["json"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true tokio-postgres.workspace = true
tokio-util.workspace = true tokio-util.workspace = true
tokio-stream.workspace = true
tracing.workspace = true tracing.workspace = true
tracing-opentelemetry.workspace = true tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true tracing-subscriber.workspace = true
tracing-utils.workspace = true tracing-utils.workspace = true
thiserror.workspace = true
url.workspace = true url.workspace = true
compute_api.workspace = true compute_api.workspace = true
@@ -49,4 +42,3 @@ vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
zstd = "0.13" zstd = "0.13"
bytes = "1.0" bytes = "1.0"
rust-ini = "0.20.0" rust-ini = "0.20.0"
rlimit = "0.10.1"

View File

@@ -6,7 +6,7 @@
//! - Every start is a fresh start, so the data directory is removed and //! - Every start is a fresh start, so the data directory is removed and
//! initialized again on each run. //! initialized again on each run.
//! - If remote_extension_config is provided, it will be used to fetch extensions list //! - If remote_extension_config is provided, it will be used to fetch extensions list
//! and download `shared_preload_libraries` from the remote storage. //! and download `shared_preload_libraries` from the remote storage.
//! - Next it will put configuration files into the `PGDATA` directory. //! - Next it will put configuration files into the `PGDATA` directory.
//! - Sync safekeepers and get commit LSN. //! - Sync safekeepers and get commit LSN.
//! - Get `basebackup` from pageserver using the returned on the previous step LSN. //! - Get `basebackup` from pageserver using the returned on the previous step LSN.
@@ -33,6 +33,7 @@
//! -b /usr/local/bin/postgres \ //! -b /usr/local/bin/postgres \
//! -r http://pg-ext-s3-gateway \ //! -r http://pg-ext-s3-gateway \
//! ``` //! ```
//!
use std::collections::HashMap; use std::collections::HashMap;
use std::fs::File; use std::fs::File;
use std::path::Path; use std::path::Path;
@@ -46,11 +47,10 @@ use chrono::Utc;
use clap::Arg; use clap::Arg;
use signal_hook::consts::{SIGQUIT, SIGTERM}; use signal_hook::consts::{SIGQUIT, SIGTERM};
use signal_hook::{consts::SIGINT, iterator::Signals}; use signal_hook::{consts::SIGINT, iterator::Signals};
use tracing::{error, info, warn}; use tracing::{error, info};
use url::Url; use url::Url;
use compute_api::responses::ComputeStatus; use compute_api::responses::ComputeStatus;
use compute_api::spec::ComputeSpec;
use compute_tools::compute::{ use compute_tools::compute::{
forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID, forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
@@ -62,45 +62,12 @@ use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor; use compute_tools::monitor::launch_monitor;
use compute_tools::params::*; use compute_tools::params::*;
use compute_tools::spec::*; use compute_tools::spec::*;
use compute_tools::swap::resize_swap;
use rlimit::{setrlimit, Resource};
// this is an arbitrary build tag. Fine as a default / for testing purposes // this is an arbitrary build tag. Fine as a default / for testing purposes
// in-case of not-set environment var // in-case of not-set environment var
const BUILD_TAG_DEFAULT: &str = "latest"; const BUILD_TAG_DEFAULT: &str = "latest";
fn main() -> Result<()> { fn main() -> Result<()> {
let (build_tag, clap_args) = init()?;
// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
let (pg_handle, start_pg_result) = {
// Enter startup tracing context
let _startup_context_guard = startup_context_from_env();
let cli_args = process_cli(&clap_args)?;
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
start_postgres(&clap_args, wait_spec_result)?
// Startup is finished, exit the startup tracing span
};
// PostgreSQL is now running, if startup was successful. Wait until it exits.
let wait_pg_result = wait_postgres(pg_handle)?;
let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
maybe_delay_exit(delay_exit);
deinit_and_exit(wait_pg_result);
}
fn init() -> Result<(String, clap::ArgMatches)> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?; init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?; let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -115,15 +82,9 @@ fn init() -> Result<(String, clap::ArgMatches)> {
.to_string(); .to_string();
info!("build_tag: {build_tag}"); info!("build_tag: {build_tag}");
Ok((build_tag, cli().get_matches())) let matches = cli().get_matches();
} let pgbin_default = String::from("postgres");
let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
let pgbin_default = "postgres";
let pgbin = matches
.get_one::<String>("pgbin")
.map(|s| s.as_str())
.unwrap_or(pgbin_default);
let ext_remote_storage = matches let ext_remote_storage = matches
.get_one::<String>("remote-ext-config") .get_one::<String>("remote-ext-config")
@@ -149,32 +110,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
.expect("Postgres connection string is required"); .expect("Postgres connection string is required");
let spec_json = matches.get_one::<String>("spec"); let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path"); let spec_path = matches.get_one::<String>("spec-path");
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
Ok(ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
http_port,
spec_json,
spec_path,
resize_swap_on_bind,
})
}
struct ProcessCliResult<'clap> {
connstr: &'clap str,
pgdata: &'clap str,
pgbin: &'clap str,
ext_remote_storage: Option<&'clap str>,
http_port: u16,
spec_json: Option<&'clap String>,
spec_path: Option<&'clap String>,
resize_swap_on_bind: bool,
}
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
// Extract OpenTelemetry context for the startup actions from the // Extract OpenTelemetry context for the startup actions from the
// TRACEPARENT and TRACESTATE env variables, and attach it to the current // TRACEPARENT and TRACESTATE env variables, and attach it to the current
// tracing context. // tracing context.
@@ -211,7 +147,7 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
if let Ok(val) = std::env::var("TRACESTATE") { if let Ok(val) = std::env::var("TRACESTATE") {
startup_tracing_carrier.insert("tracestate".to_string(), val); startup_tracing_carrier.insert("tracestate".to_string(), val);
} }
if !startup_tracing_carrier.is_empty() { let startup_context_guard = if !startup_tracing_carrier.is_empty() {
use opentelemetry::propagation::TextMapPropagator; use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator; use opentelemetry::sdk::propagation::TraceContextPropagator;
let guard = TraceContextPropagator::new() let guard = TraceContextPropagator::new()
@@ -221,17 +157,8 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
Some(guard) Some(guard)
} else { } else {
None None
} };
}
fn try_spec_from_cli(
matches: &clap::ArgMatches,
ProcessCliResult {
spec_json,
spec_path,
..
}: &ProcessCliResult,
) -> Result<CliSpecParams> {
let compute_id = matches.get_one::<String>("compute-id"); let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri"); let control_plane_uri = matches.get_one::<String>("control-plane-uri");
@@ -272,34 +199,6 @@ fn try_spec_from_cli(
} }
}; };
Ok(CliSpecParams {
spec,
live_config_allowed,
})
}
struct CliSpecParams {
/// If a spec was provided via CLI or file, the [`ComputeSpec`]
spec: Option<ComputeSpec>,
live_config_allowed: bool,
}
fn wait_spec(
build_tag: String,
ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
resize_swap_on_bind,
http_port,
..
}: ProcessCliResult,
CliSpecParams {
spec,
live_config_allowed,
}: CliSpecParams,
) -> Result<WaitSpecResult> {
let mut new_state = ComputeState::new(); let mut new_state = ComputeState::new();
let spec_set; let spec_set;
@@ -327,17 +226,19 @@ fn wait_spec(
// If this is a pooled VM, prewarm before starting HTTP server and becoming // If this is a pooled VM, prewarm before starting HTTP server and becoming
// available for binding. Prewarming helps Postgres start quicker later, // available for binding. Prewarming helps Postgres start quicker later,
// because QEMU will already have its memory allocated from the host, and // because QEMU will already have it's memory allocated from the host, and
// the necessary binaries will already be cached. // the necessary binaries will already be cached.
if !spec_set { if !spec_set {
compute.prewarm_postgres()?; compute.prewarm_postgres()?;
} }
// Launch http service first, so that we can serve control-plane requests // Launch http service first, so we were able to serve control-plane
// while configuration is still in progress. // requests, while configuration is still in progress.
let _http_handle = let _http_handle =
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread"); launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
let extension_server_port: u16 = http_port;
if !spec_set { if !spec_set {
// No spec provided, hang waiting for it. // No spec provided, hang waiting for it.
info!("no compute spec provided, waiting"); info!("no compute spec provided, waiting");
@@ -352,45 +253,21 @@ fn wait_spec(
break; break;
} }
} }
// Record for how long we slept waiting for the spec.
let now = Utc::now();
state.metrics.wait_for_spec_ms = now
.signed_duration_since(state.start_time)
.to_std()
.unwrap()
.as_millis() as u64;
// Reset start time, so that the total startup time that is calculated later will
// not include the time that we waited for the spec.
state.start_time = now;
} }
Ok(WaitSpecResult {
compute,
http_port,
resize_swap_on_bind,
})
}
struct WaitSpecResult {
compute: Arc<ComputeNode>,
// passed through from ProcessCliResult
http_port: u16,
resize_swap_on_bind: bool,
}
fn start_postgres(
// need to allow unused because `matches` is only used if target_os = "linux"
#[allow(unused_variables)] matches: &clap::ArgMatches,
WaitSpecResult {
compute,
http_port,
resize_swap_on_bind,
}: WaitSpecResult,
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state. // We got all we need, update the state.
let mut state = compute.state.lock().unwrap(); let mut state = compute.state.lock().unwrap();
// Record for how long we slept waiting for the spec.
state.metrics.wait_for_spec_ms = Utc::now()
.signed_duration_since(state.start_time)
.to_std()
.unwrap()
.as_millis() as u64;
// Reset start time to the actual start of the configuration, so that
// total startup time was properly measured at the end.
state.start_time = Utc::now();
state.status = ComputeStatus::Init; state.status = ComputeStatus::Init;
compute.state_changed.notify_all(); compute.state_changed.notify_all();
@@ -398,72 +275,33 @@ fn start_postgres(
"running compute with features: {:?}", "running compute with features: {:?}",
state.pspec.as_ref().unwrap().spec.features state.pspec.as_ref().unwrap().spec.features
); );
// before we release the mutex, fetch the swap size (if any) for later.
let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
drop(state); drop(state);
// Launch remaining service threads // Launch remaining service threads
let _monitor_handle = launch_monitor(&compute); let _monitor_handle = launch_monitor(&compute);
let _configurator_handle = launch_configurator(&compute); let _configurator_handle = launch_configurator(&compute);
let mut prestartup_failed = false;
let mut delay_exit = false;
// Resize swap to the desired size if the compute spec says so
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
// *before* starting postgres.
//
// In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
// carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
// OOM-killed during startup because swap wasn't available yet.
match resize_swap(size_bytes) {
Ok(()) => {
let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
info!(%size_bytes, %size_gib, "resized swap");
}
Err(err) => {
let err = err.context("failed to resize swap");
error!("{err:#}");
// Mark compute startup as failed; don't try to start postgres, and report this
// error to the control plane when it next asks.
prestartup_failed = true;
let mut state = compute.state.lock().unwrap();
state.error = Some(format!("{err:?}"));
state.status = ComputeStatus::Failed;
compute.state_changed.notify_all();
delay_exit = true;
}
}
}
let extension_server_port: u16 = http_port;
// Start Postgres // Start Postgres
let mut pg = None; let mut delay_exit = false;
if !prestartup_failed { let mut exit_code = None;
pg = match compute.start_compute(extension_server_port) { let pg = match compute.start_compute(extension_server_port) {
Ok(pg) => Some(pg), Ok(pg) => Some(pg),
Err(err) => { Err(err) => {
error!("could not start the compute node: {:#}", err); error!("could not start the compute node: {:#}", err);
let mut state = compute.state.lock().unwrap(); let mut state = compute.state.lock().unwrap();
state.error = Some(format!("{:?}", err)); state.error = Some(format!("{:?}", err));
state.status = ComputeStatus::Failed; state.status = ComputeStatus::Failed;
// Notify others that Postgres failed to start. In case of configuring the // Notify others that Postgres failed to start. In case of configuring the
// empty compute, it's likely that API handler is still waiting for compute // empty compute, it's likely that API handler is still waiting for compute
// state change. With this we will notify it that compute is in Failed state, // state change. With this we will notify it that compute is in Failed state,
// so control plane will know about it earlier and record proper error instead // so control plane will know about it earlier and record proper error instead
// of timeout. // of timeout.
compute.state_changed.notify_all(); compute.state_changed.notify_all();
drop(state); // unlock drop(state); // unlock
delay_exit = true; delay_exit = true;
None None
} }
}; };
} else {
warn!("skipping postgres startup because pre-startup step failed");
}
// Start the vm-monitor if directed to. The vm-monitor only runs on linux // Start the vm-monitor if directed to. The vm-monitor only runs on linux
// because it requires cgroups. // because it requires cgroups.
@@ -496,7 +334,7 @@ fn start_postgres(
// This token is used internally by the monitor to clean up all threads // This token is used internally by the monitor to clean up all threads
let token = CancellationToken::new(); let token = CancellationToken::new();
let vm_monitor = rt.as_ref().map(|rt| { let vm_monitor = &rt.as_ref().map(|rt| {
rt.spawn(vm_monitor::start( rt.spawn(vm_monitor::start(
Box::leak(Box::new(vm_monitor::Args { Box::leak(Box::new(vm_monitor::Args {
cgroup: cgroup.cloned(), cgroup: cgroup.cloned(),
@@ -509,41 +347,12 @@ fn start_postgres(
} }
} }
Ok((
pg,
StartPostgresResult {
delay_exit,
compute,
#[cfg(target_os = "linux")]
rt,
#[cfg(target_os = "linux")]
token,
#[cfg(target_os = "linux")]
vm_monitor,
},
))
}
type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
struct StartPostgresResult {
delay_exit: bool,
// passed through from WaitSpecResult
compute: Arc<ComputeNode>,
#[cfg(target_os = "linux")]
rt: Option<tokio::runtime::Runtime>,
#[cfg(target_os = "linux")]
token: tokio_util::sync::CancellationToken,
#[cfg(target_os = "linux")]
vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
}
fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
// Wait for the child Postgres process forever. In this state Ctrl+C will // Wait for the child Postgres process forever. In this state Ctrl+C will
// propagate to Postgres and it will be shut down as well. // propagate to Postgres and it will be shut down as well.
let mut exit_code = None;
if let Some((mut pg, logs_handle)) = pg { if let Some((mut pg, logs_handle)) = pg {
// Startup is finished, exit the startup tracing span
drop(startup_context_guard);
let ecode = pg let ecode = pg
.wait() .wait()
.expect("failed to start waiting on Postgres process"); .expect("failed to start waiting on Postgres process");
@@ -558,25 +367,6 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
exit_code = ecode.code() exit_code = ecode.code()
} }
Ok(WaitPostgresResult { exit_code })
}
struct WaitPostgresResult {
exit_code: Option<i32>,
}
fn cleanup_after_postgres_exit(
StartPostgresResult {
mut delay_exit,
compute,
#[cfg(target_os = "linux")]
vm_monitor,
#[cfg(target_os = "linux")]
token,
#[cfg(target_os = "linux")]
rt,
}: StartPostgresResult,
) -> Result<bool> {
// Terminate the vm_monitor so it releases the file watcher on // Terminate the vm_monitor so it releases the file watcher on
// /sys/fs/cgroup/neon-postgres. // /sys/fs/cgroup/neon-postgres.
// Note: the vm-monitor only runs on linux because it requires cgroups. // Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -618,19 +408,13 @@ fn cleanup_after_postgres_exit(
error!("error while checking for core dumps: {err:?}"); error!("error while checking for core dumps: {err:?}");
} }
Ok(delay_exit)
}
fn maybe_delay_exit(delay_exit: bool) {
// If launch failed, keep serving HTTP requests for a while, so the cloud // If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error. // control plane can get the actual error.
if delay_exit { if delay_exit {
info!("giving control plane 30s to collect the error before shutdown"); info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30)); thread::sleep(Duration::from_secs(30));
} }
}
fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
// Shutdown trace pipeline gracefully, so that it has a chance to send any // Shutdown trace pipeline gracefully, so that it has a chance to send any
// pending traces before we exit. Shutting down OTEL tracing provider may // pending traces before we exit. Shutting down OTEL tracing provider may
// hang for quite some time, see, for example: // hang for quite some time, see, for example:
@@ -738,15 +522,10 @@ fn cli() -> clap::Command {
Arg::new("filecache-connstr") Arg::new("filecache-connstr")
.long("filecache-connstr") .long("filecache-connstr")
.default_value( .default_value(
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor", "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable",
) )
.value_name("FILECACHE_CONNSTR"), .value_name("FILECACHE_CONNSTR"),
) )
.arg(
Arg::new("resize-swap-on-bind")
.long("resize-swap-on-bind")
.action(clap::ArgAction::SetTrue),
)
} }
/// When compute_ctl is killed, send also termination signal to sync-safekeepers /// When compute_ctl is killed, send also termination signal to sync-safekeepers

View File

@@ -1,116 +0,0 @@
use compute_api::{
responses::CatalogObjects,
spec::{Database, Role},
};
use futures::Stream;
use postgres::{Client, NoTls};
use std::{path::Path, process::Stdio, result::Result, sync::Arc};
use tokio::{
io::{AsyncBufReadExt, BufReader},
process::Command,
task,
};
use tokio_stream::{self as stream, StreamExt};
use tokio_util::codec::{BytesCodec, FramedRead};
use tracing::warn;
use crate::{
compute::ComputeNode,
pg_helpers::{get_existing_dbs, get_existing_roles},
};
pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
let connstr = compute.connstr.clone();
task::spawn_blocking(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let roles: Vec<Role>;
{
let mut xact = client.transaction()?;
roles = get_existing_roles(&mut xact)?;
}
let databases: Vec<Database> = get_existing_dbs(&mut client)?.values().cloned().collect();
Ok(CatalogObjects { roles, databases })
})
.await?
}
#[derive(Debug, thiserror::Error)]
pub enum SchemaDumpError {
#[error("Database does not exist.")]
DatabaseDoesNotExist,
#[error("Failed to execute pg_dump.")]
IO(#[from] std::io::Error),
}
// It uses the pg_dump utility to dump the schema of the specified database.
// The output is streamed back to the caller and supposed to be streamed via HTTP.
//
// Before return the result with the output, it checks that pg_dump produced any output.
// If not, it tries to parse the stderr output to determine if the database does not exist
// and special error is returned.
//
// To make sure that the process is killed when the caller drops the stream, we use tokio kill_on_drop feature.
pub async fn get_database_schema(
compute: &Arc<ComputeNode>,
dbname: &str,
) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
let pgbin = &compute.pgbin;
let basepath = Path::new(pgbin).parent().unwrap();
let pgdump = basepath.join("pg_dump");
let mut connstr = compute.connstr.clone();
connstr.set_path(dbname);
let mut cmd = Command::new(pgdump)
.arg("--schema-only")
.arg(connstr.as_str())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.kill_on_drop(true)
.spawn()?;
let stdout = cmd.stdout.take().ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stdout.")
})?;
let stderr = cmd.stderr.take().ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stderr.")
})?;
let mut stdout_reader = FramedRead::new(stdout, BytesCodec::new());
let stderr_reader = BufReader::new(stderr);
let first_chunk = match stdout_reader.next().await {
Some(Ok(bytes)) if !bytes.is_empty() => bytes,
Some(Err(e)) => {
return Err(SchemaDumpError::IO(e));
}
_ => {
let mut lines = stderr_reader.lines();
if let Some(line) = lines.next_line().await? {
if line.contains(&format!("FATAL: database \"{}\" does not exist", dbname)) {
return Err(SchemaDumpError::DatabaseDoesNotExist);
}
warn!("pg_dump stderr: {}", line)
}
tokio::spawn(async move {
while let Ok(Some(line)) = lines.next_line().await {
warn!("pg_dump stderr: {}", line)
}
});
return Err(SchemaDumpError::IO(std::io::Error::new(
std::io::ErrorKind::Other,
"failed to start pg_dump",
)));
}
};
let initial_stream = stream::once(Ok(first_chunk.freeze()));
// Consume stderr and log warnings
tokio::spawn(async move {
let mut lines = stderr_reader.lines();
while let Ok(Some(line)) = lines.next_line().await {
warn!("pg_dump stderr: {}", line)
}
});
Ok(initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze()))))
}

View File

@@ -56,7 +56,6 @@ pub struct ComputeNode {
/// - we push new spec and it does reconfiguration /// - we push new spec and it does reconfiguration
/// - but then something happens and compute pod / VM is destroyed, /// - but then something happens and compute pod / VM is destroyed,
/// so k8s controller starts it again with the **old** spec /// so k8s controller starts it again with the **old** spec
///
/// and the same for empty computes: /// and the same for empty computes:
/// - we started compute without any spec /// - we started compute without any spec
/// - we push spec and it does configuration /// - we push spec and it does configuration
@@ -400,15 +399,7 @@ impl ComputeNode {
pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> { pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
let mut retry_period_ms = 500.0; let mut retry_period_ms = 500.0;
let mut attempts = 0; let mut attempts = 0;
const DEFAULT_ATTEMPTS: u16 = 10; let max_attempts = 10;
#[cfg(feature = "testing")]
let max_attempts = if let Ok(v) = env::var("NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES") {
u16::from_str(&v).unwrap()
} else {
DEFAULT_ATTEMPTS
};
#[cfg(not(feature = "testing"))]
let max_attempts = DEFAULT_ATTEMPTS;
loop { loop {
let result = self.try_get_basebackup(compute_state, lsn); let result = self.try_get_basebackup(compute_state, lsn);
match result { match result {
@@ -807,11 +798,7 @@ impl ComputeNode {
// In this case we need to connect with old `zenith_admin` name // In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user, // and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges. // but we can create a new one and grant it all privileges.
let mut connstr = self.connstr.clone(); let connstr = self.connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "apply_config");
let mut client = match Client::connect(connstr.as_str(), NoTls) { let mut client = match Client::connect(connstr.as_str(), NoTls) {
Err(e) => match e.code() { Err(e) => match e.code() {
Some(&SqlState::INVALID_PASSWORD) Some(&SqlState::INVALID_PASSWORD)
@@ -831,15 +818,9 @@ impl ComputeNode {
Client::connect(zenith_admin_connstr.as_str(), NoTls) Client::connect(zenith_admin_connstr.as_str(), NoTls)
.context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?; .context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
// Disable forwarding so that users don't get a cloud_admin role // Disable forwarding so that users don't get a cloud_admin role
client.simple_query("SET neon.forward_ddl = false")?;
let mut func = || { client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("SET neon.forward_ddl = false")?; client.simple_query("GRANT zenith_admin TO cloud_admin")?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
Ok::<_, anyhow::Error>(())
};
func().context("apply_config setup cloud_admin")?;
drop(client); drop(client);
// reconnect with connstring with expected name // reconnect with connstring with expected name
@@ -851,48 +832,39 @@ impl ComputeNode {
}; };
// Disable DDL forwarding because control plane already knows about these roles/databases. // Disable DDL forwarding because control plane already knows about these roles/databases.
client client.simple_query("SET neon.forward_ddl = false")?;
.simple_query("SET neon.forward_ddl = false")
.context("apply_config SET neon.forward_ddl = false")?;
// Proceed with post-startup configuration. Note, that order of operations is important. // Proceed with post-startup configuration. Note, that order of operations is important.
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?; create_neon_superuser(spec, &mut client)?;
cleanup_instance(&mut client).context("apply_config cleanup_instance")?; cleanup_instance(&mut client)?;
handle_roles(spec, &mut client).context("apply_config handle_roles")?; handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client).context("apply_config handle_databases")?; handle_databases(spec, &mut client)?;
handle_role_deletions(spec, connstr.as_str(), &mut client) handle_role_deletions(spec, connstr.as_str(), &mut client)?;
.context("apply_config handle_role_deletions")?;
handle_grants( handle_grants(
spec, spec,
&mut client, &mut client,
connstr.as_str(), connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension), self.has_feature(ComputeFeature::AnonExtension),
) )?;
.context("apply_config handle_grants")?; handle_extensions(spec, &mut client)?;
handle_extensions(spec, &mut client).context("apply_config handle_extensions")?; handle_extension_neon(&mut client)?;
handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?; create_availability_check_data(&mut client)?;
create_availability_check_data(&mut client)
.context("apply_config create_availability_check_data")?;
// 'Close' connection // 'Close' connection
drop(client); drop(client);
// Run migrations separately to not hold up cold starts // Run migrations separately to not hold up cold starts
thread::spawn(move || { thread::spawn(move || {
let mut connstr = connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "migrations");
let mut client = Client::connect(connstr.as_str(), NoTls)?; let mut client = Client::connect(connstr.as_str(), NoTls)?;
handle_migrations(&mut client).context("apply_config handle_migrations") handle_migrations(&mut client)
}); });
Ok(()) Ok(())
} }
// Wrapped this around `pg_ctl reload`, but right now we don't use // We could've wrapped this around `pg_ctl reload`, but right now we don't use
// `pg_ctl` for start / stop. // `pg_ctl` for start / stop, so this just seems much easier to do as we already
// have opened connection to Postgres and superuser access.
#[instrument(skip_all)] #[instrument(skip_all)]
fn pg_reload_conf(&self) -> Result<()> { fn pg_reload_conf(&self) -> Result<()> {
let pgctl_bin = Path::new(&self.pgbin).parent().unwrap().join("pg_ctl"); let pgctl_bin = Path::new(&self.pgbin).parent().unwrap().join("pg_ctl");
@@ -935,39 +907,38 @@ impl ComputeNode {
// temporarily reset max_cluster_size in config // temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are reconfiguring: // to avoid the possibility of hitting the limit, while we are reconfiguring:
// creating new extensions, roles, etc... // creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || { config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?; self.pg_reload_conf()?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?; let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
// Proceed with post-startup configuration. Note, that order of operations is important. // Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases. // Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary { if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?; client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?; cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?; handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?; handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?; handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants( handle_grants(
&spec, &spec,
&mut client, &mut client,
self.connstr.as_str(), self.connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension), self.has_feature(ComputeFeature::AnonExtension),
)?; )?;
handle_extensions(&spec, &mut client)?; handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?; handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear // We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen // if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config // if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure. // instead of reconfigure.
} }
// 'Close' connection // 'Close' connection
drop(client); drop(client);
Ok(())
})?;
// reset max_cluster_size in config back to original value and reload config
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?; self.pg_reload_conf()?;
let unknown_op = "unknown".to_string(); let unknown_op = "unknown".to_string();
@@ -1058,17 +1029,12 @@ impl ComputeNode {
// temporarily reset max_cluster_size in config // temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are applying config: // to avoid the possibility of hitting the limit, while we are applying config:
// creating new extensions, roles, etc... // creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override( config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
pgdata_path, self.pg_reload_conf()?;
"neon.max_cluster_size=-1",
|| {
self.pg_reload_conf()?;
self.apply_config(&compute_state)?; self.apply_config(&compute_state)?;
Ok(()) config::compute_ctl_temp_override_remove(pgdata_path)?;
},
)?;
self.pg_reload_conf()?; self.pg_reload_conf()?;
} }
self.post_apply_config()?; self.post_apply_config()?;
@@ -1125,7 +1091,7 @@ impl ComputeNode {
// EKS worker nodes have following core dump settings: // EKS worker nodes have following core dump settings:
// /proc/sys/kernel/core_pattern -> core // /proc/sys/kernel/core_pattern -> core
// /proc/sys/kernel/core_uses_pid -> 1 // /proc/sys/kernel/core_uses_pid -> 1
// ulimit -c -> unlimited // ulimint -c -> unlimited
// which results in core dumps being written to postgres data directory as core.<pid>. // which results in core dumps being written to postgres data directory as core.<pid>.
// //
// Use that as a default location and pattern, except macos where core dumps are written // Use that as a default location and pattern, except macos where core dumps are written
@@ -1296,12 +1262,10 @@ LIMIT 100",
.await .await
.map_err(DownloadError::Other); .map_err(DownloadError::Other);
if download_size.is_ok() { self.ext_download_progress
self.ext_download_progress .write()
.write() .expect("bad lock")
.expect("bad lock") .insert(ext_archive_name.to_string(), (download_start, true));
.insert(ext_archive_name.to_string(), (download_start, true));
}
download_size download_size
} }
@@ -1404,9 +1368,7 @@ pub fn forward_termination_signal() {
let pg_pid = PG_PID.load(Ordering::SeqCst); let pg_pid = PG_PID.load(Ordering::SeqCst);
if pg_pid != 0 { if pg_pid != 0 {
let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32); let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
// Use 'fast' shutdown (SIGINT) because it also creates a shutdown checkpoint, which is important for // use 'immediate' shutdown (SIGQUIT): https://www.postgresql.org/docs/current/server-shutdown.html
// ROs to get a list of running xacts faster instead of going through the CLOG. kill(pg_pid, Signal::SIGQUIT).ok();
// See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals.
kill(pg_pid, Signal::SIGINT).ok();
} }
} }

View File

@@ -6,8 +6,8 @@ use std::path::Path;
use anyhow::Result; use anyhow::Result;
use crate::pg_helpers::escape_conf_value; use crate::pg_helpers::escape_conf_value;
use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize}; use crate::pg_helpers::PgOptionsSerialize;
use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption}; use compute_api::spec::{ComputeMode, ComputeSpec};
/// Check that `line` is inside a text file and put it there if it is not. /// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist. /// Create file if it doesn't exist.
@@ -83,27 +83,12 @@ pub fn write_postgres_conf(
ComputeMode::Replica => { ComputeMode::Replica => {
// hot_standby is 'on' by default, but let's be explicit // hot_standby is 'on' by default, but let's be explicit
writeln!(file, "hot_standby=on")?; writeln!(file, "hot_standby=on")?;
}
}
if cfg!(target_os = "linux") { // Inform the replica about the primary state
// Check /proc/sys/vm/overcommit_memory -- if it equals 2 (i.e. linux memory overcommit is // Default is 'false'
// disabled), then the control plane has enabled swap and we should set if let Some(primary_is_running) = spec.primary_is_running {
// dynamic_shared_memory_type = 'mmap'. writeln!(file, "neon.primary_is_running={}", primary_is_running)?;
// }
// This is (maybe?) temporary - for more, see https://github.com/neondatabase/cloud/issues/12047.
let overcommit_memory_contents = std::fs::read_to_string("/proc/sys/vm/overcommit_memory")
// ignore any errors - they may be expected to occur under certain situations (e.g. when
// not running in Linux).
.unwrap_or_else(|_| String::new());
if overcommit_memory_contents.trim() == "2" {
let opt = GenericOption {
name: "dynamic_shared_memory_type".to_owned(),
value: Some("mmap".to_owned()),
vartype: "enum".to_owned(),
};
write!(file, "{}", opt.to_pg_setting())?;
} }
} }
@@ -125,17 +110,18 @@ pub fn write_postgres_conf(
Ok(()) Ok(())
} }
pub fn with_compute_ctl_tmp_override<F>(pgdata_path: &Path, options: &str, exec: F) -> Result<()> /// create file compute_ctl_temp_override.conf in pgdata_dir
where /// add provided options to this file
F: FnOnce() -> Result<()>, pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> {
{
let path = pgdata_path.join("compute_ctl_temp_override.conf"); let path = pgdata_path.join("compute_ctl_temp_override.conf");
let mut file = File::create(path)?; let mut file = File::create(path)?;
write!(file, "{}", options)?; write!(file, "{}", options)?;
Ok(())
let res = exec(); }
file.set_len(0)?; /// remove file compute_ctl_temp_override.conf in pgdata_dir
pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> {
res let path = pgdata_path.join("compute_ctl_temp_override.conf");
std::fs::remove_file(path)?;
Ok(())
} }

View File

@@ -5,21 +5,17 @@ use std::net::SocketAddr;
use std::sync::Arc; use std::sync::Arc;
use std::thread; use std::thread;
use crate::catalog::SchemaDumpError;
use crate::catalog::{get_database_schema, get_dbs_and_roles};
use crate::compute::forward_termination_signal; use crate::compute::forward_termination_signal;
use crate::compute::{ComputeNode, ComputeState, ParsedSpec}; use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_api::requests::ConfigurationRequest; use compute_api::requests::ConfigurationRequest;
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError}; use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
use anyhow::Result; use anyhow::Result;
use hyper::header::CONTENT_TYPE;
use hyper::service::{make_service_fn, service_fn}; use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode}; use hyper::{Body, Method, Request, Response, Server, StatusCode};
use tokio::task; use tokio::task;
use tracing::{debug, error, info, warn}; use tracing::{error, info, warn};
use tracing_utils::http::OtelName; use tracing_utils::http::OtelName;
use utils::http::request::must_get_query_param;
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse { fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
ComputeStatusResponse { ComputeStatusResponse {
@@ -48,7 +44,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
match (req.method(), req.uri().path()) { match (req.method(), req.uri().path()) {
// Serialized compute state. // Serialized compute state.
(&Method::GET, "/status") => { (&Method::GET, "/status") => {
debug!("serving /status GET request"); info!("serving /status GET request");
let state = compute.state.lock().unwrap(); let state = compute.state.lock().unwrap();
let status_response = status_response_from_state(&state); let status_response = status_response_from_state(&state);
Response::new(Body::from(serde_json::to_string(&status_response).unwrap())) Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
@@ -137,34 +133,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
} }
} }
(&Method::GET, "/dbs_and_roles") => {
info!("serving /dbs_and_roles GET request",);
match get_dbs_and_roles(compute).await {
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
Err(_) => {
render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
}
}
}
(&Method::GET, "/database_schema") => {
let database = match must_get_query_param(&req, "database") {
Err(e) => return e.into_response(),
Ok(database) => database,
};
info!("serving /database_schema GET request with database: {database}",);
match get_database_schema(compute, &database).await {
Ok(res) => render_plain(Body::wrap_stream(res)),
Err(SchemaDumpError::DatabaseDoesNotExist) => {
render_json_error("database does not exist", StatusCode::NOT_FOUND)
}
Err(e) => {
error!("can't get schema dump: {}", e);
render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
}
}
}
// download extension files from remote extension storage on demand // download extension files from remote extension storage on demand
(&Method::POST, route) if route.starts_with("/extension_server/") => { (&Method::POST, route) if route.starts_with("/extension_server/") => {
info!("serving {:?} POST request", route); info!("serving {:?} POST request", route);
@@ -335,25 +303,10 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
}; };
Response::builder() Response::builder()
.status(status) .status(status)
.header(CONTENT_TYPE, "application/json")
.body(Body::from(serde_json::to_string(&error).unwrap())) .body(Body::from(serde_json::to_string(&error).unwrap()))
.unwrap() .unwrap()
} }
fn render_json(body: Body) -> Response<Body> {
Response::builder()
.header(CONTENT_TYPE, "application/json")
.body(body)
.unwrap()
}
fn render_plain(body: Body) -> Response<Body> {
Response::builder()
.header(CONTENT_TYPE, "text/plain")
.body(body)
.unwrap()
}
async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> { async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
{ {
let mut state = compute.state.lock().unwrap(); let mut state = compute.state.lock().unwrap();

View File

@@ -68,51 +68,6 @@ paths:
schema: schema:
$ref: "#/components/schemas/Info" $ref: "#/components/schemas/Info"
/dbs_and_roles:
get:
tags:
- Info
summary: Get databases and roles in the catalog.
description: ""
operationId: getDbsAndRoles
responses:
200:
description: Compute schema objects
content:
application/json:
schema:
$ref: "#/components/schemas/DbsAndRoles"
/database_schema:
get:
tags:
- Info
summary: Get schema dump
parameters:
- name: database
in: query
description: Database name to dump.
required: true
schema:
type: string
example: "postgres"
description: Get schema dump in SQL format.
operationId: getDatabaseSchema
responses:
200:
description: Schema dump
content:
text/plain:
schema:
type: string
description: Schema dump in SQL format.
404:
description: Non existing database.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
/check_writability: /check_writability:
post: post:
tags: tags:
@@ -274,73 +229,6 @@ components:
num_cpus: num_cpus:
type: integer type: integer
DbsAndRoles:
type: object
description: Databases and Roles
required:
- roles
- databases
properties:
roles:
type: array
items:
$ref: "#/components/schemas/Role"
databases:
type: array
items:
$ref: "#/components/schemas/Database"
Database:
type: object
description: Database
required:
- name
- owner
- restrict_conn
- invalid
properties:
name:
type: string
owner:
type: string
options:
type: array
items:
$ref: "#/components/schemas/GenericOption"
restrict_conn:
type: boolean
invalid:
type: boolean
Role:
type: object
description: Role
required:
- name
properties:
name:
type: string
encrypted_password:
type: string
options:
type: array
items:
$ref: "#/components/schemas/GenericOption"
GenericOption:
type: object
description: Schema Generic option
required:
- name
- vartype
properties:
name:
type: string
value:
type: string
vartype:
type: string
ComputeState: ComputeState:
type: object type: object
required: required:

View File

@@ -8,13 +8,10 @@ pub mod configurator;
pub mod http; pub mod http;
#[macro_use] #[macro_use]
pub mod logger; pub mod logger;
pub mod catalog;
pub mod compute; pub mod compute;
pub mod extension_server; pub mod extension_server;
mod migration;
pub mod monitor; pub mod monitor;
pub mod params; pub mod params;
pub mod pg_helpers; pub mod pg_helpers;
pub mod spec; pub mod spec;
pub mod swap;
pub mod sync_sk; pub mod sync_sk;

View File

@@ -1,105 +0,0 @@
use anyhow::{Context, Result};
use postgres::Client;
use tracing::info;
pub(crate) struct MigrationRunner<'m> {
client: &'m mut Client,
migrations: &'m [&'m str],
}
impl<'m> MigrationRunner<'m> {
pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
// The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
assert!(migrations.len() + 1 < i64::MAX as usize);
Self { client, migrations }
}
fn get_migration_id(&mut self) -> Result<i64> {
let query = "SELECT id FROM neon_migration.migration_id";
let row = self
.client
.query_one(query, &[])
.context("run_migrations get migration_id")?;
Ok(row.get::<&str, i64>("id"))
}
fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
self.client
.simple_query(&setval)
.context("run_migrations update id")?;
Ok(())
}
fn prepare_migrations(&mut self) -> Result<()> {
let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
self.client.simple_query(query)?;
let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
self.client.simple_query(query)?;
let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
self.client.simple_query(query)?;
let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
self.client.simple_query(query)?;
let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
self.client.simple_query(query)?;
Ok(())
}
pub fn run_migrations(mut self) -> Result<()> {
self.prepare_migrations()?;
let mut current_migration = self.get_migration_id()? as usize;
while current_migration < self.migrations.len() {
macro_rules! migration_id {
($cm:expr) => {
($cm + 1) as i64
};
}
let migration = self.migrations[current_migration];
if migration.starts_with("-- SKIP") {
info!("Skipping migration id={}", migration_id!(current_migration));
} else {
info!(
"Running migration id={}:\n{}\n",
migration_id!(current_migration),
migration
);
self.client
.simple_query("BEGIN")
.context("begin migration")?;
self.client.simple_query(migration).with_context(|| {
format!(
"run_migrations migration id={}",
migration_id!(current_migration)
)
})?;
// Migration IDs start at 1
self.update_migration_id(migration_id!(current_migration))?;
self.client
.simple_query("COMMIT")
.context("commit migration")?;
info!("Finished migration id={}", migration_id!(current_migration));
}
current_migration += 1;
}
Ok(())
}
}

View File

@@ -1 +0,0 @@
ALTER ROLE neon_superuser BYPASSRLS;

View File

@@ -1,18 +0,0 @@
DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
END LOOP;
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;

View File

@@ -1,6 +0,0 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
END IF;
END $$;

View File

@@ -1 +0,0 @@
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;

View File

@@ -1,4 +0,0 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;

View File

@@ -1,4 +0,0 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;

View File

@@ -1,3 +0,0 @@
-- SKIP: Moved inline to the handle_grants() functions.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;

View File

@@ -1,3 +0,0 @@
-- SKIP: Moved inline to the handle_grants() functions.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;

View File

@@ -1,13 +0,0 @@
-- SKIP: The original goal of this migration was to prevent creating
-- subscriptions, but this migration was insufficient.
DO $$
DECLARE
role_name TEXT;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
END LOOP;
END $$;

View File

@@ -1,7 +0,0 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
END IF;
END $$;

View File

@@ -17,11 +17,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
// should be handled gracefully. // should be handled gracefully.
fn watch_compute_activity(compute: &ComputeNode) { fn watch_compute_activity(compute: &ComputeNode) {
// Suppose that `connstr` doesn't change // Suppose that `connstr` doesn't change
let mut connstr = compute.connstr.clone(); let connstr = compute.connstr.as_str();
connstr
.query_pairs_mut()
.append_pair("application_name", "compute_activity_monitor");
let connstr = connstr.as_str();
// During startup and configuration we connect to every Postgres database, // During startup and configuration we connect to every Postgres database,
// but we don't want to count this as some user activity. So wait until // but we don't want to count this as some user activity. So wait until

View File

@@ -44,7 +44,7 @@ pub fn escape_conf_value(s: &str) -> String {
format!("'{}'", res) format!("'{}'", res)
} }
pub trait GenericOptionExt { trait GenericOptionExt {
fn to_pg_option(&self) -> String; fn to_pg_option(&self) -> String;
fn to_pg_setting(&self) -> String; fn to_pg_setting(&self) -> String;
} }
@@ -489,7 +489,7 @@ pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<()>
/// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions: /// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:
/// - next line starts with timestamp /// - next line starts with timestamp
/// - EOF /// - EOF
/// - no new lines were written for the last 100 milliseconds /// - no new lines were written for the last second
async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> { async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
let mut lines = tokio::io::BufReader::new(stderr).lines(); let mut lines = tokio::io::BufReader::new(stderr).lines();
let timeout_duration = Duration::from_millis(100); let timeout_duration = Duration::from_millis(100);

View File

@@ -2,7 +2,7 @@ use std::fs::File;
use std::path::Path; use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
use anyhow::{anyhow, bail, Context, Result}; use anyhow::{anyhow, bail, Result};
use postgres::config::Config; use postgres::config::Config;
use postgres::{Client, NoTls}; use postgres::{Client, NoTls};
use reqwest::StatusCode; use reqwest::StatusCode;
@@ -10,7 +10,6 @@ use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};
use crate::config; use crate::config;
use crate::logger::inlinify; use crate::logger::inlinify;
use crate::migration::MigrationRunner;
use crate::params::PG_HBA_ALL_MD5; use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*; use crate::pg_helpers::*;
@@ -303,9 +302,9 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
RoleAction::Create => { RoleAction::Create => {
// This branch only runs when roles are created through the console, so it is // This branch only runs when roles are created through the console, so it is
// safe to add more permissions here. BYPASSRLS and REPLICATION are inherited // safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
// from neon_superuser. // from neon_superuser. (NOTE: REPLICATION has been removed from here for now).
let mut query: String = format!( let mut query: String = format!(
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser", "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS IN ROLE neon_superuser",
name.pg_quote() name.pg_quote()
); );
info!("running role create query: '{}'", &query); info!("running role create query: '{}'", &query);
@@ -491,7 +490,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
"rename_db" => { "rename_db" => {
let new_name = op.new_name.as_ref().unwrap(); let new_name = op.new_name.as_ref().unwrap();
if existing_dbs.contains_key(&op.name) { if existing_dbs.get(&op.name).is_some() {
let query: String = format!( let query: String = format!(
"ALTER DATABASE {} RENAME TO {}", "ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(), op.name.pg_quote(),
@@ -699,8 +698,7 @@ pub fn handle_grants(
// it is important to run this after all grants // it is important to run this after all grants
if enable_anon_extension { if enable_anon_extension {
handle_extension_anon(spec, &db.owner, &mut db_client, false) handle_extension_anon(spec, &db.owner, &mut db_client, false)?;
.context("handle_grants handle_extension_anon")?;
} }
} }
@@ -745,24 +743,21 @@ pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
// which may happen in two cases: // which may happen in two cases:
// - extension was just installed // - extension was just installed
// - extension was already installed and is up to date // - extension was already installed and is up to date
let query = "ALTER EXTENSION neon UPDATE"; // DISABLED due to compute node unpinning epic
info!("update neon extension version with query: {}", query); // let query = "ALTER EXTENSION neon UPDATE";
if let Err(e) = client.simple_query(query) { // info!("update neon extension version with query: {}", query);
error!( // client.simple_query(query)?;
"failed to upgrade neon extension during `handle_extension_neon`: {}",
e
);
}
Ok(()) Ok(())
} }
#[instrument(skip_all)] #[instrument(skip_all)]
pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> { pub fn handle_neon_extension_upgrade(_client: &mut Client) -> Result<()> {
info!("handle neon extension upgrade"); info!("handle neon extension upgrade (not really)");
let query = "ALTER EXTENSION neon UPDATE"; // DISABLED due to compute node unpinning epic
info!("update neon extension version with query: {}", query); // let query = "ALTER EXTENSION neon UPDATE";
client.simple_query(query)?; // info!("update neon extension version with query: {}", query);
// client.simple_query(query)?;
Ok(()) Ok(())
} }
@@ -775,27 +770,103 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN! // !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Add new migrations in numerical order.
let migrations = [ let migrations = [
include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"), "ALTER ROLE neon_superuser BYPASSRLS",
include_str!("./migrations/0002-alter_roles.sql"), r#"
include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"), DO $$
include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"), DECLARE
include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"), role_name text;
include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"), BEGIN
include_str!( FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
"./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql" LOOP
), RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
include_str!( EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
"./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql" END LOOP;
),
include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"), FOR role_name IN SELECT rolname FROM pg_roles
include_str!( WHERE
"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql" NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
), LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;
"#,
r#"
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
END IF;
END
$$;"#,
"GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION",
// Don't remove: these are some SQLs that we originally applied in migrations but turned out to execute somewhere else.
"",
"",
"",
"",
// Add new migrations below.
r#"
DO $$
DECLARE
role_name TEXT;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
END LOOP;
END
$$;"#,
]; ];
MigrationRunner::new(client, &migrations).run_migrations()?; let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
client.simple_query(query)?;
query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
client.simple_query(query)?;
query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
client.simple_query(query)?;
query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
client.simple_query(query)?;
query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
client.simple_query(query)?;
query = "SELECT id FROM neon_migration.migration_id";
let row = client.query_one(query, &[])?;
let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
let starting_migration_id = current_migration;
query = "BEGIN";
client.simple_query(query)?;
while current_migration < migrations.len() {
let migration = &migrations[current_migration];
if migration.is_empty() {
info!("Skip migration id={}", current_migration);
} else {
info!("Running migration:\n{}\n", migration);
client.simple_query(migration)?;
}
current_migration += 1;
}
let setval = format!(
"UPDATE neon_migration.migration_id SET id={}",
migrations.len()
);
client.simple_query(&setval)?;
query = "COMMIT";
client.simple_query(query)?;
info!(
"Ran {} migrations",
(migrations.len() - starting_migration_id)
);
Ok(()) Ok(())
} }

View File

@@ -1,45 +0,0 @@
use std::path::Path;
use anyhow::{anyhow, Context};
use tracing::warn;
pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
// run `/neonvm/bin/resize-swap --once {size_bytes}`
//
// Passing '--once' causes resize-swap to delete itself after successful completion, which
// means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
// postgres is running.
//
// NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
let child_result = std::process::Command::new("/usr/bin/sudo")
.arg(RESIZE_SWAP_BIN)
.arg("--once")
.arg(size_bytes.to_string())
.spawn();
child_result
.context("spawn() failed")
.and_then(|mut child| child.wait().context("wait() failed"))
.and_then(|status| match status.success() {
true => Ok(()),
false => {
// The command failed. Maybe it was because the resize-swap file doesn't exist?
// The --once flag causes it to delete itself on success so we don't disable swap
// while postgres is running; maybe this is fine.
match Path::new(RESIZE_SWAP_BIN).try_exists() {
Err(_) | Ok(true) => Err(anyhow!("process exited with {status}")),
// The path doesn't exist; we're actually ok
Ok(false) => {
warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
Ok(())
},
}
}
})
// wrap any prior error with the overall context that we couldn't run the command
.with_context(|| {
format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
})
}

View File

@@ -17,7 +17,6 @@ nix.workspace = true
once_cell.workspace = true once_cell.workspace = true
postgres.workspace = true postgres.workspace = true
hex.workspace = true hex.workspace = true
humantime-serde.workspace = true
hyper.workspace = true hyper.workspace = true
regex.workspace = true regex.workspace = true
reqwest = { workspace = true, features = ["blocking", "json"] } reqwest = { workspace = true, features = ["blocking", "json"] }
@@ -28,7 +27,6 @@ serde_with.workspace = true
tar.workspace = true tar.workspace = true
thiserror.workspace = true thiserror.workspace = true
toml.workspace = true toml.workspace = true
toml_edit.workspace = true
tokio.workspace = true tokio.workspace = true
tokio-postgres.workspace = true tokio-postgres.workspace = true
tokio-util.workspace = true tokio-util.workspace = true
@@ -40,7 +38,6 @@ safekeeper_api.workspace = true
postgres_connection.workspace = true postgres_connection.workspace = true
storage_broker.workspace = true storage_broker.workspace = true
utils.workspace = true utils.workspace = true
whoami.workspace = true
compute_api.workspace = true compute_api.workspace = true
workspace_hack.workspace = true workspace_hack.workspace = true

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "storage_controller" name = "attachment_service"
version = "0.1.0" version = "0.1.0"
edition.workspace = true edition.workspace = true
license.workspace = true license.workspace = true
@@ -18,7 +18,6 @@ anyhow.workspace = true
aws-config.workspace = true aws-config.workspace = true
bytes.workspace = true bytes.workspace = true
camino.workspace = true camino.workspace = true
chrono.workspace = true
clap.workspace = true clap.workspace = true
fail.workspace = true fail.workspace = true
futures.workspace = true futures.workspace = true
@@ -26,14 +25,12 @@ git-version.workspace = true
hex.workspace = true hex.workspace = true
hyper.workspace = true hyper.workspace = true
humantime.workspace = true humantime.workspace = true
itertools.workspace = true
lasso.workspace = true lasso.workspace = true
once_cell.workspace = true once_cell.workspace = true
pageserver_api.workspace = true pageserver_api.workspace = true
pageserver_client.workspace = true pageserver_client.workspace = true
postgres_connection.workspace = true postgres_connection.workspace = true
rand.workspace = true reqwest.workspace = true
reqwest = { workspace = true, features = ["stream"] }
routerify.workspace = true routerify.workspace = true
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
@@ -42,20 +39,13 @@ tokio.workspace = true
tokio-util.workspace = true tokio-util.workspace = true
tracing.workspace = true tracing.workspace = true
measured.workspace = true measured.workspace = true
scopeguard.workspace = true
strum.workspace = true
strum_macros.workspace = true
diesel = { version = "2.1.4", features = [ diesel = { version = "2.1.4", features = ["serde_json", "postgres", "r2d2"] }
"serde_json",
"postgres",
"r2d2",
"chrono",
] }
diesel_migrations = { version = "2.1.0" } diesel_migrations = { version = "2.1.0" }
r2d2 = { version = "0.8.10" } r2d2 = { version = "0.8.10" }
utils = { path = "../libs/utils/" } utils = { path = "../../libs/utils/" }
metrics = { path = "../libs/metrics/" } metrics = { path = "../../libs/metrics/" }
control_plane = { path = "../control_plane" } control_plane = { path = ".." }
workspace_hack = { version = "0.1", path = "../workspace_hack" } workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -0,0 +1,462 @@
use std::{collections::HashMap, time::Duration};
use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
use control_plane::local_env::LocalEnv;
use hyper::{Method, StatusCode};
use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShardId};
use postgres_connection::parse_host_port;
use serde::{Deserialize, Serialize};
use tokio_util::sync::CancellationToken;
use utils::{
backoff::{self},
id::{NodeId, TenantId},
};
use crate::service::Config;
const BUSY_DELAY: Duration = Duration::from_secs(1);
const SLOWDOWN_DELAY: Duration = Duration::from_secs(5);
pub(crate) const API_CONCURRENCY: usize = 32;
struct ShardedComputeHookTenant {
stripe_size: ShardStripeSize,
shard_count: ShardCount,
shards: Vec<(ShardNumber, NodeId)>,
}
enum ComputeHookTenant {
Unsharded(NodeId),
Sharded(ShardedComputeHookTenant),
}
impl ComputeHookTenant {
/// Construct with at least one shard's information
fn new(tenant_shard_id: TenantShardId, stripe_size: ShardStripeSize, node_id: NodeId) -> Self {
if tenant_shard_id.shard_count.count() > 1 {
Self::Sharded(ShardedComputeHookTenant {
shards: vec![(tenant_shard_id.shard_number, node_id)],
stripe_size,
shard_count: tenant_shard_id.shard_count,
})
} else {
Self::Unsharded(node_id)
}
}
/// Set one shard's location. If stripe size or shard count have changed, Self is reset
/// and drops existing content.
fn update(
&mut self,
tenant_shard_id: TenantShardId,
stripe_size: ShardStripeSize,
node_id: NodeId,
) {
match self {
Self::Unsharded(existing_node_id) if tenant_shard_id.shard_count.count() == 1 => {
*existing_node_id = node_id
}
Self::Sharded(sharded_tenant)
if sharded_tenant.stripe_size == stripe_size
&& sharded_tenant.shard_count == tenant_shard_id.shard_count =>
{
if let Some(existing) = sharded_tenant
.shards
.iter()
.position(|s| s.0 == tenant_shard_id.shard_number)
{
sharded_tenant.shards.get_mut(existing).unwrap().1 = node_id;
} else {
sharded_tenant
.shards
.push((tenant_shard_id.shard_number, node_id));
sharded_tenant.shards.sort_by_key(|s| s.0)
}
}
_ => {
// Shard count changed: reset struct.
*self = Self::new(tenant_shard_id, stripe_size, node_id);
}
}
}
}
#[derive(Serialize, Deserialize, Debug)]
struct ComputeHookNotifyRequestShard {
node_id: NodeId,
shard_number: ShardNumber,
}
/// Request body that we send to the control plane to notify it of where a tenant is attached
#[derive(Serialize, Deserialize, Debug)]
struct ComputeHookNotifyRequest {
tenant_id: TenantId,
stripe_size: Option<ShardStripeSize>,
shards: Vec<ComputeHookNotifyRequestShard>,
}
/// Error type for attempts to call into the control plane compute notification hook
#[derive(thiserror::Error, Debug)]
pub(crate) enum NotifyError {
// Request was not send successfully, e.g. transport error
#[error("Sending request: {0}")]
Request(#[from] reqwest::Error),
// Request could not be serviced right now due to ongoing Operation in control plane, but should be possible soon.
#[error("Control plane tenant busy")]
Busy,
// Explicit 429 response asking us to retry less frequently
#[error("Control plane overloaded")]
SlowDown,
// A 503 response indicates the control plane can't handle the request right now
#[error("Control plane unavailable (status {0})")]
Unavailable(StatusCode),
// API returned unexpected non-success status. We will retry, but log a warning.
#[error("Control plane returned unexpected status {0}")]
Unexpected(StatusCode),
// We shutdown while sending
#[error("Shutting down")]
ShuttingDown,
// A response indicates we will never succeed, such as 400 or 404
#[error("Non-retryable error {0}")]
Fatal(StatusCode),
}
impl ComputeHookTenant {
fn maybe_reconfigure(&self, tenant_id: TenantId) -> Option<ComputeHookNotifyRequest> {
match self {
Self::Unsharded(node_id) => Some(ComputeHookNotifyRequest {
tenant_id,
shards: vec![ComputeHookNotifyRequestShard {
shard_number: ShardNumber(0),
node_id: *node_id,
}],
stripe_size: None,
}),
Self::Sharded(sharded_tenant)
if sharded_tenant.shards.len() == sharded_tenant.shard_count.count() as usize =>
{
Some(ComputeHookNotifyRequest {
tenant_id,
shards: sharded_tenant
.shards
.iter()
.map(|(shard_number, node_id)| ComputeHookNotifyRequestShard {
shard_number: *shard_number,
node_id: *node_id,
})
.collect(),
stripe_size: Some(sharded_tenant.stripe_size),
})
}
Self::Sharded(sharded_tenant) => {
// Sharded tenant doesn't yet have information for all its shards
tracing::info!(
"ComputeHookTenant::maybe_reconfigure: not enough shards ({}/{})",
sharded_tenant.shards.len(),
sharded_tenant.shard_count.count()
);
None
}
}
}
}
/// The compute hook is a destination for notifications about changes to tenant:pageserver
/// mapping. It aggregates updates for the shards in a tenant, and when appropriate reconfigures
/// the compute connection string.
pub(super) struct ComputeHook {
config: Config,
state: tokio::sync::Mutex<HashMap<TenantId, ComputeHookTenant>>,
authorization_header: Option<String>,
}
impl ComputeHook {
pub(super) fn new(config: Config) -> Self {
let authorization_header = config
.control_plane_jwt_token
.clone()
.map(|jwt| format!("Bearer {}", jwt));
Self {
state: Default::default(),
config,
authorization_header,
}
}
/// For test environments: use neon_local's LocalEnv to update compute
async fn do_notify_local(
&self,
reconfigure_request: ComputeHookNotifyRequest,
) -> anyhow::Result<()> {
let env = match LocalEnv::load_config() {
Ok(e) => e,
Err(e) => {
tracing::warn!("Couldn't load neon_local config, skipping compute update ({e})");
return Ok(());
}
};
let cplane =
ComputeControlPlane::load(env.clone()).expect("Error loading compute control plane");
let ComputeHookNotifyRequest {
tenant_id,
shards,
stripe_size,
} = reconfigure_request;
let compute_pageservers = shards
.into_iter()
.map(|shard| {
let ps_conf = env
.get_pageserver_conf(shard.node_id)
.expect("Unknown pageserver");
let (pg_host, pg_port) = parse_host_port(&ps_conf.listen_pg_addr)
.expect("Unable to parse listen_pg_addr");
(pg_host, pg_port.unwrap_or(5432))
})
.collect::<Vec<_>>();
for (endpoint_name, endpoint) in &cplane.endpoints {
if endpoint.tenant_id == tenant_id && endpoint.status() == EndpointStatus::Running {
tracing::info!("Reconfiguring endpoint {}", endpoint_name,);
endpoint
.reconfigure(compute_pageservers.clone(), stripe_size)
.await?;
}
}
Ok(())
}
async fn do_notify_iteration(
&self,
client: &reqwest::Client,
url: &String,
reconfigure_request: &ComputeHookNotifyRequest,
cancel: &CancellationToken,
) -> Result<(), NotifyError> {
let req = client.request(Method::PUT, url);
let req = if let Some(value) = &self.authorization_header {
req.header(reqwest::header::AUTHORIZATION, value)
} else {
req
};
tracing::info!(
"Sending notify request to {} ({:?})",
url,
reconfigure_request
);
let send_result = req.json(&reconfigure_request).send().await;
let response = match send_result {
Ok(r) => r,
Err(e) => return Err(e.into()),
};
// Treat all 2xx responses as success
if response.status() >= StatusCode::OK && response.status() < StatusCode::MULTIPLE_CHOICES {
if response.status() != StatusCode::OK {
// Non-200 2xx response: it doesn't make sense to retry, but this is unexpected, so
// log a warning.
tracing::warn!(
"Unexpected 2xx response code {} from control plane",
response.status()
);
}
return Ok(());
}
// Error response codes
match response.status() {
StatusCode::TOO_MANY_REQUESTS => {
// TODO: 429 handling should be global: set some state visible to other requests
// so that they will delay before starting, rather than all notifications trying
// once before backing off.
tokio::time::timeout(SLOWDOWN_DELAY, cancel.cancelled())
.await
.ok();
Err(NotifyError::SlowDown)
}
StatusCode::LOCKED => {
// Delay our retry if busy: the usual fast exponential backoff in backoff::retry
// is not appropriate
tokio::time::timeout(BUSY_DELAY, cancel.cancelled())
.await
.ok();
Err(NotifyError::Busy)
}
StatusCode::SERVICE_UNAVAILABLE
| StatusCode::GATEWAY_TIMEOUT
| StatusCode::BAD_GATEWAY => Err(NotifyError::Unavailable(response.status())),
StatusCode::BAD_REQUEST | StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
Err(NotifyError::Fatal(response.status()))
}
_ => Err(NotifyError::Unexpected(response.status())),
}
}
async fn do_notify(
&self,
url: &String,
reconfigure_request: ComputeHookNotifyRequest,
cancel: &CancellationToken,
) -> Result<(), NotifyError> {
let client = reqwest::Client::new();
backoff::retry(
|| self.do_notify_iteration(&client, url, &reconfigure_request, cancel),
|e| matches!(e, NotifyError::Fatal(_) | NotifyError::Unexpected(_)),
3,
10,
"Send compute notification",
cancel,
)
.await
.ok_or_else(|| NotifyError::ShuttingDown)
.and_then(|x| x)
}
/// Call this to notify the compute (postgres) tier of new pageservers to use
/// for a tenant. notify() is called by each shard individually, and this function
/// will decide whether an update to the tenant is sent. An update is sent on the
/// condition that:
/// - We know a pageserver for every shard.
/// - All the shards have the same shard_count (i.e. we are not mid-split)
///
/// Cancellation token enables callers to drop out, e.g. if calling from a Reconciler
/// that is cancelled.
///
/// This function is fallible, including in the case that the control plane is transiently
/// unavailable. A limited number of retries are done internally to efficiently hide short unavailability
/// periods, but we don't retry forever. The **caller** is responsible for handling failures and
/// ensuring that they eventually call again to ensure that the compute is eventually notified of
/// the proper pageserver nodes for a tenant.
#[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), node_id))]
pub(super) async fn notify(
&self,
tenant_shard_id: TenantShardId,
node_id: NodeId,
stripe_size: ShardStripeSize,
cancel: &CancellationToken,
) -> Result<(), NotifyError> {
let mut locked = self.state.lock().await;
use std::collections::hash_map::Entry;
let tenant = match locked.entry(tenant_shard_id.tenant_id) {
Entry::Vacant(e) => e.insert(ComputeHookTenant::new(
tenant_shard_id,
stripe_size,
node_id,
)),
Entry::Occupied(e) => {
let tenant = e.into_mut();
tenant.update(tenant_shard_id, stripe_size, node_id);
tenant
}
};
let reconfigure_request = tenant.maybe_reconfigure(tenant_shard_id.tenant_id);
let Some(reconfigure_request) = reconfigure_request else {
// The tenant doesn't yet have pageservers for all its shards: we won't notify anything
// until it does.
tracing::info!("Tenant isn't yet ready to emit a notification");
return Ok(());
};
if let Some(notify_url) = &self.config.compute_hook_url {
self.do_notify(notify_url, reconfigure_request, cancel)
.await
} else {
self.do_notify_local(reconfigure_request)
.await
.map_err(|e| {
// This path is for testing only, so munge the error into our prod-style error type.
tracing::error!("Local notification hook failed: {e}");
NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR)
})
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use pageserver_api::shard::{ShardCount, ShardNumber};
use utils::id::TenantId;
use super::*;
#[test]
fn tenant_updates() -> anyhow::Result<()> {
let tenant_id = TenantId::generate();
let mut tenant_state = ComputeHookTenant::new(
TenantShardId {
tenant_id,
shard_count: ShardCount::new(0),
shard_number: ShardNumber(0),
},
ShardStripeSize(12345),
NodeId(1),
);
// An unsharded tenant is always ready to emit a notification
assert!(tenant_state.maybe_reconfigure(tenant_id).is_some());
assert_eq!(
tenant_state
.maybe_reconfigure(tenant_id)
.unwrap()
.shards
.len(),
1
);
assert!(tenant_state
.maybe_reconfigure(tenant_id)
.unwrap()
.stripe_size
.is_none());
// Writing the first shard of a multi-sharded situation (i.e. in a split)
// resets the tenant state and puts it in an non-notifying state (need to
// see all shards)
tenant_state.update(
TenantShardId {
tenant_id,
shard_count: ShardCount::new(2),
shard_number: ShardNumber(1),
},
ShardStripeSize(32768),
NodeId(1),
);
assert!(tenant_state.maybe_reconfigure(tenant_id).is_none());
// Writing the second shard makes it ready to notify
tenant_state.update(
TenantShardId {
tenant_id,
shard_count: ShardCount::new(2),
shard_number: ShardNumber(0),
},
ShardStripeSize(32768),
NodeId(1),
);
assert!(tenant_state.maybe_reconfigure(tenant_id).is_some());
assert_eq!(
tenant_state
.maybe_reconfigure(tenant_id)
.unwrap()
.shards
.len(),
2
);
assert_eq!(
tenant_state
.maybe_reconfigure(tenant_id)
.unwrap()
.stripe_size,
Some(ShardStripeSize(32768))
);
Ok(())
}
}

View File

@@ -6,7 +6,10 @@ use std::{
}; };
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use pageserver_api::{controller_api::NodeAvailability, models::PageserverUtilization}; use pageserver_api::{
controller_api::{NodeAvailability, UtilizationScore},
models::PageserverUtilization,
};
use thiserror::Error; use thiserror::Error;
use utils::id::NodeId; use utils::id::NodeId;
@@ -19,8 +22,7 @@ struct HeartbeaterTask {
state: HashMap<NodeId, PageserverState>, state: HashMap<NodeId, PageserverState>,
max_offline_interval: Duration, max_unavailable_interval: Duration,
max_warming_up_interval: Duration,
jwt_token: Option<String>, jwt_token: Option<String>,
} }
@@ -30,9 +32,6 @@ pub(crate) enum PageserverState {
last_seen_at: Instant, last_seen_at: Instant,
utilization: PageserverUtilization, utilization: PageserverUtilization,
}, },
WarmingUp {
started_at: Instant,
},
Offline, Offline,
} }
@@ -57,18 +56,12 @@ pub(crate) struct Heartbeater {
impl Heartbeater { impl Heartbeater {
pub(crate) fn new( pub(crate) fn new(
jwt_token: Option<String>, jwt_token: Option<String>,
max_offline_interval: Duration, max_unavailable_interval: Duration,
max_warming_up_interval: Duration,
cancel: CancellationToken, cancel: CancellationToken,
) -> Self { ) -> Self {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel::<HeartbeatRequest>(); let (sender, receiver) = tokio::sync::mpsc::unbounded_channel::<HeartbeatRequest>();
let mut heartbeater = HeartbeaterTask::new( let mut heartbeater =
receiver, HeartbeaterTask::new(receiver, jwt_token, max_unavailable_interval, cancel);
jwt_token,
max_offline_interval,
max_warming_up_interval,
cancel,
);
tokio::task::spawn(async move { heartbeater.run().await }); tokio::task::spawn(async move { heartbeater.run().await });
Self { sender } Self { sender }
@@ -84,12 +77,9 @@ impl Heartbeater {
pageservers, pageservers,
reply: sender, reply: sender,
}) })
.map_err(|_| HeartbeaterError::Cancel)?; .unwrap();
receiver receiver.await.unwrap()
.await
.map_err(|_| HeartbeaterError::Cancel)
.and_then(|x| x)
} }
} }
@@ -97,16 +87,14 @@ impl HeartbeaterTask {
fn new( fn new(
receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest>, receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest>,
jwt_token: Option<String>, jwt_token: Option<String>,
max_offline_interval: Duration, max_unavailable_interval: Duration,
max_warming_up_interval: Duration,
cancel: CancellationToken, cancel: CancellationToken,
) -> Self { ) -> Self {
Self { Self {
receiver, receiver,
cancel, cancel,
state: HashMap::new(), state: HashMap::new(),
max_offline_interval, max_unavailable_interval,
max_warming_up_interval,
jwt_token, jwt_token,
} }
} }
@@ -143,12 +131,11 @@ impl HeartbeaterTask {
// Clone the node and mark it as available such that the request // Clone the node and mark it as available such that the request
// goes through to the pageserver even when the node is marked offline. // goes through to the pageserver even when the node is marked offline.
// This doesn't impact the availability observed by [`crate::service::Service`]. // This doesn't impact the availability observed by [`crate::service::Service`].
let mut node_clone = node.clone(); let mut node = node.clone();
node_clone node.set_availability(NodeAvailability::Active(UtilizationScore::worst()));
.set_availability(NodeAvailability::Active(PageserverUtilization::full()));
async move { async move {
let response = node_clone let response = node
.with_client_retries( .with_client_retries(
|client| async move { client.get_utilization().await }, |client| async move { client.get_utilization().await },
&jwt_token, &jwt_token,
@@ -173,12 +160,6 @@ impl HeartbeaterTask {
last_seen_at: Instant::now(), last_seen_at: Instant::now(),
utilization, utilization,
} }
} else if let NodeAvailability::WarmingUp(last_seen_at) =
node.get_availability()
{
PageserverState::WarmingUp {
started_at: *last_seen_at,
}
} else { } else {
PageserverState::Offline PageserverState::Offline
}; };
@@ -204,66 +185,38 @@ impl HeartbeaterTask {
} }
} }
let mut warming_up = 0;
let mut offline = 0;
for state in new_state.values() {
match state {
PageserverState::WarmingUp { .. } => {
warming_up += 1;
}
PageserverState::Offline { .. } => offline += 1,
PageserverState::Available { .. } => {}
}
}
tracing::info!(
"Heartbeat round complete for {} nodes, {} warming-up, {} offline",
new_state.len(),
warming_up,
offline
);
let mut deltas = Vec::new(); let mut deltas = Vec::new();
let now = Instant::now(); let now = Instant::now();
for (node_id, ps_state) in new_state.iter_mut() { for (node_id, ps_state) in new_state {
use std::collections::hash_map::Entry::*; use std::collections::hash_map::Entry::*;
let entry = self.state.entry(*node_id); let entry = self.state.entry(node_id);
let mut needs_update = false; let mut needs_update = false;
match entry { match entry {
Occupied(ref occ) => match (occ.get(), &ps_state) { Occupied(ref occ) => match (occ.get(), &ps_state) {
(PageserverState::Offline, PageserverState::Offline) => {} (PageserverState::Offline, PageserverState::Offline) => {}
(PageserverState::Available { last_seen_at, .. }, PageserverState::Offline) => { (PageserverState::Available { last_seen_at, .. }, PageserverState::Offline) => {
if now - *last_seen_at >= self.max_offline_interval { if now - *last_seen_at >= self.max_unavailable_interval {
deltas.push((*node_id, ps_state.clone())); deltas.push((node_id, ps_state.clone()));
needs_update = true; needs_update = true;
} }
} }
(_, PageserverState::WarmingUp { started_at }) => {
if now - *started_at >= self.max_warming_up_interval {
*ps_state = PageserverState::Offline;
}
deltas.push((*node_id, ps_state.clone()));
needs_update = true;
}
_ => { _ => {
deltas.push((*node_id, ps_state.clone())); deltas.push((node_id, ps_state.clone()));
needs_update = true; needs_update = true;
} }
}, },
Vacant(_) => { Vacant(_) => {
// This is a new node. Don't generate a delta for it. deltas.push((node_id, ps_state.clone()));
deltas.push((*node_id, ps_state.clone()));
} }
} }
match entry { match entry {
Occupied(mut occ) if needs_update => { Occupied(mut occ) if needs_update => {
(*occ.get_mut()) = ps_state.clone(); (*occ.get_mut()) = ps_state;
} }
Vacant(vac) => { Vacant(vac) => {
vac.insert(ps_state.clone()); vac.insert(ps_state);
} }
_ => {} _ => {}
} }

View File

@@ -3,20 +3,13 @@ use crate::metrics::{
METRICS_REGISTRY, METRICS_REGISTRY,
}; };
use crate::reconciler::ReconcileError; use crate::reconciler::ReconcileError;
use crate::service::{LeadershipStatus, Service, STARTUP_RECONCILE_TIMEOUT}; use crate::service::{Service, STARTUP_RECONCILE_TIMEOUT};
use anyhow::Context;
use futures::Future; use futures::Future;
use hyper::header::CONTENT_TYPE; use hyper::header::CONTENT_TYPE;
use hyper::{Body, Request, Response}; use hyper::{Body, Request, Response};
use hyper::{StatusCode, Uri}; use hyper::{StatusCode, Uri};
use metrics::{BuildInfo, NeonMetrics};
use pageserver_api::controller_api::{
MetadataHealthListOutdatedRequest, MetadataHealthListOutdatedResponse,
MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse,
TenantCreateRequest,
};
use pageserver_api::models::{ use pageserver_api::models::{
TenantConfigRequest, TenantLocationConfigRequest, TenantShardSplitRequest, TenantConfigRequest, TenantCreateRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
TenantTimeTravelRequest, TimelineCreateRequest, TenantTimeTravelRequest, TimelineCreateRequest,
}; };
use pageserver_api::shard::TenantShardId; use pageserver_api::shard::TenantShardId;
@@ -41,8 +34,7 @@ use utils::{
}; };
use pageserver_api::controller_api::{ use pageserver_api::controller_api::{
NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantPolicyRequest, NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantShardMigrateRequest,
TenantShardMigrateRequest,
}; };
use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest}; use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
@@ -51,19 +43,15 @@ use control_plane::storage_controller::{AttachHookRequest, InspectRequest};
use routerify::Middleware; use routerify::Middleware;
/// State available to HTTP request handlers /// State available to HTTP request handlers
#[derive(Clone)]
pub struct HttpState { pub struct HttpState {
service: Arc<crate::service::Service>, service: Arc<crate::service::Service>,
auth: Option<Arc<SwappableJwtAuth>>, auth: Option<Arc<SwappableJwtAuth>>,
neon_metrics: NeonMetrics,
allowlist_routes: Vec<Uri>, allowlist_routes: Vec<Uri>,
} }
impl HttpState { impl HttpState {
pub fn new( pub fn new(service: Arc<crate::service::Service>, auth: Option<Arc<SwappableJwtAuth>>) -> Self {
service: Arc<crate::service::Service>,
auth: Option<Arc<SwappableJwtAuth>>,
build_info: BuildInfo,
) -> Self {
let allowlist_routes = ["/status", "/ready", "/metrics"] let allowlist_routes = ["/status", "/ready", "/metrics"]
.iter() .iter()
.map(|v| v.parse().unwrap()) .map(|v| v.parse().unwrap())
@@ -71,7 +59,6 @@ impl HttpState {
Self { Self {
service, service,
auth, auth,
neon_metrics: NeonMetrics::new(build_info),
allowlist_routes, allowlist_routes,
} }
} }
@@ -147,6 +134,52 @@ async fn handle_tenant_create(
) )
} }
// For tenant and timeline deletions, which both implement an "initially return 202, then 404 once
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream. This avoids
// needing to track a "deleting" state for tenants.
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
where
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
{
let started_at = Instant::now();
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
// completed.
let mut retry_period = Duration::from_secs(1);
// On subsequent retries, wait longer.
let max_retry_period = Duration::from_secs(5);
// Enable callers with a 30 second request timeout to reliably get a response
let max_wait = Duration::from_secs(25);
loop {
let status = f(service.clone()).await?;
match status {
StatusCode::ACCEPTED => {
tracing::info!("Deletion accepted, waiting to try again...");
tokio::time::sleep(retry_period).await;
retry_period = max_retry_period;
}
StatusCode::NOT_FOUND => {
tracing::info!("Deletion complete");
return json_response(StatusCode::OK, ());
}
_ => {
tracing::warn!("Unexpected status {status}");
return json_response(status, ());
}
}
let now = Instant::now();
if now + retry_period > started_at + max_wait {
tracing::info!("Deletion timed out waiting for 404");
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
// the pageserver's swagger definition for this endpoint, and has the same desired
// effect of causing the control plane to retry later.
return json_response(StatusCode::CONFLICT, ());
}
}
}
async fn handle_tenant_location_config( async fn handle_tenant_location_config(
service: Arc<Service>, service: Arc<Service>,
mut req: Request<Body>, mut req: Request<Body>,
@@ -218,12 +251,6 @@ async fn handle_tenant_time_travel_remote_storage(
json_response(StatusCode::OK, ()) json_response(StatusCode::OK, ())
} }
fn map_reqwest_hyper_status(status: reqwest::StatusCode) -> Result<hyper::StatusCode, ApiError> {
hyper::StatusCode::from_u16(status.as_u16())
.context("invalid status code")
.map_err(ApiError::InternalServerError)
}
async fn handle_tenant_secondary_download( async fn handle_tenant_secondary_download(
service: Arc<Service>, service: Arc<Service>,
req: Request<Body>, req: Request<Body>,
@@ -232,7 +259,7 @@ async fn handle_tenant_secondary_download(
let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis); let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis);
let (status, progress) = service.tenant_secondary_download(tenant_id, wait).await?; let (status, progress) = service.tenant_secondary_download(tenant_id, wait).await?;
json_response(map_reqwest_hyper_status(status)?, progress) json_response(status, progress)
} }
async fn handle_tenant_delete( async fn handle_tenant_delete(
@@ -242,17 +269,10 @@ async fn handle_tenant_delete(
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?; check_permissions(&req, Scope::PageServerApi)?;
let status_code = service deletion_wrapper(service, move |service| async move {
.tenant_delete(tenant_id) service.tenant_delete(tenant_id).await
.await })
.and_then(map_reqwest_hyper_status)?; .await
if status_code == StatusCode::NOT_FOUND {
// The pageserver uses 404 for successful deletion, but we use 200
json_response(StatusCode::OK, ())
} else {
json_response(status_code, ())
}
} }
async fn handle_tenant_timeline_create( async fn handle_tenant_timeline_create(
@@ -280,76 +300,12 @@ async fn handle_tenant_timeline_delete(
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
// For timeline deletions, which both implement an "initially return 202, then 404 once
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream.
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
where
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
{
let started_at = Instant::now();
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
// completed.
let mut retry_period = Duration::from_secs(1);
// On subsequent retries, wait longer.
let max_retry_period = Duration::from_secs(5);
// Enable callers with a 30 second request timeout to reliably get a response
let max_wait = Duration::from_secs(25);
loop {
let status = f(service.clone()).await?;
match status {
StatusCode::ACCEPTED => {
tracing::info!("Deletion accepted, waiting to try again...");
tokio::time::sleep(retry_period).await;
retry_period = max_retry_period;
}
StatusCode::NOT_FOUND => {
tracing::info!("Deletion complete");
return json_response(StatusCode::OK, ());
}
_ => {
tracing::warn!("Unexpected status {status}");
return json_response(status, ());
}
}
let now = Instant::now();
if now + retry_period > started_at + max_wait {
tracing::info!("Deletion timed out waiting for 404");
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
// the pageserver's swagger definition for this endpoint, and has the same desired
// effect of causing the control plane to retry later.
return json_response(StatusCode::CONFLICT, ());
}
}
}
deletion_wrapper(service, move |service| async move { deletion_wrapper(service, move |service| async move {
service service.tenant_timeline_delete(tenant_id, timeline_id).await
.tenant_timeline_delete(tenant_id, timeline_id)
.await
.and_then(map_reqwest_hyper_status)
}) })
.await .await
} }
async fn handle_tenant_timeline_detach_ancestor(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
let res = service
.tenant_timeline_detach_ancestor(tenant_id, timeline_id)
.await?;
json_response(StatusCode::OK, res)
}
async fn handle_tenant_timeline_passthrough( async fn handle_tenant_timeline_passthrough(
service: Arc<Service>, service: Arc<Service>,
req: Request<Body>, req: Request<Body>,
@@ -408,9 +364,11 @@ async fn handle_tenant_timeline_passthrough(
} }
// We have a reqest::Response, would like a http::Response // We have a reqest::Response, would like a http::Response
let mut builder = hyper::Response::builder().status(map_reqwest_hyper_status(resp.status())?); let mut builder = hyper::Response::builder()
.status(resp.status())
.version(resp.version());
for (k, v) in resp.headers() { for (k, v) in resp.headers() {
builder = builder.header(k.as_str(), v.as_bytes()); builder = builder.header(k, v);
} }
let response = builder let response = builder
@@ -434,21 +392,12 @@ async fn handle_tenant_describe(
service: Arc<Service>, service: Arc<Service>,
req: Request<Body>, req: Request<Body>,
) -> Result<Response<Body>, ApiError> { ) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Scrubber)?; check_permissions(&req, Scope::Admin)?;
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
json_response(StatusCode::OK, service.tenant_describe(tenant_id)?) json_response(StatusCode::OK, service.tenant_describe(tenant_id)?)
} }
async fn handle_tenant_list(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
json_response(StatusCode::OK, service.tenant_list())
}
async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>, ApiError> { async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?; check_permissions(&req, Scope::Admin)?;
@@ -462,10 +411,7 @@ async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError
check_permissions(&req, Scope::Admin)?; check_permissions(&req, Scope::Admin)?;
let state = get_state(&req); let state = get_state(&req);
let nodes = state.service.node_list().await?; json_response(StatusCode::OK, state.service.node_list().await?)
let api_nodes = nodes.into_iter().map(|n| n.describe()).collect::<Vec<_>>();
json_response(StatusCode::OK, api_nodes)
} }
async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> { async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -476,14 +422,6 @@ async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError
json_response(StatusCode::OK, state.service.node_drop(node_id).await?) json_response(StatusCode::OK, state.service.node_drop(node_id).await?)
} }
async fn handle_node_delete(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
json_response(StatusCode::OK, state.service.node_delete(node_id).await?)
}
async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>, ApiError> { async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?; check_permissions(&req, Scope::Admin)?;
@@ -500,7 +438,7 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
StatusCode::OK, StatusCode::OK,
state state
.service .service
.external_node_configure( .node_configure(
config_req.node_id, config_req.node_id,
config_req.availability.map(NodeAvailability::from), config_req.availability.map(NodeAvailability::from),
config_req.scheduling, config_req.scheduling,
@@ -509,119 +447,6 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
) )
} }
async fn handle_node_status(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
let node_status = state.service.get_node(node_id).await?;
json_response(StatusCode::OK, node_status)
}
async fn handle_get_leader(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let leader = state.service.get_leader().await.map_err(|err| {
ApiError::InternalServerError(anyhow::anyhow!(
"Failed to read leader from database: {err}"
))
})?;
json_response(StatusCode::OK, leader)
}
async fn handle_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
state.service.start_node_drain(node_id).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn handle_cancel_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
state.service.cancel_node_drain(node_id).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn handle_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
state.service.start_node_fill(node_id).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn handle_cancel_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
state.service.cancel_node_fill(node_id).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn handle_metadata_health_update(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Scrubber)?;
let update_req = json_request::<MetadataHealthUpdateRequest>(&mut req).await?;
let state = get_state(&req);
state.service.metadata_health_update(update_req).await?;
json_response(StatusCode::OK, MetadataHealthUpdateResponse {})
}
async fn handle_metadata_health_list_unhealthy(
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let unhealthy_tenant_shards = state.service.metadata_health_list_unhealthy().await?;
json_response(
StatusCode::OK,
MetadataHealthListUnhealthyResponse {
unhealthy_tenant_shards,
},
)
}
async fn handle_metadata_health_list_outdated(
mut req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let list_outdated_req = json_request::<MetadataHealthListOutdatedRequest>(&mut req).await?;
let state = get_state(&req);
let health_records = state
.service
.metadata_health_list_outdated(list_outdated_req.not_scrubbed_for)
.await?;
json_response(
StatusCode::OK,
MetadataHealthListOutdatedResponse { health_records },
)
}
async fn handle_tenant_shard_split( async fn handle_tenant_shard_split(
service: Arc<Service>, service: Arc<Service>,
mut req: Request<Body>, mut req: Request<Body>,
@@ -653,29 +478,6 @@ async fn handle_tenant_shard_migrate(
) )
} }
async fn handle_tenant_update_policy(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let update_req = json_request::<TenantPolicyRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state
.service
.tenant_update_policy(tenant_id, update_req)
.await?,
)
}
async fn handle_step_down(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
json_response(StatusCode::OK, state.service.step_down().await)
}
async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> { async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?; check_permissions(&req, Scope::PageServerApi)?;
@@ -685,18 +487,6 @@ async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiErr
json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?) json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
} }
async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state.service.tenant_import(tenant_id).await?,
)
}
async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> { async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?; check_permissions(&req, Scope::Admin)?;
@@ -719,14 +509,6 @@ async fn handle_consistency_check(req: Request<Body>) -> Result<Response<Body>,
json_response(StatusCode::OK, state.service.consistency_check().await?) json_response(StatusCode::OK, state.service.consistency_check().await?)
} }
async fn handle_reconcile_all(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
json_response(StatusCode::OK, state.service.reconcile_all_now().await?)
}
/// Status endpoint is just used for checking that our HTTP listener is up /// Status endpoint is just used for checking that our HTTP listener is up
async fn handle_status(_req: Request<Body>) -> Result<Response<Body>, ApiError> { async fn handle_status(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(StatusCode::OK, ()) json_response(StatusCode::OK, ())
@@ -783,17 +565,9 @@ where
.await .await
} }
/// Check if the required scope is held in the request's token, or if the request has
/// a token with 'admin' scope then always permit it.
fn check_permissions(request: &Request<Body>, required_scope: Scope) -> Result<(), ApiError> { fn check_permissions(request: &Request<Body>, required_scope: Scope) -> Result<(), ApiError> {
check_permission_with(request, |claims| { check_permission_with(request, |claims| {
match crate::auth::check_permission(claims, required_scope) { crate::auth::check_permission(claims, required_scope)
Err(e) => match crate::auth::check_permission(claims, Scope::Admin) {
Ok(()) => Ok(()),
Err(_) => Err(e),
},
Ok(()) => Ok(()),
}
}) })
} }
@@ -803,47 +577,6 @@ struct RequestMeta {
at: Instant, at: Instant,
} }
pub fn prologue_leadership_status_check_middleware<
B: hyper::body::HttpBody + Send + Sync + 'static,
>() -> Middleware<B, ApiError> {
Middleware::pre(move |req| async move {
let state = get_state(&req);
let leadership_status = state.service.get_leadership_status();
enum AllowedRoutes<'a> {
All,
Some(Vec<&'a str>),
}
let allowed_routes = match leadership_status {
LeadershipStatus::Leader => AllowedRoutes::All,
LeadershipStatus::SteppedDown => {
// TODO: does it make sense to allow /status here?
AllowedRoutes::Some(["/control/v1/step_down", "/status", "/metrics"].to_vec())
}
LeadershipStatus::Candidate => {
AllowedRoutes::Some(["/ready", "/status", "/metrics"].to_vec())
}
};
let uri = req.uri().to_string();
match allowed_routes {
AllowedRoutes::All => Ok(req),
AllowedRoutes::Some(allowed) if allowed.contains(&uri.as_str()) => Ok(req),
_ => {
tracing::info!(
"Request {} not allowed due to current leadership state",
req.uri()
);
Err(ApiError::ResourceUnavailable(
format!("Current leadership status is {leadership_status}").into(),
))
}
}
})
}
fn prologue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>( fn prologue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
) -> Middleware<B, ApiError> { ) -> Middleware<B, ApiError> {
Middleware::pre(move |req| async move { Middleware::pre(move |req| async move {
@@ -894,11 +627,10 @@ fn epilogue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>
}) })
} }
pub async fn measured_metrics_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> { pub async fn measured_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
pub const TEXT_FORMAT: &str = "text/plain; version=0.0.4"; pub const TEXT_FORMAT: &str = "text/plain; version=0.0.4";
let state = get_state(&req); let payload = crate::metrics::METRICS_REGISTRY.encode();
let payload = crate::metrics::METRICS_REGISTRY.encode(&state.neon_metrics);
let response = Response::builder() let response = Response::builder()
.status(200) .status(200)
.header(CONTENT_TYPE, TEXT_FORMAT) .header(CONTENT_TYPE, TEXT_FORMAT)
@@ -927,10 +659,8 @@ where
pub fn make_router( pub fn make_router(
service: Arc<Service>, service: Arc<Service>,
auth: Option<Arc<SwappableJwtAuth>>, auth: Option<Arc<SwappableJwtAuth>>,
build_info: BuildInfo,
) -> RouterBuilder<hyper::Body, ApiError> { ) -> RouterBuilder<hyper::Body, ApiError> {
let mut router = endpoint::make_router() let mut router = endpoint::make_router()
.middleware(prologue_leadership_status_check_middleware())
.middleware(prologue_metrics_middleware()) .middleware(prologue_metrics_middleware())
.middleware(epilogue_metrics_middleware()); .middleware(epilogue_metrics_middleware());
if auth.is_some() { if auth.is_some() {
@@ -945,7 +675,7 @@ pub fn make_router(
} }
router router
.data(Arc::new(HttpState::new(service, auth, build_info))) .data(Arc::new(HttpState::new(service, auth)))
.get("/metrics", |r| { .get("/metrics", |r| {
named_request_span(r, measured_metrics_handler, RequestName("metrics")) named_request_span(r, measured_metrics_handler, RequestName("metrics"))
}) })
@@ -976,13 +706,6 @@ pub fn make_router(
.post("/debug/v1/node/:node_id/drop", |r| { .post("/debug/v1/node/:node_id/drop", |r| {
named_request_span(r, handle_node_drop, RequestName("debug_v1_node_drop")) named_request_span(r, handle_node_drop, RequestName("debug_v1_node_drop"))
}) })
.post("/debug/v1/tenant/:tenant_id/import", |r| {
named_request_span(
r,
handle_tenant_import,
RequestName("debug_v1_tenant_import"),
)
})
.get("/debug/v1/tenant", |r| { .get("/debug/v1/tenant", |r| {
named_request_span(r, handle_tenants_dump, RequestName("debug_v1_tenant")) named_request_span(r, handle_tenants_dump, RequestName("debug_v1_tenant"))
}) })
@@ -1003,9 +726,6 @@ pub fn make_router(
RequestName("debug_v1_consistency_check"), RequestName("debug_v1_consistency_check"),
) )
}) })
.post("/debug/v1/reconcile_all", |r| {
request_span(r, handle_reconcile_all)
})
.put("/debug/v1/failpoints", |r| { .put("/debug/v1/failpoints", |r| {
request_span(r, |r| failpoints_handler(r, CancellationToken::new())) request_span(r, |r| failpoints_handler(r, CancellationToken::new()))
}) })
@@ -1013,9 +733,6 @@ pub fn make_router(
.post("/control/v1/node", |r| { .post("/control/v1/node", |r| {
named_request_span(r, handle_node_register, RequestName("control_v1_node")) named_request_span(r, handle_node_register, RequestName("control_v1_node"))
}) })
.delete("/control/v1/node/:node_id", |r| {
named_request_span(r, handle_node_delete, RequestName("control_v1_node_delete"))
})
.get("/control/v1/node", |r| { .get("/control/v1/node", |r| {
named_request_span(r, handle_node_list, RequestName("control_v1_node")) named_request_span(r, handle_node_list, RequestName("control_v1_node"))
}) })
@@ -1026,54 +743,6 @@ pub fn make_router(
RequestName("control_v1_node_config"), RequestName("control_v1_node_config"),
) )
}) })
.get("/control/v1/node/:node_id", |r| {
named_request_span(r, handle_node_status, RequestName("control_v1_node_status"))
})
.get("/control/v1/leader", |r| {
named_request_span(r, handle_get_leader, RequestName("control_v1_get_leader"))
})
.put("/control/v1/node/:node_id/drain", |r| {
named_request_span(r, handle_node_drain, RequestName("control_v1_node_drain"))
})
.delete("/control/v1/node/:node_id/drain", |r| {
named_request_span(
r,
handle_cancel_node_drain,
RequestName("control_v1_cancel_node_drain"),
)
})
.put("/control/v1/node/:node_id/fill", |r| {
named_request_span(r, handle_node_fill, RequestName("control_v1_node_fill"))
})
.delete("/control/v1/node/:node_id/fill", |r| {
named_request_span(
r,
handle_cancel_node_fill,
RequestName("control_v1_cancel_node_fill"),
)
})
// Metadata health operations
.post("/control/v1/metadata_health/update", |r| {
named_request_span(
r,
handle_metadata_health_update,
RequestName("control_v1_metadata_health_update"),
)
})
.get("/control/v1/metadata_health/unhealthy", |r| {
named_request_span(
r,
handle_metadata_health_list_unhealthy,
RequestName("control_v1_metadata_health_list_unhealthy"),
)
})
.post("/control/v1/metadata_health/outdated", |r| {
named_request_span(
r,
handle_metadata_health_list_outdated,
RequestName("control_v1_metadata_health_list_outdated"),
)
})
// Tenant Shard operations // Tenant Shard operations
.put("/control/v1/tenant/:tenant_shard_id/migrate", |r| { .put("/control/v1/tenant/:tenant_shard_id/migrate", |r| {
tenant_service_handler( tenant_service_handler(
@@ -1096,19 +765,6 @@ pub fn make_router(
RequestName("control_v1_tenant_describe"), RequestName("control_v1_tenant_describe"),
) )
}) })
.get("/control/v1/tenant", |r| {
tenant_service_handler(r, handle_tenant_list, RequestName("control_v1_tenant_list"))
})
.put("/control/v1/tenant/:tenant_id/policy", |r| {
named_request_span(
r,
handle_tenant_update_policy,
RequestName("control_v1_tenant_policy"),
)
})
.put("/control/v1/step_down", |r| {
named_request_span(r, handle_step_down, RequestName("control_v1_step_down"))
})
// Tenant operations // Tenant operations
// The ^/v1/ endpoints act as a "Virtual Pageserver", enabling shard-naive clients to call into // The ^/v1/ endpoints act as a "Virtual Pageserver", enabling shard-naive clients to call into
// this service to manage tenants that actually consist of many tenant shards, as if they are a single entity. // this service to manage tenants that actually consist of many tenant shards, as if they are a single entity.
@@ -1160,17 +816,7 @@ pub fn make_router(
RequestName("v1_tenant_timeline"), RequestName("v1_tenant_timeline"),
) )
}) })
.put( // Tenant detail GET passthrough to shard zero
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach_ancestor",
|r| {
tenant_service_handler(
r,
handle_tenant_timeline_detach_ancestor,
RequestName("v1_tenant_timeline_detach_ancestor"),
)
},
)
// Tenant detail GET passthrough to shard zero:
.get("/v1/tenant/:tenant_id", |r| { .get("/v1/tenant/:tenant_id", |r| {
tenant_service_handler( tenant_service_handler(
r, r,
@@ -1178,14 +824,13 @@ pub fn make_router(
RequestName("v1_tenant_passthrough"), RequestName("v1_tenant_passthrough"),
) )
}) })
// The `*` in the URL is a wildcard: any tenant/timeline GET APIs on the pageserver // Timeline GET passthrough to shard zero. Note that the `*` in the URL is a wildcard: any future
// are implicitly exposed here. This must be last in the list to avoid // timeline GET APIs will be implicitly included.
// taking precedence over other GET methods we might implement by hand. .get("/v1/tenant/:tenant_id/timeline*", |r| {
.get("/v1/tenant/:tenant_id/*", |r| {
tenant_service_handler( tenant_service_handler(
r, r,
handle_tenant_timeline_passthrough, handle_tenant_timeline_passthrough,
RequestName("v1_tenant_passthrough"), RequestName("v1_tenant_timeline_passthrough"),
) )
}) })
} }

View File

@@ -0,0 +1,54 @@
use std::{collections::HashMap, sync::Arc};
/// A map of locks covering some arbitrary identifiers. Useful if you have a collection of objects but don't
/// want to embed a lock in each one, or if your locking granularity is different to your object granularity.
/// For example, used in the storage controller where the objects are tenant shards, but sometimes locking
/// is needed at a tenant-wide granularity.
pub(crate) struct IdLockMap<T>
where
T: Eq + PartialEq + std::hash::Hash,
{
/// A synchronous lock for getting/setting the async locks that our callers will wait on.
entities: std::sync::Mutex<std::collections::HashMap<T, Arc<tokio::sync::RwLock<()>>>>,
}
impl<T> IdLockMap<T>
where
T: Eq + PartialEq + std::hash::Hash,
{
pub(crate) fn shared(
&self,
key: T,
) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockReadGuard<()>> {
let mut locked = self.entities.lock().unwrap();
let entry = locked.entry(key).or_default();
entry.clone().read_owned()
}
pub(crate) fn exclusive(
&self,
key: T,
) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockWriteGuard<()>> {
let mut locked = self.entities.lock().unwrap();
let entry = locked.entry(key).or_default();
entry.clone().write_owned()
}
/// Rather than building a lock guard that re-takes the [`Self::entities`] lock, we just do
/// periodic housekeeping to avoid the map growing indefinitely
pub(crate) fn housekeeping(&self) {
let mut locked = self.entities.lock().unwrap();
locked.retain(|_k, lock| lock.try_write().is_err())
}
}
impl<T> Default for IdLockMap<T>
where
T: Eq + PartialEq + std::hash::Hash,
{
fn default() -> Self {
Self {
entities: std::sync::Mutex::new(HashMap::new()),
}
}
}

View File

@@ -2,23 +2,19 @@ use serde::Serialize;
use utils::seqwait::MonotonicCounter; use utils::seqwait::MonotonicCounter;
mod auth; mod auth;
mod background_node_operations;
mod compute_hook; mod compute_hook;
mod drain_utils;
mod heartbeater; mod heartbeater;
pub mod http; pub mod http;
mod id_lock_map; mod id_lock_map;
mod leadership;
pub mod metrics; pub mod metrics;
mod node; mod node;
mod pageserver_client; mod pageserver_client;
mod peer_client;
pub mod persistence; pub mod persistence;
mod reconciler; mod reconciler;
mod scheduler; mod scheduler;
mod schema; mod schema;
pub mod service; pub mod service;
mod tenant_shard; mod tenant_state;
#[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone, Serialize)] #[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone, Serialize)]
struct Sequence(u64); struct Sequence(u64);

View File

@@ -1,31 +1,26 @@
use anyhow::{anyhow, Context}; use anyhow::{anyhow, Context};
use attachment_service::http::make_router;
use attachment_service::metrics::preinitialize_metrics;
use attachment_service::persistence::Persistence;
use attachment_service::service::{Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT};
use camino::Utf8PathBuf;
use clap::Parser; use clap::Parser;
use hyper::Uri; use diesel::Connection;
use metrics::launch_timestamp::LaunchTimestamp; use metrics::launch_timestamp::LaunchTimestamp;
use metrics::BuildInfo;
use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration;
use storage_controller::http::make_router;
use storage_controller::metrics::preinitialize_metrics;
use storage_controller::persistence::Persistence;
use storage_controller::service::chaos_injector::ChaosInjector;
use storage_controller::service::{
Config, Service, MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT,
RECONCILER_CONCURRENCY_DEFAULT,
};
use tokio::signal::unix::SignalKind; use tokio::signal::unix::SignalKind;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::Instrument;
use utils::auth::{JwtAuth, SwappableJwtAuth}; use utils::auth::{JwtAuth, SwappableJwtAuth};
use utils::logging::{self, LogFormat}; use utils::logging::{self, LogFormat};
use utils::sentry_init::init_sentry;
use utils::{project_build_tag, project_git_version, tcp_listener}; use utils::{project_build_tag, project_git_version, tcp_listener};
project_git_version!(GIT_VERSION); project_git_version!(GIT_VERSION);
project_build_tag!(BUILD_TAG); project_build_tag!(BUILD_TAG);
use diesel_migrations::{embed_migrations, EmbeddedMigrations};
pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations");
#[derive(Parser)] #[derive(Parser)]
#[command(author, version, about, long_about = None)] #[command(author, version, about, long_about = None)]
#[command(arg_required_else_help(true))] #[command(arg_required_else_help(true))]
@@ -47,14 +42,15 @@ struct Cli {
#[arg(long)] #[arg(long)]
control_plane_jwt_token: Option<String>, control_plane_jwt_token: Option<String>,
#[arg(long)]
peer_jwt_token: Option<String>,
/// URL to control plane compute notification endpoint /// URL to control plane compute notification endpoint
#[arg(long)] #[arg(long)]
compute_hook_url: Option<String>, compute_hook_url: Option<String>,
/// URL to connect to postgres, like postgresql://localhost:1234/storage_controller /// Path to the .json file to store state (will be created if it doesn't exist)
#[arg(short, long)]
path: Option<Utf8PathBuf>,
/// URL to connect to postgres, like postgresql://localhost:1234/attachment_service
#[arg(long)] #[arg(long)]
database_url: Option<String>, database_url: Option<String>,
@@ -64,46 +60,7 @@ struct Cli {
/// Grace period before marking unresponsive pageserver offline /// Grace period before marking unresponsive pageserver offline
#[arg(long)] #[arg(long)]
max_offline_interval: Option<humantime::Duration>, max_unavailable_interval: Option<humantime::Duration>,
/// More tolerant grace period before marking unresponsive pagserver offline used
/// around pageserver restarts
#[arg(long)]
max_warming_up_interval: Option<humantime::Duration>,
/// Size threshold for automatically splitting shards (disabled by default)
#[arg(long)]
split_threshold: Option<u64>,
/// Maximum number of reconcilers that may run in parallel
#[arg(long)]
reconciler_concurrency: Option<usize>,
/// How long to wait for the initial database connection to be available.
#[arg(long, default_value = "5s")]
db_connect_timeout: humantime::Duration,
#[arg(long, default_value = "false")]
start_as_candidate: bool,
// TODO: make this mandatory once the helm chart gets updated
#[arg(long)]
address_for_peers: Option<Uri>,
/// `neon_local` sets this to the path of the neon_local repo dir.
/// Only relevant for testing.
// TODO: make `cfg(feature = "testing")`
#[arg(long)]
neon_local_repo_dir: Option<PathBuf>,
/// Chaos testing
#[arg(long)]
chaos_interval: Option<humantime::Duration>,
// Maximum acceptable lag for the secondary location while draining
// a pageserver
#[arg(long)]
max_secondary_lag_bytes: Option<u64>,
} }
enum StrictMode { enum StrictMode {
@@ -129,28 +86,28 @@ struct Secrets {
public_key: Option<JwtAuth>, public_key: Option<JwtAuth>,
jwt_token: Option<String>, jwt_token: Option<String>,
control_plane_jwt_token: Option<String>, control_plane_jwt_token: Option<String>,
peer_jwt_token: Option<String>,
} }
impl Secrets { impl Secrets {
const DATABASE_URL_ENV: &'static str = "DATABASE_URL"; const DATABASE_URL_ENV: &'static str = "DATABASE_URL";
const PAGESERVER_JWT_TOKEN_ENV: &'static str = "PAGESERVER_JWT_TOKEN"; const PAGESERVER_JWT_TOKEN_ENV: &'static str = "PAGESERVER_JWT_TOKEN";
const CONTROL_PLANE_JWT_TOKEN_ENV: &'static str = "CONTROL_PLANE_JWT_TOKEN"; const CONTROL_PLANE_JWT_TOKEN_ENV: &'static str = "CONTROL_PLANE_JWT_TOKEN";
const PEER_JWT_TOKEN_ENV: &'static str = "PEER_JWT_TOKEN";
const PUBLIC_KEY_ENV: &'static str = "PUBLIC_KEY"; const PUBLIC_KEY_ENV: &'static str = "PUBLIC_KEY";
/// Load secrets from, in order of preference: /// Load secrets from, in order of preference:
/// - CLI args if database URL is provided on the CLI /// - CLI args if database URL is provided on the CLI
/// - Environment variables if DATABASE_URL is set. /// - Environment variables if DATABASE_URL is set.
/// - AWS Secrets Manager secrets
async fn load(args: &Cli) -> anyhow::Result<Self> { async fn load(args: &Cli) -> anyhow::Result<Self> {
let Some(database_url) = Self::load_secret(&args.database_url, Self::DATABASE_URL_ENV) let Some(database_url) =
Self::load_secret(&args.database_url, Self::DATABASE_URL_ENV).await
else { else {
anyhow::bail!( anyhow::bail!(
"Database URL is not set (set `--database-url`, or `DATABASE_URL` environment)" "Database URL is not set (set `--database-url`, or `DATABASE_URL` environment)"
) )
}; };
let public_key = match Self::load_secret(&args.public_key, Self::PUBLIC_KEY_ENV) { let public_key = match Self::load_secret(&args.public_key, Self::PUBLIC_KEY_ENV).await {
Some(v) => Some(JwtAuth::from_key(v).context("Loading public key")?), Some(v) => Some(JwtAuth::from_key(v).context("Loading public key")?),
None => None, None => None,
}; };
@@ -158,18 +115,18 @@ impl Secrets {
let this = Self { let this = Self {
database_url, database_url,
public_key, public_key,
jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV), jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV).await,
control_plane_jwt_token: Self::load_secret( control_plane_jwt_token: Self::load_secret(
&args.control_plane_jwt_token, &args.control_plane_jwt_token,
Self::CONTROL_PLANE_JWT_TOKEN_ENV, Self::CONTROL_PLANE_JWT_TOKEN_ENV,
), )
peer_jwt_token: Self::load_secret(&args.peer_jwt_token, Self::PEER_JWT_TOKEN_ENV), .await,
}; };
Ok(this) Ok(this)
} }
fn load_secret(cli: &Option<String>, env_name: &str) -> Option<String> { async fn load_secret(cli: &Option<String>, env_name: &str) -> Option<String> {
if let Some(v) = cli { if let Some(v) = cli {
Some(v.clone()) Some(v.clone())
} else if let Ok(v) = std::env::var(env_name) { } else if let Ok(v) = std::env::var(env_name) {
@@ -180,24 +137,24 @@ impl Secrets {
} }
} }
/// Execute the diesel migrations that are built into this binary
async fn migration_run(database_url: &str) -> anyhow::Result<()> {
use diesel::PgConnection;
use diesel_migrations::{HarnessWithOutput, MigrationHarness};
let mut conn = PgConnection::establish(database_url)?;
HarnessWithOutput::write_to_stdout(&mut conn)
.run_pending_migrations(MIGRATIONS)
.map(|_| ())
.map_err(|e| anyhow::anyhow!(e))?;
Ok(())
}
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
logging::init( let default_panic = std::panic::take_hook();
LogFormat::Plain,
logging::TracingErrorLayerEnablement::Disabled,
logging::Output::Stdout,
)?;
// log using tracing so we don't get confused output by default hook writing to stderr
utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
let hook = std::panic::take_hook();
std::panic::set_hook(Box::new(move |info| { std::panic::set_hook(Box::new(move |info| {
// let sentry send a message (and flush) default_panic(info);
// and trace the error
hook(info);
std::process::exit(1); std::process::exit(1);
})); }));
@@ -214,22 +171,24 @@ fn main() -> anyhow::Result<()> {
async fn async_main() -> anyhow::Result<()> { async fn async_main() -> anyhow::Result<()> {
let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate())); let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate()));
logging::init(
LogFormat::Plain,
logging::TracingErrorLayerEnablement::Disabled,
logging::Output::Stdout,
)?;
preinitialize_metrics(); preinitialize_metrics();
let args = Cli::parse(); let args = Cli::parse();
tracing::info!( tracing::info!(
"version: {}, launch_timestamp: {}, build_tag {}, listening on {}", "version: {}, launch_timestamp: {}, build_tag {}, state at {}, listening on {}",
GIT_VERSION, GIT_VERSION,
launch_ts.to_string(), launch_ts.to_string(),
BUILD_TAG, BUILD_TAG,
args.path.as_ref().unwrap_or(&Utf8PathBuf::from("<none>")),
args.listen args.listen
); );
let build_info = BuildInfo {
revision: GIT_VERSION,
build_tag: BUILD_TAG,
};
let strict_mode = if args.dev { let strict_mode = if args.dev {
StrictMode::Dev StrictMode::Dev
} else { } else {
@@ -269,31 +228,20 @@ async fn async_main() -> anyhow::Result<()> {
let config = Config { let config = Config {
jwt_token: secrets.jwt_token, jwt_token: secrets.jwt_token,
control_plane_jwt_token: secrets.control_plane_jwt_token, control_plane_jwt_token: secrets.control_plane_jwt_token,
peer_jwt_token: secrets.peer_jwt_token,
compute_hook_url: args.compute_hook_url, compute_hook_url: args.compute_hook_url,
max_offline_interval: args max_unavailable_interval: args
.max_offline_interval .max_unavailable_interval
.map(humantime::Duration::into) .map(humantime::Duration::into)
.unwrap_or(MAX_OFFLINE_INTERVAL_DEFAULT), .unwrap_or(MAX_UNAVAILABLE_INTERVAL_DEFAULT),
max_warming_up_interval: args
.max_warming_up_interval
.map(humantime::Duration::into)
.unwrap_or(MAX_WARMING_UP_INTERVAL_DEFAULT),
reconciler_concurrency: args
.reconciler_concurrency
.unwrap_or(RECONCILER_CONCURRENCY_DEFAULT),
split_threshold: args.split_threshold,
neon_local_repo_dir: args.neon_local_repo_dir,
max_secondary_lag_bytes: args.max_secondary_lag_bytes,
address_for_peers: args.address_for_peers,
start_as_candidate: args.start_as_candidate,
http_service_port: args.listen.port() as i32,
}; };
// Validate that we can connect to the database // After loading secrets & config, but before starting anything else, apply database migrations
Persistence::await_connection(&secrets.database_url, args.db_connect_timeout.into()).await?; migration_run(&secrets.database_url)
.await
.context("Running database migrations")?;
let persistence = Arc::new(Persistence::new(secrets.database_url)); let json_path = args.path;
let persistence = Arc::new(Persistence::new(secrets.database_url, json_path.clone()));
let service = Service::spawn(config, persistence.clone()).await?; let service = Service::spawn(config, persistence.clone()).await?;
@@ -302,7 +250,7 @@ async fn async_main() -> anyhow::Result<()> {
let auth = secrets let auth = secrets
.public_key .public_key
.map(|jwt_auth| Arc::new(SwappableJwtAuth::new(jwt_auth))); .map(|jwt_auth| Arc::new(SwappableJwtAuth::new(jwt_auth)));
let router = make_router(service.clone(), auth, build_info) let router = make_router(service.clone(), auth)
.build() .build()
.map_err(|err| anyhow!(err))?; .map_err(|err| anyhow!(err))?;
let router_service = utils::http::RouterService::new(router).unwrap(); let router_service = utils::http::RouterService::new(router).unwrap();
@@ -320,22 +268,6 @@ async fn async_main() -> anyhow::Result<()> {
tracing::info!("Serving on {0}", args.listen); tracing::info!("Serving on {0}", args.listen);
let server_task = tokio::task::spawn(server); let server_task = tokio::task::spawn(server);
let chaos_task = args.chaos_interval.map(|interval| {
let service = service.clone();
let cancel = CancellationToken::new();
let cancel_bg = cancel.clone();
(
tokio::task::spawn(
async move {
let mut chaos_injector = ChaosInjector::new(service, interval.into());
chaos_injector.run(cancel_bg).await
}
.instrument(tracing::info_span!("chaos_injector")),
),
cancel,
)
});
// Wait until we receive a signal // Wait until we receive a signal
let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt())?; let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt())?;
let mut sigquit = tokio::signal::unix::signal(SignalKind::quit())?; let mut sigquit = tokio::signal::unix::signal(SignalKind::quit())?;
@@ -347,28 +279,21 @@ async fn async_main() -> anyhow::Result<()> {
} }
tracing::info!("Terminating on signal"); tracing::info!("Terminating on signal");
// Stop HTTP server first, so that we don't have to service requests if json_path.is_some() {
// while shutting down Service. // Write out a JSON dump on shutdown: this is used in compat tests to avoid passing
server_shutdown.cancel(); // full postgres dumps around.
match tokio::time::timeout(Duration::from_secs(5), server_task).await { if let Err(e) = persistence.write_tenants_json().await {
Ok(Ok(_)) => { tracing::error!("Failed to write JSON on shutdown: {e}")
tracing::info!("Joined HTTP server task");
}
Ok(Err(e)) => {
tracing::error!("Error joining HTTP server task: {e}")
}
Err(_) => {
tracing::warn!("Timed out joining HTTP server task");
// We will fall through and shut down the service anyway, any request handlers
// in flight will experience cancellation & their clients will see a torn connection.
} }
} }
// If we were injecting chaos, stop that so that we're not calling into Service while it shuts down // Stop HTTP server first, so that we don't have to service requests
if let Some((chaos_jh, chaos_cancel)) = chaos_task { // while shutting down Service
chaos_cancel.cancel(); server_shutdown.cancel();
chaos_jh.await.ok(); if let Err(e) = server_task.await {
tracing::error!("Error joining HTTP server task: {e}")
} }
tracing::info!("Joined HTTP server task");
service.shutdown().await; service.shutdown().await;
tracing::info!("Service shutdown complete"); tracing::info!("Service shutdown complete");

View File

@@ -8,16 +8,14 @@
//! The rest of the code defines label group types and deals with converting outer types to labels. //! The rest of the code defines label group types and deals with converting outer types to labels.
//! //!
use bytes::Bytes; use bytes::Bytes;
use measured::{label::LabelValue, metric::histogram, FixedCardinalityLabel, MetricGroup}; use measured::{
use metrics::NeonMetrics; label::{LabelValue, StaticLabelSet},
FixedCardinalityLabel, MetricGroup,
};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use std::sync::Mutex; use std::sync::Mutex;
use strum::IntoEnumIterator;
use crate::{ use crate::persistence::{DatabaseError, DatabaseOperation};
persistence::{DatabaseError, DatabaseOperation},
service::LeadershipStatus,
};
pub(crate) static METRICS_REGISTRY: Lazy<StorageControllerMetrics> = pub(crate) static METRICS_REGISTRY: Lazy<StorageControllerMetrics> =
Lazy::new(StorageControllerMetrics::default); Lazy::new(StorageControllerMetrics::default);
@@ -28,28 +26,21 @@ pub fn preinitialize_metrics() {
pub(crate) struct StorageControllerMetrics { pub(crate) struct StorageControllerMetrics {
pub(crate) metrics_group: StorageControllerMetricGroup, pub(crate) metrics_group: StorageControllerMetricGroup,
encoder: Mutex<measured::text::BufferedTextEncoder>, encoder: Mutex<measured::text::TextEncoder>,
} }
#[derive(measured::MetricGroup)] #[derive(measured::MetricGroup)]
#[metric(new())]
pub(crate) struct StorageControllerMetricGroup { pub(crate) struct StorageControllerMetricGroup {
/// Count of how many times we spawn a reconcile task /// Count of how many times we spawn a reconcile task
pub(crate) storage_controller_reconcile_spawn: measured::Counter, pub(crate) storage_controller_reconcile_spawn: measured::Counter,
/// Reconciler tasks completed, broken down by success/failure/cancelled /// Reconciler tasks completed, broken down by success/failure/cancelled
pub(crate) storage_controller_reconcile_complete: pub(crate) storage_controller_reconcile_complete:
measured::CounterVec<ReconcileCompleteLabelGroupSet>, measured::CounterVec<ReconcileCompleteLabelGroupSet>,
/// Count of how many times we make an optimization change to a tenant's scheduling
pub(crate) storage_controller_schedule_optimization: measured::Counter,
/// HTTP request status counters for handled requests /// HTTP request status counters for handled requests
pub(crate) storage_controller_http_request_status: pub(crate) storage_controller_http_request_status:
measured::CounterVec<HttpRequestStatusLabelGroupSet>, measured::CounterVec<HttpRequestStatusLabelGroupSet>,
/// HTTP request handler latency across all status codes /// HTTP request handler latency across all status codes
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
pub(crate) storage_controller_http_request_latency: pub(crate) storage_controller_http_request_latency:
measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>, measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>,
@@ -61,7 +52,6 @@ pub(crate) struct StorageControllerMetricGroup {
/// Latency of HTTP requests to the pageserver, broken down by pageserver /// Latency of HTTP requests to the pageserver, broken down by pageserver
/// node id, request name and method. This include both successful and unsuccessful /// node id, request name and method. This include both successful and unsuccessful
/// requests. /// requests.
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
pub(crate) storage_controller_pageserver_request_latency: pub(crate) storage_controller_pageserver_request_latency:
measured::HistogramVec<PageserverRequestLabelGroupSet, 5>, measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
@@ -73,7 +63,6 @@ pub(crate) struct StorageControllerMetricGroup {
/// Latency of pass-through HTTP requests to the pageserver, broken down by pageserver /// Latency of pass-through HTTP requests to the pageserver, broken down by pageserver
/// node id, request name and method. This include both successful and unsuccessful /// node id, request name and method. This include both successful and unsuccessful
/// requests. /// requests.
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
pub(crate) storage_controller_passthrough_request_latency: pub(crate) storage_controller_passthrough_request_latency:
measured::HistogramVec<PageserverRequestLabelGroupSet, 5>, measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
@@ -82,36 +71,75 @@ pub(crate) struct StorageControllerMetricGroup {
measured::CounterVec<DatabaseQueryErrorLabelGroupSet>, measured::CounterVec<DatabaseQueryErrorLabelGroupSet>,
/// Latency of database queries, broken down by operation. /// Latency of database queries, broken down by operation.
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
pub(crate) storage_controller_database_query_latency: pub(crate) storage_controller_database_query_latency:
measured::HistogramVec<DatabaseQueryLatencyLabelGroupSet, 5>, measured::HistogramVec<DatabaseQueryLatencyLabelGroupSet, 5>,
pub(crate) storage_controller_leadership_status: measured::GaugeVec<LeadershipStatusGroupSet>,
} }
impl StorageControllerMetrics { impl StorageControllerMetrics {
pub(crate) fn encode(&self, neon_metrics: &NeonMetrics) -> Bytes { pub(crate) fn encode(&self) -> Bytes {
let mut encoder = self.encoder.lock().unwrap(); let mut encoder = self.encoder.lock().unwrap();
neon_metrics self.metrics_group.collect_into(&mut *encoder);
.collect_group_into(&mut *encoder)
.unwrap_or_else(|infallible| match infallible {});
self.metrics_group
.collect_group_into(&mut *encoder)
.unwrap_or_else(|infallible| match infallible {});
encoder.finish() encoder.finish()
} }
} }
impl Default for StorageControllerMetrics { impl Default for StorageControllerMetrics {
fn default() -> Self { fn default() -> Self {
let mut metrics_group = StorageControllerMetricGroup::new();
metrics_group
.storage_controller_reconcile_complete
.init_all_dense();
Self { Self {
metrics_group, metrics_group: StorageControllerMetricGroup::new(),
encoder: Mutex::new(measured::text::BufferedTextEncoder::new()), encoder: Mutex::new(measured::text::TextEncoder::new()),
}
}
}
impl StorageControllerMetricGroup {
pub(crate) fn new() -> Self {
Self {
storage_controller_reconcile_spawn: measured::Counter::new(),
storage_controller_reconcile_complete: measured::CounterVec::new(
ReconcileCompleteLabelGroupSet {
status: StaticLabelSet::new(),
},
),
storage_controller_http_request_status: measured::CounterVec::new(
HttpRequestStatusLabelGroupSet {
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
status: StaticLabelSet::new(),
},
),
storage_controller_http_request_latency: measured::HistogramVec::new(
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
),
storage_controller_pageserver_request_error: measured::CounterVec::new(
PageserverRequestLabelGroupSet {
pageserver_id: lasso::ThreadedRodeo::new(),
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
},
),
storage_controller_pageserver_request_latency: measured::HistogramVec::new(
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
),
storage_controller_passthrough_request_error: measured::CounterVec::new(
PageserverRequestLabelGroupSet {
pageserver_id: lasso::ThreadedRodeo::new(),
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
},
),
storage_controller_passthrough_request_latency: measured::HistogramVec::new(
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
),
storage_controller_database_query_error: measured::CounterVec::new(
DatabaseQueryErrorLabelGroupSet {
operation: StaticLabelSet::new(),
error_type: StaticLabelSet::new(),
},
),
storage_controller_database_query_latency: measured::HistogramVec::new(
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
),
} }
} }
} }
@@ -125,7 +153,7 @@ pub(crate) struct ReconcileCompleteLabelGroup {
#[derive(measured::LabelGroup)] #[derive(measured::LabelGroup)]
#[label(set = HttpRequestStatusLabelGroupSet)] #[label(set = HttpRequestStatusLabelGroupSet)]
pub(crate) struct HttpRequestStatusLabelGroup<'a> { pub(crate) struct HttpRequestStatusLabelGroup<'a> {
#[label(dynamic_with = lasso::ThreadedRodeo, default)] #[label(dynamic_with = lasso::ThreadedRodeo)]
pub(crate) path: &'a str, pub(crate) path: &'a str,
pub(crate) method: Method, pub(crate) method: Method,
pub(crate) status: StatusCode, pub(crate) status: StatusCode,
@@ -134,21 +162,40 @@ pub(crate) struct HttpRequestStatusLabelGroup<'a> {
#[derive(measured::LabelGroup)] #[derive(measured::LabelGroup)]
#[label(set = HttpRequestLatencyLabelGroupSet)] #[label(set = HttpRequestLatencyLabelGroupSet)]
pub(crate) struct HttpRequestLatencyLabelGroup<'a> { pub(crate) struct HttpRequestLatencyLabelGroup<'a> {
#[label(dynamic_with = lasso::ThreadedRodeo, default)] #[label(dynamic_with = lasso::ThreadedRodeo)]
pub(crate) path: &'a str, pub(crate) path: &'a str,
pub(crate) method: Method, pub(crate) method: Method,
} }
impl Default for HttpRequestLatencyLabelGroupSet {
fn default() -> Self {
Self {
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
}
}
}
#[derive(measured::LabelGroup, Clone)] #[derive(measured::LabelGroup, Clone)]
#[label(set = PageserverRequestLabelGroupSet)] #[label(set = PageserverRequestLabelGroupSet)]
pub(crate) struct PageserverRequestLabelGroup<'a> { pub(crate) struct PageserverRequestLabelGroup<'a> {
#[label(dynamic_with = lasso::ThreadedRodeo, default)] #[label(dynamic_with = lasso::ThreadedRodeo)]
pub(crate) pageserver_id: &'a str, pub(crate) pageserver_id: &'a str,
#[label(dynamic_with = lasso::ThreadedRodeo, default)] #[label(dynamic_with = lasso::ThreadedRodeo)]
pub(crate) path: &'a str, pub(crate) path: &'a str,
pub(crate) method: Method, pub(crate) method: Method,
} }
impl Default for PageserverRequestLabelGroupSet {
fn default() -> Self {
Self {
pageserver_id: lasso::ThreadedRodeo::new(),
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
}
}
}
#[derive(measured::LabelGroup)] #[derive(measured::LabelGroup)]
#[label(set = DatabaseQueryErrorLabelGroupSet)] #[label(set = DatabaseQueryErrorLabelGroupSet)]
pub(crate) struct DatabaseQueryErrorLabelGroup { pub(crate) struct DatabaseQueryErrorLabelGroup {
@@ -162,13 +209,7 @@ pub(crate) struct DatabaseQueryLatencyLabelGroup {
pub(crate) operation: DatabaseOperation, pub(crate) operation: DatabaseOperation,
} }
#[derive(measured::LabelGroup)] #[derive(FixedCardinalityLabel)]
#[label(set = LeadershipStatusGroupSet)]
pub(crate) struct LeadershipStatusGroup {
pub(crate) status: LeadershipStatus,
}
#[derive(FixedCardinalityLabel, Clone, Copy)]
pub(crate) enum ReconcileOutcome { pub(crate) enum ReconcileOutcome {
#[label(rename = "ok")] #[label(rename = "ok")]
Success, Success,
@@ -176,7 +217,7 @@ pub(crate) enum ReconcileOutcome {
Cancel, Cancel,
} }
#[derive(FixedCardinalityLabel, Copy, Clone)] #[derive(FixedCardinalityLabel, Clone)]
pub(crate) enum Method { pub(crate) enum Method {
Get, Get,
Put, Put,
@@ -201,12 +242,11 @@ impl From<hyper::Method> for Method {
} }
} }
#[derive(Clone, Copy)]
pub(crate) struct StatusCode(pub(crate) hyper::http::StatusCode); pub(crate) struct StatusCode(pub(crate) hyper::http::StatusCode);
impl LabelValue for StatusCode { impl LabelValue for StatusCode {
fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output { fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {
v.write_int(self.0.as_u16() as i64) v.write_int(self.0.as_u16() as u64)
} }
} }
@@ -224,13 +264,12 @@ impl FixedCardinalityLabel for StatusCode {
} }
} }
#[derive(FixedCardinalityLabel, Clone, Copy)] #[derive(FixedCardinalityLabel)]
pub(crate) enum DatabaseErrorLabel { pub(crate) enum DatabaseErrorLabel {
Query, Query,
Connection, Connection,
ConnectionPool, ConnectionPool,
Logical, Logical,
Migration,
} }
impl DatabaseError { impl DatabaseError {
@@ -240,22 +279,6 @@ impl DatabaseError {
Self::Connection(_) => DatabaseErrorLabel::Connection, Self::Connection(_) => DatabaseErrorLabel::Connection,
Self::ConnectionPool(_) => DatabaseErrorLabel::ConnectionPool, Self::ConnectionPool(_) => DatabaseErrorLabel::ConnectionPool,
Self::Logical(_) => DatabaseErrorLabel::Logical, Self::Logical(_) => DatabaseErrorLabel::Logical,
Self::Migration(_) => DatabaseErrorLabel::Migration,
}
}
}
/// Update the leadership status metric gauges to reflect the requested status
pub(crate) fn update_leadership_status(status: LeadershipStatus) {
let status_metric = &METRICS_REGISTRY
.metrics_group
.storage_controller_leadership_status;
for s in LeadershipStatus::iter() {
if s == status {
status_metric.set(LeadershipStatusGroup { status: s }, 1);
} else {
status_metric.set(LeadershipStatusGroup { status: s }, 0);
} }
} }
} }

View File

@@ -1,14 +1,13 @@
use std::{str::FromStr, time::Duration}; use std::{str::FromStr, time::Duration};
use hyper::StatusCode;
use pageserver_api::{ use pageserver_api::{
controller_api::{ controller_api::{
NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy, NodeAvailability, NodeRegisterRequest, NodeSchedulingPolicy, TenantLocateResponseShard,
TenantLocateResponseShard,
}, },
shard::TenantShardId, shard::TenantShardId,
}; };
use pageserver_client::mgmt_api; use pageserver_client::mgmt_api;
use reqwest::StatusCode;
use serde::Serialize; use serde::Serialize;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use utils::{backoff, id::NodeId}; use utils::{backoff, id::NodeId};
@@ -46,8 +45,6 @@ pub(crate) struct Node {
/// whether/how they changed it. /// whether/how they changed it.
pub(crate) enum AvailabilityTransition { pub(crate) enum AvailabilityTransition {
ToActive, ToActive,
ToWarmingUpFromActive,
ToWarmingUpFromOffline,
ToOffline, ToOffline,
Unchanged, Unchanged,
} }
@@ -61,10 +58,6 @@ impl Node {
self.id self.id
} }
pub(crate) fn get_scheduling(&self) -> NodeSchedulingPolicy {
self.scheduling
}
pub(crate) fn set_scheduling(&mut self, scheduling: NodeSchedulingPolicy) { pub(crate) fn set_scheduling(&mut self, scheduling: NodeSchedulingPolicy) {
self.scheduling = scheduling self.scheduling = scheduling
} }
@@ -92,52 +85,36 @@ impl Node {
} }
} }
pub(crate) fn get_availability(&self) -> &NodeAvailability {
&self.availability
}
pub(crate) fn set_availability(&mut self, availability: NodeAvailability) { pub(crate) fn set_availability(&mut self, availability: NodeAvailability) {
use AvailabilityTransition::*; match self.get_availability_transition(availability) {
use NodeAvailability::WarmingUp; AvailabilityTransition::ToActive => {
match self.get_availability_transition(&availability) {
ToActive => {
// Give the node a new cancellation token, effectively resetting it to un-cancelled. Any // Give the node a new cancellation token, effectively resetting it to un-cancelled. Any
// users of previously-cloned copies of the node will still see the old cancellation // users of previously-cloned copies of the node will still see the old cancellation
// state. For example, Reconcilers in flight will have to complete and be spawned // state. For example, Reconcilers in flight will have to complete and be spawned
// again to realize that the node has become available. // again to realize that the node has become available.
self.cancel = CancellationToken::new(); self.cancel = CancellationToken::new();
} }
ToOffline | ToWarmingUpFromActive => { AvailabilityTransition::ToOffline => {
// Fire the node's cancellation token to cancel any in-flight API requests to it // Fire the node's cancellation token to cancel any in-flight API requests to it
self.cancel.cancel(); self.cancel.cancel();
} }
Unchanged | ToWarmingUpFromOffline => {} AvailabilityTransition::Unchanged => {}
}
if let (WarmingUp(crnt), WarmingUp(proposed)) = (&self.availability, &availability) {
self.availability = WarmingUp(std::cmp::max(*crnt, *proposed));
} else {
self.availability = availability;
} }
self.availability = availability;
} }
/// Without modifying the availability of the node, convert the intended availability /// Without modifying the availability of the node, convert the intended availability
/// into a description of the transition. /// into a description of the transition.
pub(crate) fn get_availability_transition( pub(crate) fn get_availability_transition(
&self, &self,
availability: &NodeAvailability, availability: NodeAvailability,
) -> AvailabilityTransition { ) -> AvailabilityTransition {
use AvailabilityTransition::*; use AvailabilityTransition::*;
use NodeAvailability::*; use NodeAvailability::*;
match (&self.availability, availability) { match (self.availability, availability) {
(Offline, Active(_)) => ToActive, (Offline, Active(_)) => ToActive,
(Active(_), Offline) => ToOffline, (Active(_), Offline) => ToOffline,
(Active(_), WarmingUp(_)) => ToWarmingUpFromActive,
(WarmingUp(_), Offline) => ToOffline,
(WarmingUp(_), Active(_)) => ToActive,
(Offline, WarmingUp(_)) => ToWarmingUpFromOffline,
_ => Unchanged, _ => Unchanged,
} }
} }
@@ -153,17 +130,16 @@ impl Node {
/// Is this node elegible to have work scheduled onto it? /// Is this node elegible to have work scheduled onto it?
pub(crate) fn may_schedule(&self) -> MaySchedule { pub(crate) fn may_schedule(&self) -> MaySchedule {
let utilization = match &self.availability { let score = match self.availability {
NodeAvailability::Active(u) => u.clone(), NodeAvailability::Active(score) => score,
NodeAvailability::Offline | NodeAvailability::WarmingUp(_) => return MaySchedule::No, NodeAvailability::Offline => return MaySchedule::No,
}; };
match self.scheduling { match self.scheduling {
NodeSchedulingPolicy::Active => MaySchedule::Yes(utilization), NodeSchedulingPolicy::Active => MaySchedule::Yes(score),
NodeSchedulingPolicy::Draining => MaySchedule::No, NodeSchedulingPolicy::Draining => MaySchedule::No,
NodeSchedulingPolicy::Filling => MaySchedule::Yes(utilization), NodeSchedulingPolicy::Filling => MaySchedule::Yes(score),
NodeSchedulingPolicy::Pause => MaySchedule::No, NodeSchedulingPolicy::Pause => MaySchedule::No,
NodeSchedulingPolicy::PauseForRestart => MaySchedule::No,
} }
} }
@@ -180,7 +156,7 @@ impl Node {
listen_http_port, listen_http_port,
listen_pg_addr, listen_pg_addr,
listen_pg_port, listen_pg_port,
scheduling: NodeSchedulingPolicy::Active, scheduling: NodeSchedulingPolicy::Filling,
availability: NodeAvailability::Offline, availability: NodeAvailability::Offline,
cancel: CancellationToken::new(), cancel: CancellationToken::new(),
} }
@@ -234,7 +210,7 @@ impl Node {
fn is_fatal(e: &mgmt_api::Error) -> bool { fn is_fatal(e: &mgmt_api::Error) -> bool {
use mgmt_api::Error::*; use mgmt_api::Error::*;
match e { match e {
SendRequest(_) | ReceiveBody(_) | ReceiveErrorBody(_) => false, ReceiveBody(_) | ReceiveErrorBody(_) => false,
ApiError(StatusCode::SERVICE_UNAVAILABLE, _) ApiError(StatusCode::SERVICE_UNAVAILABLE, _)
| ApiError(StatusCode::GATEWAY_TIMEOUT, _) | ApiError(StatusCode::GATEWAY_TIMEOUT, _)
| ApiError(StatusCode::REQUEST_TIMEOUT, _) => false, | ApiError(StatusCode::REQUEST_TIMEOUT, _) => false,
@@ -280,19 +256,6 @@ impl Node {
) )
.await .await
} }
/// Generate the simplified API-friendly description of a node's state
pub(crate) fn describe(&self) -> NodeDescribeResponse {
NodeDescribeResponse {
id: self.id,
availability: self.availability.clone().into(),
scheduling: self.scheduling,
listen_http_addr: self.listen_http_addr.clone(),
listen_http_port: self.listen_http_port,
listen_pg_addr: self.listen_pg_addr.clone(),
listen_pg_port: self.listen_pg_port,
}
}
} }
impl std::fmt::Display for Node { impl std::fmt::Display for Node {

View File

@@ -1,15 +1,13 @@
use pageserver_api::{ use pageserver_api::{
models::{ models::{
detach_ancestor::AncestorDetached, LocationConfig, LocationConfigListResponse, LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
PageserverUtilization, SecondaryProgress, TenantScanRemoteStorageResponse,
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo, TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
TopTenantShardsRequest, TopTenantShardsResponse,
}, },
shard::TenantShardId, shard::TenantShardId,
}; };
use pageserver_client::mgmt_api::{Client, Result}; use pageserver_client::mgmt_api::{Client, Result};
use reqwest::StatusCode; use reqwest::StatusCode;
use utils::id::{NodeId, TenantId, TimelineId}; use utils::id::{NodeId, TimelineId};
/// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage /// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
/// controller to collect metrics in a non-intrusive manner. /// controller to collect metrics in a non-intrusive manner.
@@ -90,18 +88,6 @@ impl PageserverClient {
) )
} }
pub(crate) async fn tenant_scan_remote_storage(
&self,
tenant_id: TenantId,
) -> Result<TenantScanRemoteStorageResponse> {
measured_request!(
"tenant_scan_remote_storage",
crate::metrics::Method::Get,
&self.node_id_label,
self.inner.tenant_scan_remote_storage(tenant_id).await
)
}
pub(crate) async fn tenant_secondary_download( pub(crate) async fn tenant_secondary_download(
&self, &self,
tenant_id: TenantShardId, tenant_id: TenantShardId,
@@ -115,27 +101,6 @@ impl PageserverClient {
) )
} }
pub(crate) async fn tenant_secondary_status(
&self,
tenant_shard_id: TenantShardId,
) -> Result<SecondaryProgress> {
measured_request!(
"tenant_secondary_status",
crate::metrics::Method::Get,
&self.node_id_label,
self.inner.tenant_secondary_status(tenant_shard_id).await
)
}
pub(crate) async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {
measured_request!(
"tenant_heatmap_upload",
crate::metrics::Method::Post,
&self.node_id_label,
self.inner.tenant_heatmap_upload(tenant_id).await
)
}
pub(crate) async fn location_config( pub(crate) async fn location_config(
&self, &self,
tenant_shard_id: TenantShardId, tenant_shard_id: TenantShardId,
@@ -227,21 +192,6 @@ impl PageserverClient {
) )
} }
pub(crate) async fn timeline_detach_ancestor(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
) -> Result<AncestorDetached> {
measured_request!(
"timeline_detach_ancestor",
crate::metrics::Method::Put,
&self.node_id_label,
self.inner
.timeline_detach_ancestor(tenant_shard_id, timeline_id)
.await
)
}
pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> { pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
measured_request!( measured_request!(
"utilization", "utilization",
@@ -250,16 +200,4 @@ impl PageserverClient {
self.inner.get_utilization().await self.inner.get_utilization().await
) )
} }
pub(crate) async fn top_tenant_shards(
&self,
request: TopTenantShardsRequest,
) -> Result<TopTenantShardsResponse> {
measured_request!(
"top_tenants",
crate::metrics::Method::Post,
&self.node_id_label,
self.inner.top_tenant_shards(request).await
)
}
} }

View File

@@ -2,14 +2,13 @@ pub(crate) mod split_state;
use std::collections::HashMap; use std::collections::HashMap;
use std::str::FromStr; use std::str::FromStr;
use std::time::Duration; use std::time::Duration;
use std::time::Instant;
use self::split_state::SplitState; use self::split_state::SplitState;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use diesel::pg::PgConnection; use diesel::pg::PgConnection;
use diesel::prelude::*; use diesel::prelude::*;
use diesel::Connection; use diesel::Connection;
use pageserver_api::controller_api::MetadataHealthRecord;
use pageserver_api::controller_api::ShardSchedulingPolicy;
use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy}; use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy};
use pageserver_api::models::TenantConfig; use pageserver_api::models::TenantConfig;
use pageserver_api::shard::ShardConfigError; use pageserver_api::shard::ShardConfigError;
@@ -25,9 +24,6 @@ use crate::metrics::{
}; };
use crate::node::Node; use crate::node::Node;
use diesel_migrations::{embed_migrations, EmbeddedMigrations};
const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations");
/// ## What do we store? /// ## What do we store?
/// ///
/// The storage controller service does not store most of its state durably. /// The storage controller service does not store most of its state durably.
@@ -57,6 +53,11 @@ const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations");
/// we can UPDATE a node's scheduling mode reasonably quickly to mark a bad node offline. /// we can UPDATE a node's scheduling mode reasonably quickly to mark a bad node offline.
pub struct Persistence { pub struct Persistence {
connection_pool: diesel::r2d2::Pool<diesel::r2d2::ConnectionManager<PgConnection>>, connection_pool: diesel::r2d2::Pool<diesel::r2d2::ConnectionManager<PgConnection>>,
// In test environments, we support loading+saving a JSON file. This is temporary, for the benefit of
// test_compatibility.py, so that we don't have to commit to making the database contents fully backward/forward
// compatible just yet.
json_path: Option<Utf8PathBuf>,
} }
/// Legacy format, for use in JSON compat objects in test environment /// Legacy format, for use in JSON compat objects in test environment
@@ -75,11 +76,9 @@ pub(crate) enum DatabaseError {
ConnectionPool(#[from] r2d2::Error), ConnectionPool(#[from] r2d2::Error),
#[error("Logical error: {0}")] #[error("Logical error: {0}")]
Logical(String), Logical(String),
#[error("Migration error: {0}")]
Migration(String),
} }
#[derive(measured::FixedCardinalityLabel, Copy, Clone)] #[derive(measured::FixedCardinalityLabel, Clone)]
pub(crate) enum DatabaseOperation { pub(crate) enum DatabaseOperation {
InsertNode, InsertNode,
UpdateNode, UpdateNode,
@@ -91,18 +90,11 @@ pub(crate) enum DatabaseOperation {
Detach, Detach,
ReAttach, ReAttach,
IncrementGeneration, IncrementGeneration,
PeekGenerations,
ListTenantShards, ListTenantShards,
InsertTenantShards, InsertTenantShards,
UpdateTenantShard, UpdateTenantShard,
DeleteTenant, DeleteTenant,
UpdateTenantConfig, UpdateTenantConfig,
UpdateMetadataHealth,
ListMetadataHealth,
ListMetadataHealthUnhealthy,
ListMetadataHealthOutdated,
GetLeader,
UpdateLeader,
} }
#[must_use] #[must_use]
@@ -115,12 +107,6 @@ pub(crate) enum AbortShardSplitStatus {
pub(crate) type DatabaseResult<T> = Result<T, DatabaseError>; pub(crate) type DatabaseResult<T> = Result<T, DatabaseError>;
/// Some methods can operate on either a whole tenant or a single shard
pub(crate) enum TenantFilter {
Tenant(TenantId),
Shard(TenantShardId),
}
impl Persistence { impl Persistence {
// The default postgres connection limit is 100. We use up to 99, to leave one free for a human admin under // The default postgres connection limit is 100. We use up to 99, to leave one free for a human admin under
// normal circumstances. This assumes we have exclusive use of the database cluster to which we connect. // normal circumstances. This assumes we have exclusive use of the database cluster to which we connect.
@@ -130,7 +116,7 @@ impl Persistence {
const IDLE_CONNECTION_TIMEOUT: Duration = Duration::from_secs(10); const IDLE_CONNECTION_TIMEOUT: Duration = Duration::from_secs(10);
const MAX_CONNECTION_LIFETIME: Duration = Duration::from_secs(60); const MAX_CONNECTION_LIFETIME: Duration = Duration::from_secs(60);
pub fn new(database_url: String) -> Self { pub fn new(database_url: String, json_path: Option<Utf8PathBuf>) -> Self {
let manager = diesel::r2d2::ConnectionManager::<PgConnection>::new(database_url); let manager = diesel::r2d2::ConnectionManager::<PgConnection>::new(database_url);
// We will use a connection pool: this is primarily to _limit_ our connection count, rather than to optimize time // We will use a connection pool: this is primarily to _limit_ our connection count, rather than to optimize time
@@ -145,47 +131,12 @@ impl Persistence {
.build(manager) .build(manager)
.expect("Could not build connection pool"); .expect("Could not build connection pool");
Self { connection_pool } Self {
} connection_pool,
json_path,
/// A helper for use during startup, where we would like to tolerate concurrent restarts of the
/// database and the storage controller, therefore the database might not be available right away
pub async fn await_connection(
database_url: &str,
timeout: Duration,
) -> Result<(), diesel::ConnectionError> {
let started_at = Instant::now();
loop {
match PgConnection::establish(database_url) {
Ok(_) => {
tracing::info!("Connected to database.");
return Ok(());
}
Err(e) => {
if started_at.elapsed() > timeout {
return Err(e);
} else {
tracing::info!("Database not yet available, waiting... ({e})");
tokio::time::sleep(Duration::from_millis(100)).await;
}
}
}
} }
} }
/// Execute the diesel migrations that are built into this binary
pub(crate) async fn migration_run(&self) -> DatabaseResult<()> {
use diesel_migrations::{HarnessWithOutput, MigrationHarness};
self.with_conn(move |conn| -> DatabaseResult<()> {
HarnessWithOutput::write_to_stdout(conn)
.run_pending_migrations(MIGRATIONS)
.map(|_| ())
.map_err(|e| DatabaseError::Migration(e.to_string()))
})
.await
}
/// Wraps `with_conn` in order to collect latency and error metrics /// Wraps `with_conn` in order to collect latency and error metrics
async fn with_measured_conn<F, R>(&self, op: DatabaseOperation, func: F) -> DatabaseResult<R> async fn with_measured_conn<F, R>(&self, op: DatabaseOperation, func: F) -> DatabaseResult<R>
where where
@@ -195,7 +146,9 @@ impl Persistence {
let latency = &METRICS_REGISTRY let latency = &METRICS_REGISTRY
.metrics_group .metrics_group
.storage_controller_database_query_latency; .storage_controller_database_query_latency;
let _timer = latency.start_timer(DatabaseQueryLatencyLabelGroup { operation: op }); let _timer = latency.start_timer(DatabaseQueryLatencyLabelGroup {
operation: op.clone(),
});
let res = self.with_conn(func).await; let res = self.with_conn(func).await;
@@ -218,45 +171,10 @@ impl Persistence {
F: Fn(&mut PgConnection) -> DatabaseResult<R> + Send + 'static, F: Fn(&mut PgConnection) -> DatabaseResult<R> + Send + 'static,
R: Send + 'static, R: Send + 'static,
{ {
// A generous allowance for how many times we may retry serializable transactions
// before giving up. This is not expected to be hit: it is a defensive measure in case we
// somehow engineer a situation where duelling transactions might otherwise live-lock.
const MAX_RETRIES: usize = 128;
let mut conn = self.connection_pool.get()?; let mut conn = self.connection_pool.get()?;
tokio::task::spawn_blocking(move || -> DatabaseResult<R> { tokio::task::spawn_blocking(move || -> DatabaseResult<R> { func(&mut conn) })
let mut retry_count = 0; .await
loop { .expect("Task panic")
match conn.build_transaction().serializable().run(|c| func(c)) {
Ok(r) => break Ok(r),
Err(
err @ DatabaseError::Query(diesel::result::Error::DatabaseError(
diesel::result::DatabaseErrorKind::SerializationFailure,
_,
)),
) => {
retry_count += 1;
if retry_count > MAX_RETRIES {
tracing::error!(
"Exceeded max retries on SerializationFailure errors: {err:?}"
);
break Err(err);
} else {
// Retry on serialization errors: these are expected, because even though our
// transactions don't fight for the same rows, they will occasionally collide
// on index pages (e.g. increment_generation for unrelated shards can collide)
tracing::debug!(
"Retrying transaction on serialization failure {err:?}"
);
continue;
}
}
Err(e) => break Err(e),
}
}
})
.await
.expect("Task panic")
} }
/// When a node is first registered, persist it before using it for anything /// When a node is first registered, persist it before using it for anything
@@ -318,13 +236,80 @@ impl Persistence {
/// At startup, load the high level state for shards, such as their config + policy. This will /// At startup, load the high level state for shards, such as their config + policy. This will
/// be enriched at runtime with state discovered on pageservers. /// be enriched at runtime with state discovered on pageservers.
pub(crate) async fn list_tenant_shards(&self) -> DatabaseResult<Vec<TenantShardPersistence>> { pub(crate) async fn list_tenant_shards(&self) -> DatabaseResult<Vec<TenantShardPersistence>> {
self.with_measured_conn( let loaded = self
DatabaseOperation::ListTenantShards, .with_measured_conn(
move |conn| -> DatabaseResult<_> { DatabaseOperation::ListTenantShards,
Ok(crate::schema::tenant_shards::table.load::<TenantShardPersistence>(conn)?) move |conn| -> DatabaseResult<_> {
}, Ok(crate::schema::tenant_shards::table.load::<TenantShardPersistence>(conn)?)
) },
.await )
.await?;
if loaded.is_empty() {
if let Some(path) = &self.json_path {
if tokio::fs::try_exists(path)
.await
.map_err(|e| DatabaseError::Logical(format!("Error stat'ing JSON file: {e}")))?
{
tracing::info!("Importing from legacy JSON format at {path}");
return self.list_tenant_shards_json(path).await;
}
}
}
Ok(loaded)
}
/// Shim for automated compatibility tests: load tenants from a JSON file instead of database
pub(crate) async fn list_tenant_shards_json(
&self,
path: &Utf8Path,
) -> DatabaseResult<Vec<TenantShardPersistence>> {
let bytes = tokio::fs::read(path)
.await
.map_err(|e| DatabaseError::Logical(format!("Failed to load JSON: {e}")))?;
let mut decoded = serde_json::from_slice::<JsonPersistence>(&bytes)
.map_err(|e| DatabaseError::Logical(format!("Deserialization error: {e}")))?;
for shard in decoded.tenants.values_mut() {
if shard.placement_policy == "\"Single\"" {
// Backward compat for test data after PR https://github.com/neondatabase/neon/pull/7165
shard.placement_policy = "{\"Attached\":0}".to_string();
}
}
let tenants: Vec<TenantShardPersistence> = decoded.tenants.into_values().collect();
// Synchronize database with what is in the JSON file
self.insert_tenant_shards(tenants.clone()).await?;
Ok(tenants)
}
/// For use in testing environments, where we dump out JSON on shutdown.
pub async fn write_tenants_json(&self) -> anyhow::Result<()> {
let Some(path) = &self.json_path else {
anyhow::bail!("Cannot write JSON if path isn't set (test environment bug)");
};
tracing::info!("Writing state to {path}...");
let tenants = self.list_tenant_shards().await?;
let mut tenants_map = HashMap::new();
for tsp in tenants {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount::new(tsp.shard_count as u8),
};
tenants_map.insert(tenant_shard_id, tsp);
}
let json = serde_json::to_string(&JsonPersistence {
tenants: tenants_map,
})?;
tokio::fs::write(path, &json).await?;
tracing::info!("Wrote {} bytes to {path}...", json.len());
Ok(())
} }
/// Tenants must be persisted before we schedule them for the first time. This enables us /// Tenants must be persisted before we schedule them for the first time. This enables us
@@ -333,32 +318,18 @@ impl Persistence {
&self, &self,
shards: Vec<TenantShardPersistence>, shards: Vec<TenantShardPersistence>,
) -> DatabaseResult<()> { ) -> DatabaseResult<()> {
use crate::schema::metadata_health; use crate::schema::tenant_shards::dsl::*;
use crate::schema::tenant_shards;
let now = chrono::Utc::now();
let metadata_health_records = shards
.iter()
.map(|t| MetadataHealthPersistence {
tenant_id: t.tenant_id.clone(),
shard_number: t.shard_number,
shard_count: t.shard_count,
healthy: true,
last_scrubbed_at: now,
})
.collect::<Vec<_>>();
self.with_measured_conn( self.with_measured_conn(
DatabaseOperation::InsertTenantShards, DatabaseOperation::InsertTenantShards,
move |conn| -> DatabaseResult<()> { move |conn| -> DatabaseResult<()> {
diesel::insert_into(tenant_shards::table) conn.transaction(|conn| -> QueryResult<()> {
.values(&shards) for tenant in &shards {
.execute(conn)?; diesel::insert_into(tenant_shards)
.values(tenant)
diesel::insert_into(metadata_health::table) .execute(conn)?;
.values(&metadata_health_records) }
.execute(conn)?; Ok(())
})?;
Ok(()) Ok(())
}, },
) )
@@ -372,10 +343,10 @@ impl Persistence {
self.with_measured_conn( self.with_measured_conn(
DatabaseOperation::DeleteTenant, DatabaseOperation::DeleteTenant,
move |conn| -> DatabaseResult<()> { move |conn| -> DatabaseResult<()> {
// `metadata_health` status (if exists) is also deleted based on the cascade behavior.
diesel::delete(tenant_shards) diesel::delete(tenant_shards)
.filter(tenant_id.eq(del_tenant_id.to_string())) .filter(tenant_id.eq(del_tenant_id.to_string()))
.execute(conn)?; .execute(conn)?;
Ok(()) Ok(())
}, },
) )
@@ -403,15 +374,13 @@ impl Persistence {
#[tracing::instrument(skip_all, fields(node_id))] #[tracing::instrument(skip_all, fields(node_id))]
pub(crate) async fn re_attach( pub(crate) async fn re_attach(
&self, &self,
input_node_id: NodeId, node_id: NodeId,
) -> DatabaseResult<HashMap<TenantShardId, Generation>> { ) -> DatabaseResult<HashMap<TenantShardId, Generation>> {
use crate::schema::nodes::dsl::scheduling_policy;
use crate::schema::nodes::dsl::*;
use crate::schema::tenant_shards::dsl::*; use crate::schema::tenant_shards::dsl::*;
let updated = self let updated = self
.with_measured_conn(DatabaseOperation::ReAttach, move |conn| { .with_measured_conn(DatabaseOperation::ReAttach, move |conn| {
let rows_updated = diesel::update(tenant_shards) let rows_updated = diesel::update(tenant_shards)
.filter(generation_pageserver.eq(input_node_id.0 as i64)) .filter(generation_pageserver.eq(node_id.0 as i64))
.set(generation.eq(generation + 1)) .set(generation.eq(generation + 1))
.execute(conn)?; .execute(conn)?;
@@ -420,23 +389,9 @@ impl Persistence {
// TODO: UPDATE+SELECT in one query // TODO: UPDATE+SELECT in one query
let updated = tenant_shards let updated = tenant_shards
.filter(generation_pageserver.eq(input_node_id.0 as i64)) .filter(generation_pageserver.eq(node_id.0 as i64))
.select(TenantShardPersistence::as_select()) .select(TenantShardPersistence::as_select())
.load(conn)?; .load(conn)?;
// If the node went through a drain and restart phase before re-attaching,
// then reset it's node scheduling policy to active.
diesel::update(nodes)
.filter(node_id.eq(input_node_id.0 as i64))
.filter(
scheduling_policy
.eq(String::from(NodeSchedulingPolicy::PauseForRestart))
.or(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Draining)))
.or(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Filling))),
)
.set(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Active)))
.execute(conn)?;
Ok(updated) Ok(updated)
}) })
.await?; .await?;
@@ -503,44 +458,6 @@ impl Persistence {
Ok(Generation::new(g as u32)) Ok(Generation::new(g as u32))
} }
/// When we want to call out to the running shards for a tenant, e.g. during timeline CRUD operations,
/// we need to know where the shard is attached, _and_ the generation, so that we can re-check the generation
/// afterwards to confirm that our timeline CRUD operation is truly persistent (it must have happened in the
/// latest generation)
///
/// If the tenant doesn't exist, an empty vector is returned.
///
/// Output is sorted by shard number
pub(crate) async fn peek_generations(
&self,
filter_tenant_id: TenantId,
) -> Result<Vec<(TenantShardId, Option<Generation>, Option<NodeId>)>, DatabaseError> {
use crate::schema::tenant_shards::dsl::*;
let rows = self
.with_measured_conn(DatabaseOperation::PeekGenerations, move |conn| {
let result = tenant_shards
.filter(tenant_id.eq(filter_tenant_id.to_string()))
.select(TenantShardPersistence::as_select())
.order(shard_number)
.load(conn)?;
Ok(result)
})
.await?;
Ok(rows
.into_iter()
.map(|p| {
(
p.get_tenant_shard_id()
.expect("Corrupt tenant shard id in database"),
p.generation.map(|g| Generation::new(g as u32)),
p.generation_pageserver.map(|n| NodeId(n as u64)),
)
})
.collect())
}
#[allow(non_local_definitions)]
/// For use when updating a persistent property of a tenant, such as its config or placement_policy. /// For use when updating a persistent property of a tenant, such as its config or placement_policy.
/// ///
/// Do not use this for settting generation, unless in the special onboarding code path (/location_config) /// Do not use this for settting generation, unless in the special onboarding code path (/location_config)
@@ -548,48 +465,59 @@ impl Persistence {
/// that we only do the first time a tenant is set to an attached policy via /location_config. /// that we only do the first time a tenant is set to an attached policy via /location_config.
pub(crate) async fn update_tenant_shard( pub(crate) async fn update_tenant_shard(
&self, &self,
tenant: TenantFilter, tenant_shard_id: TenantShardId,
input_placement_policy: Option<PlacementPolicy>, input_placement_policy: PlacementPolicy,
input_config: Option<TenantConfig>, input_config: TenantConfig,
input_generation: Option<Generation>, input_generation: Option<Generation>,
input_scheduling_policy: Option<ShardSchedulingPolicy>,
) -> DatabaseResult<()> { ) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*; use crate::schema::tenant_shards::dsl::*;
self.with_measured_conn(DatabaseOperation::UpdateTenantShard, move |conn| { self.with_measured_conn(DatabaseOperation::UpdateTenantShard, move |conn| {
let query = match tenant { let query = diesel::update(tenant_shards)
TenantFilter::Shard(tenant_shard_id) => diesel::update(tenant_shards) .filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string())) .filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32)) .filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32));
.filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
.into_boxed(),
TenantFilter::Tenant(input_tenant_id) => diesel::update(tenant_shards)
.filter(tenant_id.eq(input_tenant_id.to_string()))
.into_boxed(),
};
#[derive(AsChangeset)] if let Some(input_generation) = input_generation {
#[diesel(table_name = crate::schema::tenant_shards)] // Update includes generation column
struct ShardUpdate { query
generation: Option<i32>, .set((
placement_policy: Option<String>, generation.eq(Some(input_generation.into().unwrap() as i32)),
config: Option<String>, placement_policy
scheduling_policy: Option<String>, .eq(serde_json::to_string(&input_placement_policy).unwrap()),
config.eq(serde_json::to_string(&input_config).unwrap()),
))
.execute(conn)?;
} else {
// Update does not include generation column
query
.set((
placement_policy
.eq(serde_json::to_string(&input_placement_policy).unwrap()),
config.eq(serde_json::to_string(&input_config).unwrap()),
))
.execute(conn)?;
} }
let update = ShardUpdate { Ok(())
generation: input_generation.map(|g| g.into().unwrap() as i32), })
placement_policy: input_placement_policy .await?;
.as_ref()
.map(|p| serde_json::to_string(&p).unwrap()),
config: input_config
.as_ref()
.map(|c| serde_json::to_string(&c).unwrap()),
scheduling_policy: input_scheduling_policy
.map(|p| serde_json::to_string(&p).unwrap()),
};
query.set(update).execute(conn)?; Ok(())
}
pub(crate) async fn update_tenant_config(
&self,
input_tenant_id: TenantId,
input_config: TenantConfig,
) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*;
self.with_measured_conn(DatabaseOperation::UpdateTenantConfig, move |conn| {
diesel::update(tenant_shards)
.filter(tenant_id.eq(input_tenant_id.to_string()))
.set((config.eq(serde_json::to_string(&input_config).unwrap()),))
.execute(conn)?;
Ok(()) Ok(())
}) })
@@ -631,51 +559,55 @@ impl Persistence {
) -> DatabaseResult<()> { ) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*; use crate::schema::tenant_shards::dsl::*;
self.with_measured_conn(DatabaseOperation::BeginShardSplit, move |conn| -> DatabaseResult<()> { self.with_measured_conn(DatabaseOperation::BeginShardSplit, move |conn| -> DatabaseResult<()> {
// Mark parent shards as splitting conn.transaction(|conn| -> DatabaseResult<()> {
// Mark parent shards as splitting
let updated = diesel::update(tenant_shards) let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string())) .filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(old_shard_count.literal() as i32)) .filter(shard_count.eq(old_shard_count.literal() as i32))
.set((splitting.eq(1),)) .set((splitting.eq(1),))
.execute(conn)?; .execute(conn)?;
if u8::try_from(updated) if u8::try_from(updated)
.map_err(|_| DatabaseError::Logical( .map_err(|_| DatabaseError::Logical(
format!("Overflow existing shard count {} while splitting", updated)) format!("Overflow existing shard count {} while splitting", updated))
)? != old_shard_count.count() { )? != old_shard_count.count() {
// Perhaps a deletion or another split raced with this attempt to split, mutating // Perhaps a deletion or another split raced with this attempt to split, mutating
// the parent shards that we intend to split. In this case the split request should fail. // the parent shards that we intend to split. In this case the split request should fail.
return Err(DatabaseError::Logical( return Err(DatabaseError::Logical(
format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count()) format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
)); ));
}
// FIXME: spurious clone to sidestep closure move rules
let parent_to_children = parent_to_children.clone();
// Insert child shards
for (parent_shard_id, children) in parent_to_children {
let mut parent = crate::schema::tenant_shards::table
.filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
.load::<TenantShardPersistence>(conn)?;
let parent = if parent.len() != 1 {
return Err(DatabaseError::Logical(format!(
"Parent shard {parent_shard_id} not found"
)));
} else {
parent.pop().unwrap()
};
for mut shard in children {
// Carry the parent's generation into the child
shard.generation = parent.generation;
debug_assert!(shard.splitting == SplitState::Splitting);
diesel::insert_into(tenant_shards)
.values(shard)
.execute(conn)?;
} }
}
// FIXME: spurious clone to sidestep closure move rules
let parent_to_children = parent_to_children.clone();
// Insert child shards
for (parent_shard_id, children) in parent_to_children {
let mut parent = crate::schema::tenant_shards::table
.filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
.load::<TenantShardPersistence>(conn)?;
let parent = if parent.len() != 1 {
return Err(DatabaseError::Logical(format!(
"Parent shard {parent_shard_id} not found"
)));
} else {
parent.pop().unwrap()
};
for mut shard in children {
// Carry the parent's generation into the child
shard.generation = parent.generation;
debug_assert!(shard.splitting == SplitState::Splitting);
diesel::insert_into(tenant_shards)
.values(shard)
.execute(conn)?;
}
}
Ok(())
})?;
Ok(()) Ok(())
}) })
@@ -693,18 +625,22 @@ impl Persistence {
self.with_measured_conn( self.with_measured_conn(
DatabaseOperation::CompleteShardSplit, DatabaseOperation::CompleteShardSplit,
move |conn| -> DatabaseResult<()> { move |conn| -> DatabaseResult<()> {
// Drop parent shards conn.transaction(|conn| -> QueryResult<()> {
diesel::delete(tenant_shards) // Drop parent shards
.filter(tenant_id.eq(split_tenant_id.to_string())) diesel::delete(tenant_shards)
.filter(shard_count.eq(old_shard_count.literal() as i32)) .filter(tenant_id.eq(split_tenant_id.to_string()))
.execute(conn)?; .filter(shard_count.eq(old_shard_count.literal() as i32))
.execute(conn)?;
// Clear sharding flag // Clear sharding flag
let updated = diesel::update(tenant_shards) let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string())) .filter(tenant_id.eq(split_tenant_id.to_string()))
.set((splitting.eq(0),)) .set((splitting.eq(0),))
.execute(conn)?; .execute(conn)?;
debug_assert!(updated > 0); debug_assert!(updated > 0);
Ok(())
})?;
Ok(()) Ok(())
}, },
@@ -723,192 +659,46 @@ impl Persistence {
self.with_measured_conn( self.with_measured_conn(
DatabaseOperation::AbortShardSplit, DatabaseOperation::AbortShardSplit,
move |conn| -> DatabaseResult<AbortShardSplitStatus> { move |conn| -> DatabaseResult<AbortShardSplitStatus> {
// Clear the splitting state on parent shards let aborted =
let updated = diesel::update(tenant_shards) conn.transaction(|conn| -> DatabaseResult<AbortShardSplitStatus> {
.filter(tenant_id.eq(split_tenant_id.to_string())) // Clear the splitting state on parent shards
.filter(shard_count.ne(new_shard_count.literal() as i32)) let updated = diesel::update(tenant_shards)
.set((splitting.eq(0),)) .filter(tenant_id.eq(split_tenant_id.to_string()))
.execute(conn)?; .filter(shard_count.ne(new_shard_count.literal() as i32))
.set((splitting.eq(0),))
.execute(conn)?;
// Parent shards are already gone: we cannot abort. // Parent shards are already gone: we cannot abort.
if updated == 0 { if updated == 0 {
return Ok(AbortShardSplitStatus::Complete); return Ok(AbortShardSplitStatus::Complete);
} }
// Sanity check: if parent shards were present, their cardinality should // Sanity check: if parent shards were present, their cardinality should
// be less than the number of child shards. // be less than the number of child shards.
if updated >= new_shard_count.count() as usize { if updated >= new_shard_count.count() as usize {
return Err(DatabaseError::Logical(format!( return Err(DatabaseError::Logical(format!(
"Unexpected parent shard count {updated} while aborting split to \ "Unexpected parent shard count {updated} while aborting split to \
count {new_shard_count:?} on tenant {split_tenant_id}" count {new_shard_count:?} on tenant {split_tenant_id}"
))); )));
} }
// Erase child shards // Erase child shards
diesel::delete(tenant_shards) diesel::delete(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string())) .filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(new_shard_count.literal() as i32)) .filter(shard_count.eq(new_shard_count.literal() as i32))
.execute(conn)?; .execute(conn)?;
Ok(AbortShardSplitStatus::Aborted) Ok(AbortShardSplitStatus::Aborted)
})?;
Ok(aborted)
}, },
) )
.await .await
} }
/// Stores all the latest metadata health updates durably. Updates existing entry on conflict.
///
/// **Correctness:** `metadata_health_updates` should all belong the tenant shards managed by the storage controller.
#[allow(dead_code)]
pub(crate) async fn update_metadata_health_records(
&self,
healthy_records: Vec<MetadataHealthPersistence>,
unhealthy_records: Vec<MetadataHealthPersistence>,
now: chrono::DateTime<chrono::Utc>,
) -> DatabaseResult<()> {
use crate::schema::metadata_health::dsl::*;
self.with_measured_conn(
DatabaseOperation::UpdateMetadataHealth,
move |conn| -> DatabaseResult<_> {
diesel::insert_into(metadata_health)
.values(&healthy_records)
.on_conflict((tenant_id, shard_number, shard_count))
.do_update()
.set((healthy.eq(true), last_scrubbed_at.eq(now)))
.execute(conn)?;
diesel::insert_into(metadata_health)
.values(&unhealthy_records)
.on_conflict((tenant_id, shard_number, shard_count))
.do_update()
.set((healthy.eq(false), last_scrubbed_at.eq(now)))
.execute(conn)?;
Ok(())
},
)
.await
}
/// Lists all the metadata health records.
#[allow(dead_code)]
pub(crate) async fn list_metadata_health_records(
&self,
) -> DatabaseResult<Vec<MetadataHealthPersistence>> {
self.with_measured_conn(
DatabaseOperation::ListMetadataHealth,
move |conn| -> DatabaseResult<_> {
Ok(
crate::schema::metadata_health::table
.load::<MetadataHealthPersistence>(conn)?,
)
},
)
.await
}
/// Lists all the metadata health records that is unhealthy.
#[allow(dead_code)]
pub(crate) async fn list_unhealthy_metadata_health_records(
&self,
) -> DatabaseResult<Vec<MetadataHealthPersistence>> {
use crate::schema::metadata_health::dsl::*;
self.with_measured_conn(
DatabaseOperation::ListMetadataHealthUnhealthy,
move |conn| -> DatabaseResult<_> {
Ok(crate::schema::metadata_health::table
.filter(healthy.eq(false))
.load::<MetadataHealthPersistence>(conn)?)
},
)
.await
}
/// Lists all the metadata health records that have not been updated since an `earlier` time.
#[allow(dead_code)]
pub(crate) async fn list_outdated_metadata_health_records(
&self,
earlier: chrono::DateTime<chrono::Utc>,
) -> DatabaseResult<Vec<MetadataHealthPersistence>> {
use crate::schema::metadata_health::dsl::*;
self.with_measured_conn(
DatabaseOperation::ListMetadataHealthOutdated,
move |conn| -> DatabaseResult<_> {
let query = metadata_health.filter(last_scrubbed_at.lt(earlier));
let res = query.load::<MetadataHealthPersistence>(conn)?;
Ok(res)
},
)
.await
}
/// Get the current entry from the `leader` table if one exists.
/// It is an error for the table to contain more than one entry.
pub(crate) async fn get_leader(&self) -> DatabaseResult<Option<ControllerPersistence>> {
let mut leader: Vec<ControllerPersistence> = self
.with_measured_conn(
DatabaseOperation::GetLeader,
move |conn| -> DatabaseResult<_> {
Ok(crate::schema::controllers::table.load::<ControllerPersistence>(conn)?)
},
)
.await?;
if leader.len() > 1 {
return Err(DatabaseError::Logical(format!(
"More than one entry present in the leader table: {leader:?}"
)));
}
Ok(leader.pop())
}
/// Update the new leader with compare-exchange semantics. If `prev` does not
/// match the current leader entry, then the update is treated as a failure.
/// When `prev` is not specified, the update is forced.
pub(crate) async fn update_leader(
&self,
prev: Option<ControllerPersistence>,
new: ControllerPersistence,
) -> DatabaseResult<()> {
use crate::schema::controllers::dsl::*;
let updated = self
.with_measured_conn(
DatabaseOperation::UpdateLeader,
move |conn| -> DatabaseResult<usize> {
let updated = match &prev {
Some(prev) => diesel::update(controllers)
.filter(address.eq(prev.address.clone()))
.filter(started_at.eq(prev.started_at))
.set((
address.eq(new.address.clone()),
started_at.eq(new.started_at),
))
.execute(conn)?,
None => diesel::insert_into(controllers)
.values(new.clone())
.execute(conn)?,
};
Ok(updated)
},
)
.await?;
if updated == 0 {
return Err(DatabaseError::Logical(
"Leader table update failed".to_string(),
));
}
Ok(())
}
} }
/// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably /// Parts of [`crate::tenant_state::TenantState`] that are stored durably
#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)] #[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)]
#[diesel(table_name = crate::schema::tenant_shards)] #[diesel(table_name = crate::schema::tenant_shards)]
pub(crate) struct TenantShardPersistence { pub(crate) struct TenantShardPersistence {
@@ -938,8 +728,6 @@ pub(crate) struct TenantShardPersistence {
pub(crate) splitting: SplitState, pub(crate) splitting: SplitState,
#[serde(default)] #[serde(default)]
pub(crate) config: String, pub(crate) config: String,
#[serde(default)]
pub(crate) scheduling_policy: String,
} }
impl TenantShardPersistence { impl TenantShardPersistence {
@@ -975,68 +763,3 @@ pub(crate) struct NodePersistence {
pub(crate) listen_pg_addr: String, pub(crate) listen_pg_addr: String,
pub(crate) listen_pg_port: i32, pub(crate) listen_pg_port: i32,
} }
/// Tenant metadata health status that are stored durably.
#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)]
#[diesel(table_name = crate::schema::metadata_health)]
pub(crate) struct MetadataHealthPersistence {
#[serde(default)]
pub(crate) tenant_id: String,
#[serde(default)]
pub(crate) shard_number: i32,
#[serde(default)]
pub(crate) shard_count: i32,
pub(crate) healthy: bool,
pub(crate) last_scrubbed_at: chrono::DateTime<chrono::Utc>,
}
impl MetadataHealthPersistence {
pub fn new(
tenant_shard_id: TenantShardId,
healthy: bool,
last_scrubbed_at: chrono::DateTime<chrono::Utc>,
) -> Self {
let tenant_id = tenant_shard_id.tenant_id.to_string();
let shard_number = tenant_shard_id.shard_number.0 as i32;
let shard_count = tenant_shard_id.shard_count.literal() as i32;
MetadataHealthPersistence {
tenant_id,
shard_number,
shard_count,
healthy,
last_scrubbed_at,
}
}
#[allow(dead_code)]
pub(crate) fn get_tenant_shard_id(&self) -> Result<TenantShardId, hex::FromHexError> {
Ok(TenantShardId {
tenant_id: TenantId::from_str(self.tenant_id.as_str())?,
shard_number: ShardNumber(self.shard_number as u8),
shard_count: ShardCount::new(self.shard_count as u8),
})
}
}
impl From<MetadataHealthPersistence> for MetadataHealthRecord {
fn from(value: MetadataHealthPersistence) -> Self {
MetadataHealthRecord {
tenant_shard_id: value
.get_tenant_shard_id()
.expect("stored tenant id should be valid"),
healthy: value.healthy,
last_scrubbed_at: value.last_scrubbed_at,
}
}
}
#[derive(
Serialize, Deserialize, Queryable, Selectable, Insertable, Eq, PartialEq, Debug, Clone,
)]
#[diesel(table_name = crate::schema::controllers)]
pub(crate) struct ControllerPersistence {
pub(crate) address: String,
pub(crate) started_at: chrono::DateTime<chrono::Utc>,
}

View File

@@ -1,18 +1,16 @@
use crate::pageserver_client::PageserverClient; use crate::pageserver_client::PageserverClient;
use crate::persistence::Persistence; use crate::persistence::Persistence;
use crate::service; use crate::service;
use pageserver_api::controller_api::PlacementPolicy; use hyper::StatusCode;
use pageserver_api::models::{ use pageserver_api::models::{
LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig, LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig,
}; };
use pageserver_api::shard::{ShardIdentity, TenantShardId}; use pageserver_api::shard::{ShardIdentity, TenantShardId};
use pageserver_client::mgmt_api; use pageserver_client::mgmt_api;
use reqwest::StatusCode;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use utils::failpoint_support;
use utils::generation::Generation; use utils::generation::Generation;
use utils::id::{NodeId, TimelineId}; use utils::id::{NodeId, TimelineId};
use utils::lsn::Lsn; use utils::lsn::Lsn;
@@ -20,18 +18,17 @@ use utils::sync::gate::GateGuard;
use crate::compute_hook::{ComputeHook, NotifyError}; use crate::compute_hook::{ComputeHook, NotifyError};
use crate::node::Node; use crate::node::Node;
use crate::tenant_shard::{IntentState, ObservedState, ObservedStateLocation}; use crate::tenant_state::{IntentState, ObservedState, ObservedStateLocation};
const DEFAULT_HEATMAP_PERIOD: &str = "60s"; const DEFAULT_HEATMAP_PERIOD: &str = "60s";
/// Object with the lifetime of the background reconcile task that is created /// Object with the lifetime of the background reconcile task that is created
/// for tenants which have a difference between their intent and observed states. /// for tenants which have a difference between their intent and observed states.
pub(super) struct Reconciler { pub(super) struct Reconciler {
/// See [`crate::tenant_shard::TenantShard`] for the meanings of these fields: they are a snapshot /// See [`crate::tenant_state::TenantState`] for the meanings of these fields: they are a snapshot
/// of a tenant's state from when we spawned a reconcile task. /// of a tenant's state from when we spawned a reconcile task.
pub(super) tenant_shard_id: TenantShardId, pub(super) tenant_shard_id: TenantShardId,
pub(crate) shard: ShardIdentity, pub(crate) shard: ShardIdentity,
pub(crate) placement_policy: PlacementPolicy,
pub(crate) generation: Option<Generation>, pub(crate) generation: Option<Generation>,
pub(crate) intent: TargetState, pub(crate) intent: TargetState,
@@ -39,9 +36,6 @@ pub(super) struct Reconciler {
/// to detach this tenant shard. /// to detach this tenant shard.
pub(crate) detach: Vec<Node>, pub(crate) detach: Vec<Node>,
/// Configuration specific to this reconciler
pub(crate) reconciler_config: ReconcilerConfig,
pub(crate) config: TenantConfig, pub(crate) config: TenantConfig,
pub(crate) observed: ObservedState, pub(crate) observed: ObservedState,
@@ -54,15 +48,11 @@ pub(super) struct Reconciler {
/// To avoid stalling if the cloud control plane is unavailable, we may proceed /// To avoid stalling if the cloud control plane is unavailable, we may proceed
/// past failures in [`ComputeHook::notify`], but we _must_ remember that we failed /// past failures in [`ComputeHook::notify`], but we _must_ remember that we failed
/// so that we can set [`crate::tenant_shard::TenantShard::pending_compute_notification`] to ensure a later retry. /// so that we can set [`crate::tenant_state::TenantState::pending_compute_notification`] to ensure a later retry.
pub(crate) compute_notify_failure: bool, pub(crate) compute_notify_failure: bool,
/// Reconciler is responsible for keeping alive semaphore units that limit concurrency on how many
/// we will spawn.
pub(crate) _resource_units: ReconcileUnits,
/// A means to abort background reconciliation: it is essential to /// A means to abort background reconciliation: it is essential to
/// call this when something changes in the original TenantShard that /// call this when something changes in the original TenantState that
/// will make this reconciliation impossible or unnecessary, for /// will make this reconciliation impossible or unnecessary, for
/// example when a pageserver node goes offline, or the PlacementPolicy for /// example when a pageserver node goes offline, or the PlacementPolicy for
/// the tenant is changed. /// the tenant is changed.
@@ -76,79 +66,7 @@ pub(super) struct Reconciler {
pub(crate) persistence: Arc<Persistence>, pub(crate) persistence: Arc<Persistence>,
} }
pub(crate) struct ReconcilerConfigBuilder { /// This is a snapshot of [`crate::tenant_state::IntentState`], but it does not do any
config: ReconcilerConfig,
}
impl ReconcilerConfigBuilder {
pub(crate) fn new() -> Self {
Self {
config: ReconcilerConfig::default(),
}
}
pub(crate) fn secondary_warmup_timeout(self, value: Duration) -> Self {
Self {
config: ReconcilerConfig {
secondary_warmup_timeout: Some(value),
..self.config
},
}
}
pub(crate) fn secondary_download_request_timeout(self, value: Duration) -> Self {
Self {
config: ReconcilerConfig {
secondary_download_request_timeout: Some(value),
..self.config
},
}
}
pub(crate) fn build(self) -> ReconcilerConfig {
self.config
}
}
#[derive(Default, Debug, Copy, Clone)]
pub(crate) struct ReconcilerConfig {
// During live migration give up on warming-up the secondary
// after this timeout.
secondary_warmup_timeout: Option<Duration>,
// During live migrations this is the amount of time that
// the pagserver will hold our poll.
secondary_download_request_timeout: Option<Duration>,
}
impl ReconcilerConfig {
pub(crate) fn get_secondary_warmup_timeout(&self) -> Duration {
const SECONDARY_WARMUP_TIMEOUT_DEFAULT: Duration = Duration::from_secs(300);
self.secondary_warmup_timeout
.unwrap_or(SECONDARY_WARMUP_TIMEOUT_DEFAULT)
}
pub(crate) fn get_secondary_download_request_timeout(&self) -> Duration {
const SECONDARY_DOWNLOAD_REQUEST_TIMEOUT_DEFAULT: Duration = Duration::from_secs(20);
self.secondary_download_request_timeout
.unwrap_or(SECONDARY_DOWNLOAD_REQUEST_TIMEOUT_DEFAULT)
}
}
/// RAII resource units granted to a Reconciler, which it should keep alive until it finishes doing I/O
pub(crate) struct ReconcileUnits {
_sem_units: tokio::sync::OwnedSemaphorePermit,
}
impl ReconcileUnits {
pub(crate) fn new(sem_units: tokio::sync::OwnedSemaphorePermit) -> Self {
Self {
_sem_units: sem_units,
}
}
}
/// This is a snapshot of [`crate::tenant_shard::IntentState`], but it does not do any
/// reference counting for Scheduler. The IntentState is what the scheduler works with, /// reference counting for Scheduler. The IntentState is what the scheduler works with,
/// and the TargetState is just the instruction for a particular Reconciler run. /// and the TargetState is just the instruction for a particular Reconciler run.
#[derive(Debug)] #[derive(Debug)]
@@ -362,13 +280,11 @@ impl Reconciler {
) -> Result<(), ReconcileError> { ) -> Result<(), ReconcileError> {
// This is not the timeout for a request, but the total amount of time we're willing to wait // This is not the timeout for a request, but the total amount of time we're willing to wait
// for a secondary location to get up to date before // for a secondary location to get up to date before
let total_download_timeout = self.reconciler_config.get_secondary_warmup_timeout(); const TOTAL_DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(300);
// This the long-polling interval for the secondary download requests we send to destination pageserver // This the long-polling interval for the secondary download requests we send to destination pageserver
// during a migration. // during a migration.
let request_download_timeout = self const REQUEST_DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(20);
.reconciler_config
.get_secondary_download_request_timeout();
let started_at = Instant::now(); let started_at = Instant::now();
@@ -379,14 +295,14 @@ impl Reconciler {
client client
.tenant_secondary_download( .tenant_secondary_download(
tenant_shard_id, tenant_shard_id,
Some(request_download_timeout), Some(REQUEST_DOWNLOAD_TIMEOUT),
) )
.await .await
}, },
&self.service_config.jwt_token, &self.service_config.jwt_token,
1, 1,
3, 3,
request_download_timeout * 2, REQUEST_DOWNLOAD_TIMEOUT * 2,
&self.cancel, &self.cancel,
) )
.await .await
@@ -414,7 +330,7 @@ impl Reconciler {
return Ok(()); return Ok(());
} else if status == StatusCode::ACCEPTED { } else if status == StatusCode::ACCEPTED {
let total_runtime = started_at.elapsed(); let total_runtime = started_at.elapsed();
if total_runtime > total_download_timeout { if total_runtime > TOTAL_DOWNLOAD_TIMEOUT {
tracing::warn!("Timed out after {}ms downloading layers to {node}. Progress so far: {}/{} layers, {}/{} bytes", tracing::warn!("Timed out after {}ms downloading layers to {node}. Progress so far: {}/{} layers, {}/{} bytes",
total_runtime.as_millis(), total_runtime.as_millis(),
progress.layers_downloaded, progress.layers_downloaded,
@@ -571,7 +487,6 @@ impl Reconciler {
while let Err(e) = self.compute_notify().await { while let Err(e) = self.compute_notify().await {
match e { match e {
NotifyError::Fatal(_) => return Err(ReconcileError::Notify(e)), NotifyError::Fatal(_) => return Err(ReconcileError::Notify(e)),
NotifyError::ShuttingDown => return Err(ReconcileError::Cancel),
_ => { _ => {
tracing::warn!( tracing::warn!(
"Live migration blocked by compute notification error, retrying: {e}" "Live migration blocked by compute notification error, retrying: {e}"
@@ -708,7 +623,7 @@ impl Reconciler {
generation, generation,
&self.shard, &self.shard,
&self.config, &self.config,
&self.placement_policy, !self.intent.secondary.is_empty(),
); );
match self.observed.locations.get(&node.get_id()) { match self.observed.locations.get(&node.get_id()) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => { Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {
@@ -720,8 +635,11 @@ impl Reconciler {
// reconcile this location. This includes locations with different configurations, as well // reconcile this location. This includes locations with different configurations, as well
// as locations with unknown (None) observed state. // as locations with unknown (None) observed state.
// Incrementing generation is the safe general case, but is inefficient for changes that only // The general case is to increment the generation. However, there are cases
// modify some details (e.g. the tenant's config). // where this is not necessary:
// - if we are only updating the TenantConf part of the location
// - if we are only changing the attachment mode (e.g. going to attachedmulti or attachedstale)
// and the location was already in the correct generation
let increment_generation = match observed { let increment_generation = match observed {
None => true, None => true,
Some(ObservedStateLocation { conf: None }) => true, Some(ObservedStateLocation { conf: None }) => true,
@@ -730,11 +648,18 @@ impl Reconciler {
}) => { }) => {
let generations_match = observed.generation == wanted_conf.generation; let generations_match = observed.generation == wanted_conf.generation;
// We may skip incrementing the generation if the location is already in the expected mode and use LocationConfigMode::*;
// generation. In principle it would also be safe to skip from certain other modes (e.g. AttachedStale), let mode_transition_requires_gen_inc =
// but such states are handled inside `live_migrate`, and if we see that state here we're cleaning up match (observed.mode, wanted_conf.mode) {
// after a restart/crash, so fall back to the universally safe path of incrementing generation. // Usually the short-lived attachment modes (multi and stale) are only used
!generations_match || (observed.mode != wanted_conf.mode) // in the case of [`Self::live_migrate`], but it is simple to handle them correctly
// here too. Locations are allowed to go Single->Stale and Multi->Single within the same generation.
(AttachedSingle, AttachedStale) => false,
(AttachedMulti, AttachedSingle) => false,
(lhs, rhs) => lhs != rhs,
};
!generations_match || mode_transition_requires_gen_inc
} }
}; };
@@ -804,8 +729,6 @@ impl Reconciler {
self.location_config(&node, conf, None, false).await?; self.location_config(&node, conf, None, false).await?;
} }
failpoint_support::sleep_millis_async!("sleep-on-reconcile-epilogue");
Ok(()) Ok(())
} }
@@ -826,10 +749,7 @@ impl Reconciler {
// It is up to the caller whether they want to drop out on this error, but they don't have to: // It is up to the caller whether they want to drop out on this error, but they don't have to:
// in general we should avoid letting unavailability of the cloud control plane stop us from // in general we should avoid letting unavailability of the cloud control plane stop us from
// making progress. // making progress.
if !matches!(e, NotifyError::ShuttingDown) { tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
}
// Set this flag so that in our ReconcileResult we will set the flag on the shard that it // Set this flag so that in our ReconcileResult we will set the flag on the shard that it
// needs to retry at some point. // needs to retry at some point.
self.compute_notify_failure = true; self.compute_notify_failure = true;
@@ -860,15 +780,8 @@ pub(crate) fn attached_location_conf(
generation: Generation, generation: Generation,
shard: &ShardIdentity, shard: &ShardIdentity,
config: &TenantConfig, config: &TenantConfig,
policy: &PlacementPolicy, has_secondaries: bool,
) -> LocationConfig { ) -> LocationConfig {
let has_secondaries = match policy {
PlacementPolicy::Attached(0) | PlacementPolicy::Detached | PlacementPolicy::Secondary => {
false
}
PlacementPolicy::Attached(_) => true,
};
LocationConfig { LocationConfig {
mode: LocationConfigMode::AttachedSingle, mode: LocationConfigMode::AttachedSingle,
generation: generation.into(), generation: generation.into(),

View File

@@ -0,0 +1,352 @@
use crate::{node::Node, tenant_state::TenantState};
use pageserver_api::controller_api::UtilizationScore;
use serde::Serialize;
use std::collections::HashMap;
use utils::{http::error::ApiError, id::NodeId};
/// Scenarios in which we cannot find a suitable location for a tenant shard
#[derive(thiserror::Error, Debug)]
pub enum ScheduleError {
#[error("No pageservers found")]
NoPageservers,
#[error("No pageserver found matching constraint")]
ImpossibleConstraint,
}
impl From<ScheduleError> for ApiError {
fn from(value: ScheduleError) -> Self {
ApiError::Conflict(format!("Scheduling error: {}", value))
}
}
#[derive(Serialize, Eq, PartialEq)]
pub enum MaySchedule {
Yes(UtilizationScore),
No,
}
#[derive(Serialize)]
struct SchedulerNode {
/// How many shards are currently scheduled on this node, via their [`crate::tenant_state::IntentState`].
shard_count: usize,
/// Whether this node is currently elegible to have new shards scheduled (this is derived
/// from a node's availability state and scheduling policy).
may_schedule: MaySchedule,
}
impl PartialEq for SchedulerNode {
fn eq(&self, other: &Self) -> bool {
let may_schedule_matches = matches!(
(&self.may_schedule, &other.may_schedule),
(MaySchedule::Yes(_), MaySchedule::Yes(_)) | (MaySchedule::No, MaySchedule::No)
);
may_schedule_matches && self.shard_count == other.shard_count
}
}
impl Eq for SchedulerNode {}
/// This type is responsible for selecting which node is used when a tenant shard needs to choose a pageserver
/// on which to run.
///
/// The type has no persistent state of its own: this is all populated at startup. The Serialize
/// impl is only for debug dumps.
#[derive(Serialize)]
pub(crate) struct Scheduler {
nodes: HashMap<NodeId, SchedulerNode>,
}
impl Scheduler {
pub(crate) fn new<'a>(nodes: impl Iterator<Item = &'a Node>) -> Self {
let mut scheduler_nodes = HashMap::new();
for node in nodes {
scheduler_nodes.insert(
node.get_id(),
SchedulerNode {
shard_count: 0,
may_schedule: node.may_schedule(),
},
);
}
Self {
nodes: scheduler_nodes,
}
}
/// For debug/support: check that our internal statistics are in sync with the state of
/// the nodes & tenant shards.
///
/// If anything is inconsistent, log details and return an error.
pub(crate) fn consistency_check<'a>(
&self,
nodes: impl Iterator<Item = &'a Node>,
shards: impl Iterator<Item = &'a TenantState>,
) -> anyhow::Result<()> {
let mut expect_nodes: HashMap<NodeId, SchedulerNode> = HashMap::new();
for node in nodes {
expect_nodes.insert(
node.get_id(),
SchedulerNode {
shard_count: 0,
may_schedule: node.may_schedule(),
},
);
}
for shard in shards {
if let Some(node_id) = shard.intent.get_attached() {
match expect_nodes.get_mut(node_id) {
Some(node) => node.shard_count += 1,
None => anyhow::bail!(
"Tenant {} references nonexistent node {}",
shard.tenant_shard_id,
node_id
),
}
}
for node_id in shard.intent.get_secondary() {
match expect_nodes.get_mut(node_id) {
Some(node) => node.shard_count += 1,
None => anyhow::bail!(
"Tenant {} references nonexistent node {}",
shard.tenant_shard_id,
node_id
),
}
}
}
for (node_id, expect_node) in &expect_nodes {
let Some(self_node) = self.nodes.get(node_id) else {
anyhow::bail!("Node {node_id} not found in Self")
};
if self_node != expect_node {
tracing::error!("Inconsistency detected in scheduling state for node {node_id}");
tracing::error!("Expected state: {}", serde_json::to_string(expect_node)?);
tracing::error!("Self state: {}", serde_json::to_string(self_node)?);
anyhow::bail!("Inconsistent state on {node_id}");
}
}
if expect_nodes.len() != self.nodes.len() {
// We just checked that all the expected nodes are present. If the lengths don't match,
// it means that we have nodes in Self that are unexpected.
for node_id in self.nodes.keys() {
if !expect_nodes.contains_key(node_id) {
anyhow::bail!("Node {node_id} found in Self but not in expected nodes");
}
}
}
Ok(())
}
/// Increment the reference count of a node. This reference count is used to guide scheduling
/// decisions, not for memory management: it represents one tenant shard whose IntentState targets
/// this node.
///
/// It is an error to call this for a node that is not known to the scheduler (i.e. passed into
/// [`Self::new`] or [`Self::node_upsert`])
pub(crate) fn node_inc_ref(&mut self, node_id: NodeId) {
let Some(node) = self.nodes.get_mut(&node_id) else {
tracing::error!("Scheduler missing node {node_id}");
debug_assert!(false);
return;
};
node.shard_count += 1;
}
/// Decrement a node's reference count. Inverse of [`Self::node_inc_ref`].
pub(crate) fn node_dec_ref(&mut self, node_id: NodeId) {
let Some(node) = self.nodes.get_mut(&node_id) else {
debug_assert!(false);
tracing::error!("Scheduler missing node {node_id}");
return;
};
node.shard_count -= 1;
}
pub(crate) fn node_upsert(&mut self, node: &Node) {
use std::collections::hash_map::Entry::*;
match self.nodes.entry(node.get_id()) {
Occupied(mut entry) => {
entry.get_mut().may_schedule = node.may_schedule();
}
Vacant(entry) => {
entry.insert(SchedulerNode {
shard_count: 0,
may_schedule: node.may_schedule(),
});
}
}
}
pub(crate) fn node_remove(&mut self, node_id: NodeId) {
if self.nodes.remove(&node_id).is_none() {
tracing::warn!(node_id=%node_id, "Removed non-existent node from scheduler");
}
}
/// Where we have several nodes to choose from, for example when picking a secondary location
/// to promote to an attached location, this method may be used to pick the best choice based
/// on the scheduler's knowledge of utilization and availability.
///
/// If the input is empty, or all the nodes are not elegible for scheduling, return None: the
/// caller can pick a node some other way.
pub(crate) fn node_preferred(&self, nodes: &[NodeId]) -> Option<NodeId> {
if nodes.is_empty() {
return None;
}
// TODO: When the utilization score returned by the pageserver becomes meaningful,
// schedule based on that instead of the shard count.
let node = nodes
.iter()
.map(|node_id| {
let may_schedule = self
.nodes
.get(node_id)
.map(|n| n.may_schedule != MaySchedule::No)
.unwrap_or(false);
(*node_id, may_schedule)
})
.max_by_key(|(_n, may_schedule)| *may_schedule);
// If even the preferred node has may_schedule==false, return None
node.and_then(|(node_id, may_schedule)| if may_schedule { Some(node_id) } else { None })
}
pub(crate) fn schedule_shard(&self, hard_exclude: &[NodeId]) -> Result<NodeId, ScheduleError> {
if self.nodes.is_empty() {
return Err(ScheduleError::NoPageservers);
}
let mut tenant_counts: Vec<(NodeId, usize)> = self
.nodes
.iter()
.filter_map(|(k, v)| {
if hard_exclude.contains(k) || v.may_schedule == MaySchedule::No {
None
} else {
Some((*k, v.shard_count))
}
})
.collect();
// Sort by tenant count. Nodes with the same tenant count are sorted by ID.
tenant_counts.sort_by_key(|i| (i.1, i.0));
if tenant_counts.is_empty() {
// After applying constraints, no pageservers were left. We log some detail about
// the state of nodes to help understand why this happened. This is not logged as an error because
// it is legitimately possible for enough nodes to be Offline to prevent scheduling a shard.
tracing::info!("Scheduling failure, while excluding {hard_exclude:?}, node states:");
for (node_id, node) in &self.nodes {
tracing::info!(
"Node {node_id}: may_schedule={} shards={}",
node.may_schedule != MaySchedule::No,
node.shard_count
);
}
return Err(ScheduleError::ImpossibleConstraint);
}
let node_id = tenant_counts.first().unwrap().0;
tracing::info!(
"scheduler selected node {node_id} (elegible nodes {:?}, exclude: {hard_exclude:?})",
tenant_counts.iter().map(|i| i.0 .0).collect::<Vec<_>>()
);
// Note that we do not update shard count here to reflect the scheduling: that
// is IntentState's job when the scheduled location is used.
Ok(node_id)
}
}
#[cfg(test)]
pub(crate) mod test_utils {
use crate::node::Node;
use pageserver_api::controller_api::{NodeAvailability, UtilizationScore};
use std::collections::HashMap;
use utils::id::NodeId;
/// Test helper: synthesize the requested number of nodes, all in active state.
///
/// Node IDs start at one.
pub(crate) fn make_test_nodes(n: u64) -> HashMap<NodeId, Node> {
(1..n + 1)
.map(|i| {
(NodeId(i), {
let mut node = Node::new(
NodeId(i),
format!("httphost-{i}"),
80 + i as u16,
format!("pghost-{i}"),
5432 + i as u16,
);
node.set_availability(NodeAvailability::Active(UtilizationScore::worst()));
assert!(node.is_available());
node
})
})
.collect()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::tenant_state::IntentState;
#[test]
fn scheduler_basic() -> anyhow::Result<()> {
let nodes = test_utils::make_test_nodes(2);
let mut scheduler = Scheduler::new(nodes.values());
let mut t1_intent = IntentState::new();
let mut t2_intent = IntentState::new();
let scheduled = scheduler.schedule_shard(&[])?;
t1_intent.set_attached(&mut scheduler, Some(scheduled));
let scheduled = scheduler.schedule_shard(&[])?;
t2_intent.set_attached(&mut scheduler, Some(scheduled));
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
let scheduled = scheduler.schedule_shard(&t1_intent.all_pageservers())?;
t1_intent.push_secondary(&mut scheduler, scheduled);
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 2);
t1_intent.clear(&mut scheduler);
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
if cfg!(debug_assertions) {
// Dropping an IntentState without clearing it causes a panic in debug mode,
// because we have failed to properly update scheduler shard counts.
let result = std::panic::catch_unwind(move || {
drop(t2_intent);
});
assert!(result.is_err());
} else {
t2_intent.clear(&mut scheduler);
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 0);
}
Ok(())
}
}

View File

@@ -1,22 +1,5 @@
// @generated automatically by Diesel CLI. // @generated automatically by Diesel CLI.
diesel::table! {
controllers (address, started_at) {
address -> Varchar,
started_at -> Timestamptz,
}
}
diesel::table! {
metadata_health (tenant_id, shard_number, shard_count) {
tenant_id -> Varchar,
shard_number -> Int4,
shard_count -> Int4,
healthy -> Bool,
last_scrubbed_at -> Timestamptz,
}
}
diesel::table! { diesel::table! {
nodes (node_id) { nodes (node_id) {
node_id -> Int8, node_id -> Int8,
@@ -39,8 +22,7 @@ diesel::table! {
placement_policy -> Varchar, placement_policy -> Varchar,
splitting -> Int2, splitting -> Int2,
config -> Text, config -> Text,
scheduling_policy -> Varchar,
} }
} }
diesel::allow_tables_to_appear_in_same_query!(controllers, metadata_health, nodes, tenant_shards,); diesel::allow_tables_to_appear_in_same_query!(nodes, tenant_shards,);

View File

@@ -0,0 +1,983 @@
use std::{
collections::{HashMap, HashSet},
sync::Arc,
time::Duration,
};
use crate::{
metrics::{self, ReconcileCompleteLabelGroup, ReconcileOutcome},
persistence::TenantShardPersistence,
};
use pageserver_api::controller_api::PlacementPolicy;
use pageserver_api::{
models::{LocationConfig, LocationConfigMode, TenantConfig},
shard::{ShardIdentity, TenantShardId},
};
use serde::Serialize;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::{instrument, Instrument};
use utils::{
generation::Generation,
id::NodeId,
seqwait::{SeqWait, SeqWaitError},
sync::gate::Gate,
};
use crate::{
compute_hook::ComputeHook,
node::Node,
persistence::{split_state::SplitState, Persistence},
reconciler::{
attached_location_conf, secondary_location_conf, ReconcileError, Reconciler, TargetState,
},
scheduler::{ScheduleError, Scheduler},
service, Sequence,
};
/// Serialization helper
fn read_mutex_content<S, T>(v: &std::sync::Mutex<T>, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::ser::Serializer,
T: Clone + std::fmt::Display,
{
serializer.collect_str(&v.lock().unwrap())
}
/// In-memory state for a particular tenant shard.
///
/// This struct implement Serialize for debugging purposes, but is _not_ persisted
/// itself: see [`crate::persistence`] for the subset of tenant shard state that is persisted.
#[derive(Serialize)]
pub(crate) struct TenantState {
pub(crate) tenant_shard_id: TenantShardId,
pub(crate) shard: ShardIdentity,
// Runtime only: sequence used to coordinate when updating this object while
// with background reconcilers may be running. A reconciler runs to a particular
// sequence.
pub(crate) sequence: Sequence,
// Latest generation number: next time we attach, increment this
// and use the incremented number when attaching.
//
// None represents an incompletely onboarded tenant via the [`Service::location_config`]
// API, where this tenant may only run in PlacementPolicy::Secondary.
pub(crate) generation: Option<Generation>,
// High level description of how the tenant should be set up. Provided
// externally.
pub(crate) policy: PlacementPolicy,
// Low level description of exactly which pageservers should fulfil
// which role. Generated by `Self::schedule`.
pub(crate) intent: IntentState,
// Low level description of how the tenant is configured on pageservers:
// if this does not match `Self::intent` then the tenant needs reconciliation
// with `Self::reconcile`.
pub(crate) observed: ObservedState,
// Tenant configuration, passed through opaquely to the pageserver. Identical
// for all shards in a tenant.
pub(crate) config: TenantConfig,
/// If a reconcile task is currently in flight, it may be joined here (it is
/// only safe to join if either the result has been received or the reconciler's
/// cancellation token has been fired)
#[serde(skip)]
pub(crate) reconciler: Option<ReconcilerHandle>,
/// If a tenant is being split, then all shards with that TenantId will have a
/// SplitState set, this acts as a guard against other operations such as background
/// reconciliation, and timeline creation.
pub(crate) splitting: SplitState,
/// Optionally wait for reconciliation to complete up to a particular
/// sequence number.
#[serde(skip)]
pub(crate) waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
/// Indicates sequence number for which we have encountered an error reconciling. If
/// this advances ahead of [`Self::waiter`] then a reconciliation error has occurred,
/// and callers should stop waiting for `waiter` and propagate the error.
#[serde(skip)]
pub(crate) error_waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
/// The most recent error from a reconcile on this tenant
/// TODO: generalize to an array of recent events
/// TOOD: use a ArcSwap instead of mutex for faster reads?
#[serde(serialize_with = "read_mutex_content")]
pub(crate) last_error: std::sync::Arc<std::sync::Mutex<String>>,
/// If we have a pending compute notification that for some reason we weren't able to send,
/// set this to true. If this is set, calls to [`Self::maybe_reconcile`] will run a task to retry
/// sending it. This is the mechanism by which compute notifications are included in the scope
/// of state that we publish externally in an eventually consistent way.
pub(crate) pending_compute_notification: bool,
}
#[derive(Default, Clone, Debug, Serialize)]
pub(crate) struct IntentState {
attached: Option<NodeId>,
secondary: Vec<NodeId>,
}
impl IntentState {
pub(crate) fn new() -> Self {
Self {
attached: None,
secondary: vec![],
}
}
pub(crate) fn single(scheduler: &mut Scheduler, node_id: Option<NodeId>) -> Self {
if let Some(node_id) = node_id {
scheduler.node_inc_ref(node_id);
}
Self {
attached: node_id,
secondary: vec![],
}
}
pub(crate) fn set_attached(&mut self, scheduler: &mut Scheduler, new_attached: Option<NodeId>) {
if self.attached != new_attached {
if let Some(old_attached) = self.attached.take() {
scheduler.node_dec_ref(old_attached);
}
if let Some(new_attached) = &new_attached {
scheduler.node_inc_ref(*new_attached);
}
self.attached = new_attached;
}
}
/// Like set_attached, but the node is from [`Self::secondary`]. This swaps the node from
/// secondary to attached while maintaining the scheduler's reference counts.
pub(crate) fn promote_attached(
&mut self,
_scheduler: &mut Scheduler,
promote_secondary: NodeId,
) {
// If we call this with a node that isn't in secondary, it would cause incorrect
// scheduler reference counting, since we assume the node is already referenced as a secondary.
debug_assert!(self.secondary.contains(&promote_secondary));
// TODO: when scheduler starts tracking attached + secondary counts separately, we will
// need to call into it here.
self.secondary.retain(|n| n != &promote_secondary);
self.attached = Some(promote_secondary);
}
pub(crate) fn push_secondary(&mut self, scheduler: &mut Scheduler, new_secondary: NodeId) {
debug_assert!(!self.secondary.contains(&new_secondary));
scheduler.node_inc_ref(new_secondary);
self.secondary.push(new_secondary);
}
/// It is legal to call this with a node that is not currently a secondary: that is a no-op
pub(crate) fn remove_secondary(&mut self, scheduler: &mut Scheduler, node_id: NodeId) {
let index = self.secondary.iter().position(|n| *n == node_id);
if let Some(index) = index {
scheduler.node_dec_ref(node_id);
self.secondary.remove(index);
}
}
pub(crate) fn clear_secondary(&mut self, scheduler: &mut Scheduler) {
for secondary in self.secondary.drain(..) {
scheduler.node_dec_ref(secondary);
}
}
/// Remove the last secondary node from the list of secondaries
pub(crate) fn pop_secondary(&mut self, scheduler: &mut Scheduler) {
if let Some(node_id) = self.secondary.pop() {
scheduler.node_dec_ref(node_id);
}
}
pub(crate) fn clear(&mut self, scheduler: &mut Scheduler) {
if let Some(old_attached) = self.attached.take() {
scheduler.node_dec_ref(old_attached);
}
self.clear_secondary(scheduler);
}
pub(crate) fn all_pageservers(&self) -> Vec<NodeId> {
let mut result = Vec::new();
if let Some(p) = self.attached {
result.push(p)
}
result.extend(self.secondary.iter().copied());
result
}
pub(crate) fn get_attached(&self) -> &Option<NodeId> {
&self.attached
}
pub(crate) fn get_secondary(&self) -> &Vec<NodeId> {
&self.secondary
}
/// If the node is in use as the attached location, demote it into
/// the list of secondary locations. This is used when a node goes offline,
/// and we want to use a different node for attachment, but not permanently
/// forget the location on the offline node.
///
/// Returns true if a change was made
pub(crate) fn demote_attached(&mut self, node_id: NodeId) -> bool {
if self.attached == Some(node_id) {
// TODO: when scheduler starts tracking attached + secondary counts separately, we will
// need to call into it here.
self.attached = None;
self.secondary.push(node_id);
true
} else {
false
}
}
}
impl Drop for IntentState {
fn drop(&mut self) {
// Must clear before dropping, to avoid leaving stale refcounts in the Scheduler
debug_assert!(self.attached.is_none() && self.secondary.is_empty());
}
}
#[derive(Default, Clone, Serialize)]
pub(crate) struct ObservedState {
pub(crate) locations: HashMap<NodeId, ObservedStateLocation>,
}
/// Our latest knowledge of how this tenant is configured in the outside world.
///
/// Meaning:
/// * No instance of this type exists for a node: we are certain that we have nothing configured on that
/// node for this shard.
/// * Instance exists with conf==None: we *might* have some state on that node, but we don't know
/// what it is (e.g. we failed partway through configuring it)
/// * Instance exists with conf==Some: this tells us what we last successfully configured on this node,
/// and that configuration will still be present unless something external interfered.
#[derive(Clone, Serialize)]
pub(crate) struct ObservedStateLocation {
/// If None, it means we do not know the status of this shard's location on this node, but
/// we know that we might have some state on this node.
pub(crate) conf: Option<LocationConfig>,
}
pub(crate) struct ReconcilerWaiter {
// For observability purposes, remember the ID of the shard we're
// waiting for.
pub(crate) tenant_shard_id: TenantShardId,
seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
error_seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
error: std::sync::Arc<std::sync::Mutex<String>>,
seq: Sequence,
}
#[derive(thiserror::Error, Debug)]
pub enum ReconcileWaitError {
#[error("Timeout waiting for shard {0}")]
Timeout(TenantShardId),
#[error("shutting down")]
Shutdown,
#[error("Reconcile error on shard {0}: {1}")]
Failed(TenantShardId, String),
}
impl ReconcilerWaiter {
pub(crate) async fn wait_timeout(&self, timeout: Duration) -> Result<(), ReconcileWaitError> {
tokio::select! {
result = self.seq_wait.wait_for_timeout(self.seq, timeout)=> {
result.map_err(|e| match e {
SeqWaitError::Timeout => ReconcileWaitError::Timeout(self.tenant_shard_id),
SeqWaitError::Shutdown => ReconcileWaitError::Shutdown
})?;
},
result = self.error_seq_wait.wait_for(self.seq) => {
result.map_err(|e| match e {
SeqWaitError::Shutdown => ReconcileWaitError::Shutdown,
SeqWaitError::Timeout => unreachable!()
})?;
return Err(ReconcileWaitError::Failed(self.tenant_shard_id, self.error.lock().unwrap().clone()))
}
}
Ok(())
}
}
/// Having spawned a reconciler task, the tenant shard's state will carry enough
/// information to optionally cancel & await it later.
pub(crate) struct ReconcilerHandle {
sequence: Sequence,
handle: JoinHandle<()>,
cancel: CancellationToken,
}
/// When a reconcile task completes, it sends this result object
/// to be applied to the primary TenantState.
pub(crate) struct ReconcileResult {
pub(crate) sequence: Sequence,
/// On errors, `observed` should be treated as an incompleted description
/// of state (i.e. any nodes present in the result should override nodes
/// present in the parent tenant state, but any unmentioned nodes should
/// not be removed from parent tenant state)
pub(crate) result: Result<(), ReconcileError>,
pub(crate) tenant_shard_id: TenantShardId,
pub(crate) generation: Option<Generation>,
pub(crate) observed: ObservedState,
/// Set [`TenantState::pending_compute_notification`] from this flag
pub(crate) pending_compute_notification: bool,
}
impl ObservedState {
pub(crate) fn new() -> Self {
Self {
locations: HashMap::new(),
}
}
}
impl TenantState {
pub(crate) fn new(
tenant_shard_id: TenantShardId,
shard: ShardIdentity,
policy: PlacementPolicy,
) -> Self {
Self {
tenant_shard_id,
policy,
intent: IntentState::default(),
generation: Some(Generation::new(0)),
shard,
observed: ObservedState::default(),
config: TenantConfig::default(),
reconciler: None,
splitting: SplitState::Idle,
sequence: Sequence(1),
waiter: Arc::new(SeqWait::new(Sequence(0))),
error_waiter: Arc::new(SeqWait::new(Sequence(0))),
last_error: Arc::default(),
pending_compute_notification: false,
}
}
/// For use on startup when learning state from pageservers: generate my [`IntentState`] from my
/// [`ObservedState`], even if it violates my [`PlacementPolicy`]. Call [`Self::schedule`] next,
/// to get an intent state that complies with placement policy. The overall goal is to do scheduling
/// in a way that makes use of any configured locations that already exist in the outside world.
pub(crate) fn intent_from_observed(&mut self, scheduler: &mut Scheduler) {
// Choose an attached location by filtering observed locations, and then sorting to get the highest
// generation
let mut attached_locs = self
.observed
.locations
.iter()
.filter_map(|(node_id, l)| {
if let Some(conf) = &l.conf {
if conf.mode == LocationConfigMode::AttachedMulti
|| conf.mode == LocationConfigMode::AttachedSingle
|| conf.mode == LocationConfigMode::AttachedStale
{
Some((node_id, conf.generation))
} else {
None
}
} else {
None
}
})
.collect::<Vec<_>>();
attached_locs.sort_by_key(|i| i.1);
if let Some((node_id, _gen)) = attached_locs.into_iter().last() {
self.intent.set_attached(scheduler, Some(*node_id));
}
// All remaining observed locations generate secondary intents. This includes None
// observations, as these may well have some local content on disk that is usable (this
// is an edge case that might occur if we restarted during a migration or other change)
//
// We may leave intent.attached empty if we didn't find any attached locations: [`Self::schedule`]
// will take care of promoting one of these secondaries to be attached.
self.observed.locations.keys().for_each(|node_id| {
if Some(*node_id) != self.intent.attached {
self.intent.push_secondary(scheduler, *node_id);
}
});
}
/// Part of [`Self::schedule`] that is used to choose exactly one node to act as the
/// attached pageserver for a shard.
///
/// Returns whether we modified it, and the NodeId selected.
fn schedule_attached(
&mut self,
scheduler: &mut Scheduler,
) -> Result<(bool, NodeId), ScheduleError> {
// No work to do if we already have an attached tenant
if let Some(node_id) = self.intent.attached {
return Ok((false, node_id));
}
if let Some(promote_secondary) = scheduler.node_preferred(&self.intent.secondary) {
// Promote a secondary
tracing::debug!("Promoted secondary {} to attached", promote_secondary);
self.intent.promote_attached(scheduler, promote_secondary);
Ok((true, promote_secondary))
} else {
// Pick a fresh node: either we had no secondaries or none were schedulable
let node_id = scheduler.schedule_shard(&self.intent.secondary)?;
tracing::debug!("Selected {} as attached", node_id);
self.intent.set_attached(scheduler, Some(node_id));
Ok((true, node_id))
}
}
pub(crate) fn schedule(&mut self, scheduler: &mut Scheduler) -> Result<(), ScheduleError> {
// TODO: before scheduling new nodes, check if any existing content in
// self.intent refers to pageservers that are offline, and pick other
// pageservers if so.
// TODO: respect the splitting bit on tenants: if they are currently splitting then we may not
// change their attach location.
// Build the set of pageservers already in use by this tenant, to avoid scheduling
// more work on the same pageservers we're already using.
let mut modified = false;
// Add/remove nodes to fulfil policy
use PlacementPolicy::*;
match self.policy {
Attached(secondary_count) => {
let retain_secondaries = if self.intent.attached.is_none()
&& scheduler.node_preferred(&self.intent.secondary).is_some()
{
// If we have no attached, and one of the secondaries is elegible to be promoted, retain
// one more secondary than we usually would, as one of them will become attached futher down this function.
secondary_count + 1
} else {
secondary_count
};
while self.intent.secondary.len() > retain_secondaries {
// We have no particular preference for one secondary location over another: just
// arbitrarily drop from the end
self.intent.pop_secondary(scheduler);
modified = true;
}
// Should have exactly one attached, and N secondaries
let (modified_attached, attached_node_id) = self.schedule_attached(scheduler)?;
modified |= modified_attached;
let mut used_pageservers = vec![attached_node_id];
while self.intent.secondary.len() < secondary_count {
let node_id = scheduler.schedule_shard(&used_pageservers)?;
self.intent.push_secondary(scheduler, node_id);
used_pageservers.push(node_id);
modified = true;
}
}
Secondary => {
if let Some(node_id) = self.intent.get_attached() {
// Populate secondary by demoting the attached node
self.intent.demote_attached(*node_id);
modified = true;
} else if self.intent.secondary.is_empty() {
// Populate secondary by scheduling a fresh node
let node_id = scheduler.schedule_shard(&[])?;
self.intent.push_secondary(scheduler, node_id);
modified = true;
}
while self.intent.secondary.len() > 1 {
// We have no particular preference for one secondary location over another: just
// arbitrarily drop from the end
self.intent.pop_secondary(scheduler);
modified = true;
}
}
Detached => {
// Never add locations in this mode
if self.intent.get_attached().is_some() || !self.intent.get_secondary().is_empty() {
self.intent.clear(scheduler);
modified = true;
}
}
}
if modified {
self.sequence.0 += 1;
}
Ok(())
}
/// Query whether the tenant's observed state for attached node matches its intent state, and if so,
/// yield the node ID. This is appropriate for emitting compute hook notifications: we are checking that
/// the node in question is not only where we intend to attach, but that the tenant is indeed already attached there.
///
/// Reconciliation may still be needed for other aspects of state such as secondaries (see [`Self::dirty`]): this
/// funciton should not be used to decide whether to reconcile.
pub(crate) fn stably_attached(&self) -> Option<NodeId> {
if let Some(attach_intent) = self.intent.attached {
match self.observed.locations.get(&attach_intent) {
Some(loc) => match &loc.conf {
Some(conf) => match conf.mode {
LocationConfigMode::AttachedMulti
| LocationConfigMode::AttachedSingle
| LocationConfigMode::AttachedStale => {
// Our intent and observed state agree that this node is in an attached state.
Some(attach_intent)
}
// Our observed config is not an attached state
_ => None,
},
// Our observed state is None, i.e. in flux
None => None,
},
// We have no observed state for this node
None => None,
}
} else {
// Our intent is not to attach
None
}
}
fn dirty(&self, nodes: &Arc<HashMap<NodeId, Node>>) -> bool {
let mut dirty_nodes = HashSet::new();
if let Some(node_id) = self.intent.attached {
// Maybe panic: it is a severe bug if we try to attach while generation is null.
let generation = self
.generation
.expect("Attempted to enter attached state without a generation");
let wanted_conf = attached_location_conf(
generation,
&self.shard,
&self.config,
!self.intent.secondary.is_empty(),
);
match self.observed.locations.get(&node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
Some(_) | None => {
dirty_nodes.insert(node_id);
}
}
}
for node_id in &self.intent.secondary {
let wanted_conf = secondary_location_conf(&self.shard, &self.config);
match self.observed.locations.get(node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
Some(_) | None => {
dirty_nodes.insert(*node_id);
}
}
}
for node_id in self.observed.locations.keys() {
if self.intent.attached != Some(*node_id) && !self.intent.secondary.contains(node_id) {
// We have observed state that isn't part of our intent: need to clean it up.
dirty_nodes.insert(*node_id);
}
}
dirty_nodes.retain(|node_id| {
nodes
.get(node_id)
.map(|n| n.is_available())
.unwrap_or(false)
});
!dirty_nodes.is_empty()
}
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
pub(crate) fn maybe_reconcile(
&mut self,
result_tx: &tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
pageservers: &Arc<HashMap<NodeId, Node>>,
compute_hook: &Arc<ComputeHook>,
service_config: &service::Config,
persistence: &Arc<Persistence>,
gate: &Gate,
cancel: &CancellationToken,
) -> Option<ReconcilerWaiter> {
// If there are any ambiguous observed states, and the nodes they refer to are available,
// we should reconcile to clean them up.
let mut dirty_observed = false;
for (node_id, observed_loc) in &self.observed.locations {
let node = pageservers
.get(node_id)
.expect("Nodes may not be removed while referenced");
if observed_loc.conf.is_none() && node.is_available() {
dirty_observed = true;
break;
}
}
let active_nodes_dirty = self.dirty(pageservers);
// Even if there is no pageserver work to be done, if we have a pending notification to computes,
// wake up a reconciler to send it.
let do_reconcile =
active_nodes_dirty || dirty_observed || self.pending_compute_notification;
if !do_reconcile {
tracing::info!("Not dirty, no reconciliation needed.");
return None;
}
// If we are currently splitting, then never start a reconciler task: the splitting logic
// requires that shards are not interfered with while it runs. Do this check here rather than
// up top, so that we only log this message if we would otherwise have done a reconciliation.
if !matches!(self.splitting, SplitState::Idle) {
tracing::info!("Refusing to reconcile, splitting in progress");
return None;
}
// Reconcile already in flight for the current sequence?
if let Some(handle) = &self.reconciler {
if handle.sequence == self.sequence {
tracing::info!(
"Reconciliation already in progress for sequence {:?}",
self.sequence,
);
return Some(ReconcilerWaiter {
tenant_shard_id: self.tenant_shard_id,
seq_wait: self.waiter.clone(),
error_seq_wait: self.error_waiter.clone(),
error: self.last_error.clone(),
seq: self.sequence,
});
}
}
// Build list of nodes from which the reconciler should detach
let mut detach = Vec::new();
for node_id in self.observed.locations.keys() {
if self.intent.get_attached() != &Some(*node_id)
&& !self.intent.secondary.contains(node_id)
{
detach.push(
pageservers
.get(node_id)
.expect("Intent references non-existent pageserver")
.clone(),
)
}
}
// Reconcile in flight for a stale sequence? Our sequence's task will wait for it before
// doing our sequence's work.
let old_handle = self.reconciler.take();
let Ok(gate_guard) = gate.enter() else {
// Shutting down, don't start a reconciler
return None;
};
// Advance the sequence before spawning a reconciler, so that sequence waiters
// can distinguish between before+after the reconcile completes.
self.sequence = self.sequence.next();
let reconciler_cancel = cancel.child_token();
let reconciler_intent = TargetState::from_intent(pageservers, &self.intent);
let mut reconciler = Reconciler {
tenant_shard_id: self.tenant_shard_id,
shard: self.shard,
generation: self.generation,
intent: reconciler_intent,
detach,
config: self.config.clone(),
observed: self.observed.clone(),
compute_hook: compute_hook.clone(),
service_config: service_config.clone(),
_gate_guard: gate_guard,
cancel: reconciler_cancel.clone(),
persistence: persistence.clone(),
compute_notify_failure: false,
};
let reconcile_seq = self.sequence;
tracing::info!(seq=%reconcile_seq, "Spawning Reconciler for sequence {}", self.sequence);
let must_notify = self.pending_compute_notification;
let reconciler_span = tracing::info_span!(parent: None, "reconciler", seq=%reconcile_seq,
tenant_id=%reconciler.tenant_shard_id.tenant_id,
shard_id=%reconciler.tenant_shard_id.shard_slug());
metrics::METRICS_REGISTRY
.metrics_group
.storage_controller_reconcile_spawn
.inc();
let result_tx = result_tx.clone();
let join_handle = tokio::task::spawn(
async move {
// Wait for any previous reconcile task to complete before we start
if let Some(old_handle) = old_handle {
old_handle.cancel.cancel();
if let Err(e) = old_handle.handle.await {
// We can't do much with this other than log it: the task is done, so
// we may proceed with our work.
tracing::error!("Unexpected join error waiting for reconcile task: {e}");
}
}
// Early check for cancellation before doing any work
// TODO: wrap all remote API operations in cancellation check
// as well.
if reconciler.cancel.is_cancelled() {
metrics::METRICS_REGISTRY
.metrics_group
.storage_controller_reconcile_complete
.inc(ReconcileCompleteLabelGroup {
status: ReconcileOutcome::Cancel,
});
return;
}
// Attempt to make observed state match intent state
let result = reconciler.reconcile().await;
// If we know we had a pending compute notification from some previous action, send a notification irrespective
// of whether the above reconcile() did any work
if result.is_ok() && must_notify {
// If this fails we will send the need to retry in [`ReconcileResult::pending_compute_notification`]
reconciler.compute_notify().await.ok();
}
// Update result counter
let outcome_label = match &result {
Ok(_) => ReconcileOutcome::Success,
Err(ReconcileError::Cancel) => ReconcileOutcome::Cancel,
Err(_) => ReconcileOutcome::Error,
};
metrics::METRICS_REGISTRY
.metrics_group
.storage_controller_reconcile_complete
.inc(ReconcileCompleteLabelGroup {
status: outcome_label,
});
result_tx
.send(ReconcileResult {
sequence: reconcile_seq,
result,
tenant_shard_id: reconciler.tenant_shard_id,
generation: reconciler.generation,
observed: reconciler.observed,
pending_compute_notification: reconciler.compute_notify_failure,
})
.ok();
}
.instrument(reconciler_span),
);
self.reconciler = Some(ReconcilerHandle {
sequence: self.sequence,
handle: join_handle,
cancel: reconciler_cancel,
});
Some(ReconcilerWaiter {
tenant_shard_id: self.tenant_shard_id,
seq_wait: self.waiter.clone(),
error_seq_wait: self.error_waiter.clone(),
error: self.last_error.clone(),
seq: self.sequence,
})
}
/// Called when a ReconcileResult has been emitted and the service is updating
/// our state: if the result is from a sequence >= my ReconcileHandle, then drop
/// the handle to indicate there is no longer a reconciliation in progress.
pub(crate) fn reconcile_complete(&mut self, sequence: Sequence) {
if let Some(reconcile_handle) = &self.reconciler {
if reconcile_handle.sequence <= sequence {
self.reconciler = None;
}
}
}
// If we had any state at all referring to this node ID, drop it. Does not
// attempt to reschedule.
pub(crate) fn deref_node(&mut self, node_id: NodeId) {
if self.intent.attached == Some(node_id) {
self.intent.attached = None;
}
self.intent.secondary.retain(|n| n != &node_id);
self.observed.locations.remove(&node_id);
debug_assert!(!self.intent.all_pageservers().contains(&node_id));
}
pub(crate) fn to_persistent(&self) -> TenantShardPersistence {
TenantShardPersistence {
tenant_id: self.tenant_shard_id.tenant_id.to_string(),
shard_number: self.tenant_shard_id.shard_number.0 as i32,
shard_count: self.tenant_shard_id.shard_count.literal() as i32,
shard_stripe_size: self.shard.stripe_size.0 as i32,
generation: self.generation.map(|g| g.into().unwrap_or(0) as i32),
generation_pageserver: self.intent.get_attached().map(|n| n.0 as i64),
placement_policy: serde_json::to_string(&self.policy).unwrap(),
config: serde_json::to_string(&self.config).unwrap(),
splitting: SplitState::default(),
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use pageserver_api::{
controller_api::NodeAvailability,
shard::{ShardCount, ShardNumber},
};
use utils::id::TenantId;
use crate::scheduler::test_utils::make_test_nodes;
use super::*;
fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantState {
let tenant_id = TenantId::generate();
let shard_number = ShardNumber(0);
let shard_count = ShardCount::new(1);
let tenant_shard_id = TenantShardId {
tenant_id,
shard_number,
shard_count,
};
TenantState::new(
tenant_shard_id,
ShardIdentity::new(
shard_number,
shard_count,
pageserver_api::shard::ShardStripeSize(32768),
)
.unwrap(),
policy,
)
}
/// Test the scheduling behaviors used when a tenant configured for HA is subject
/// to nodes being marked offline.
#[test]
fn tenant_ha_scheduling() -> anyhow::Result<()> {
// Start with three nodes. Our tenant will only use two. The third one is
// expected to remain unused.
let mut nodes = make_test_nodes(3);
let mut scheduler = Scheduler::new(nodes.values());
let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1));
tenant_state
.schedule(&mut scheduler)
.expect("we have enough nodes, scheduling should work");
// Expect to initially be schedule on to different nodes
assert_eq!(tenant_state.intent.secondary.len(), 1);
assert!(tenant_state.intent.attached.is_some());
let attached_node_id = tenant_state.intent.attached.unwrap();
let secondary_node_id = *tenant_state.intent.secondary.iter().last().unwrap();
assert_ne!(attached_node_id, secondary_node_id);
// Notifying the attached node is offline should demote it to a secondary
let changed = tenant_state.intent.demote_attached(attached_node_id);
assert!(changed);
assert!(tenant_state.intent.attached.is_none());
assert_eq!(tenant_state.intent.secondary.len(), 2);
// Update the scheduler state to indicate the node is offline
nodes
.get_mut(&attached_node_id)
.unwrap()
.set_availability(NodeAvailability::Offline);
scheduler.node_upsert(nodes.get(&attached_node_id).unwrap());
// Scheduling the node should promote the still-available secondary node to attached
tenant_state
.schedule(&mut scheduler)
.expect("active nodes are available");
assert_eq!(tenant_state.intent.attached.unwrap(), secondary_node_id);
// The original attached node should have been retained as a secondary
assert_eq!(
*tenant_state.intent.secondary.iter().last().unwrap(),
attached_node_id
);
tenant_state.intent.clear(&mut scheduler);
Ok(())
}
#[test]
fn intent_from_observed() -> anyhow::Result<()> {
let nodes = make_test_nodes(3);
let mut scheduler = Scheduler::new(nodes.values());
let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1));
tenant_state.observed.locations.insert(
NodeId(3),
ObservedStateLocation {
conf: Some(LocationConfig {
mode: LocationConfigMode::AttachedMulti,
generation: Some(2),
secondary_conf: None,
shard_number: tenant_state.shard.number.0,
shard_count: tenant_state.shard.count.literal(),
shard_stripe_size: tenant_state.shard.stripe_size.0,
tenant_conf: TenantConfig::default(),
}),
},
);
tenant_state.observed.locations.insert(
NodeId(2),
ObservedStateLocation {
conf: Some(LocationConfig {
mode: LocationConfigMode::AttachedStale,
generation: Some(1),
secondary_conf: None,
shard_number: tenant_state.shard.number.0,
shard_count: tenant_state.shard.count.literal(),
shard_stripe_size: tenant_state.shard.stripe_size.0,
tenant_conf: TenantConfig::default(),
}),
},
);
tenant_state.intent_from_observed(&mut scheduler);
// The highest generationed attached location gets used as attached
assert_eq!(tenant_state.intent.attached, Some(NodeId(3)));
// Other locations get used as secondary
assert_eq!(tenant_state.intent.secondary, vec![NodeId(2)]);
scheduler.consistency_check(nodes.values(), [&tenant_state].into_iter())?;
tenant_state.intent.clear(&mut scheduler);
Ok(())
}
}

View File

@@ -36,11 +36,11 @@ use utils::pid_file::{self, PidFileRead};
// it's waiting. If the process hasn't started/stopped after 5 seconds, // it's waiting. If the process hasn't started/stopped after 5 seconds,
// it prints a notice that it's taking long, but keeps waiting. // it prints a notice that it's taking long, but keeps waiting.
// //
const STOP_RETRY_TIMEOUT: Duration = Duration::from_secs(10); const RETRY_UNTIL_SECS: u64 = 10;
const STOP_RETRIES: u128 = STOP_RETRY_TIMEOUT.as_millis() / RETRY_INTERVAL.as_millis(); const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
const RETRY_INTERVAL: Duration = Duration::from_millis(100); const RETRY_INTERVAL_MILLIS: u64 = 100;
const DOT_EVERY_RETRIES: u128 = 10; const DOT_EVERY_RETRIES: u64 = 10;
const NOTICE_AFTER_RETRIES: u128 = 50; const NOTICE_AFTER_RETRIES: u64 = 50;
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates /// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
/// it itself. /// it itself.
@@ -52,7 +52,6 @@ pub enum InitialPidFile {
} }
/// Start a background child process using the parameters given. /// Start a background child process using the parameters given.
#[allow(clippy::too_many_arguments)]
pub async fn start_process<F, Fut, AI, A, EI>( pub async fn start_process<F, Fut, AI, A, EI>(
process_name: &str, process_name: &str,
datadir: &Path, datadir: &Path,
@@ -60,7 +59,6 @@ pub async fn start_process<F, Fut, AI, A, EI>(
args: AI, args: AI,
envs: EI, envs: EI,
initial_pid_file: InitialPidFile, initial_pid_file: InitialPidFile,
retry_timeout: &Duration,
process_status_check: F, process_status_check: F,
) -> anyhow::Result<()> ) -> anyhow::Result<()>
where where
@@ -71,10 +69,6 @@ where
// Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference // Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference
EI: IntoIterator<Item = (String, String)>, EI: IntoIterator<Item = (String, String)>,
{ {
let retries: u128 = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
if !datadir.metadata().context("stat datadir")?.is_dir() {
anyhow::bail!("`datadir` must be a directory when calling this function: {datadir:?}");
}
let log_path = datadir.join(format!("{process_name}.log")); let log_path = datadir.join(format!("{process_name}.log"));
let process_log_file = fs::OpenOptions::new() let process_log_file = fs::OpenOptions::new()
.create(true) .create(true)
@@ -91,17 +85,8 @@ where
let background_command = command let background_command = command
.stdout(process_log_file) .stdout(process_log_file)
.stderr(same_file_for_stderr) .stderr(same_file_for_stderr)
.args(args) .args(args);
// spawn all child processes in their datadir, useful for all kinds of things, let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command));
// not least cleaning up child processes e.g. after an unclean exit from the test suite:
// ```
// lsof -d cwd -a +D Users/cs/src/neon/test_output
// ```
.current_dir(datadir);
let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars(
fill_rust_env_vars(background_command),
));
filled_cmd.envs(envs); filled_cmd.envs(envs);
let pid_file_to_check = match &initial_pid_file { let pid_file_to_check = match &initial_pid_file {
@@ -133,7 +118,7 @@ where
.unwrap(); .unwrap();
}); });
for retries in 0..retries { for retries in 0..RETRIES {
match process_started(pid, pid_file_to_check, &process_status_check).await { match process_started(pid, pid_file_to_check, &process_status_check).await {
Ok(true) => { Ok(true) => {
println!("\n{process_name} started and passed status check, pid: {pid}"); println!("\n{process_name} started and passed status check, pid: {pid}");
@@ -151,7 +136,7 @@ where
print!("."); print!(".");
io::stdout().flush().unwrap(); io::stdout().flush().unwrap();
} }
thread::sleep(RETRY_INTERVAL); thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
} }
Err(e) => { Err(e) => {
println!("error starting process {process_name:?}: {e:#}"); println!("error starting process {process_name:?}: {e:#}");
@@ -160,10 +145,9 @@ where
} }
} }
println!(); println!();
anyhow::bail!(format!( anyhow::bail!(
"{} did not start+pass status checks within {:?} seconds", "{process_name} did not start+pass status checks within {RETRY_UNTIL_SECS} seconds"
process_name, retry_timeout );
));
} }
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running. /// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
@@ -219,7 +203,7 @@ pub fn stop_process(
} }
pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> { pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
for retries in 0..STOP_RETRIES { for retries in 0..RETRIES {
match process_has_stopped(pid) { match process_has_stopped(pid) {
Ok(true) => { Ok(true) => {
println!("\n{process_name} stopped"); println!("\n{process_name} stopped");
@@ -235,7 +219,7 @@ pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
print!("."); print!(".");
io::stdout().flush().unwrap(); io::stdout().flush().unwrap();
} }
thread::sleep(RETRY_INTERVAL); thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
} }
Err(e) => { Err(e) => {
println!("{process_name} with pid {pid} failed to stop: {e:#}"); println!("{process_name} with pid {pid} failed to stop: {e:#}");
@@ -244,10 +228,7 @@ pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
} }
} }
println!(); println!();
anyhow::bail!(format!( anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
"{} with pid {} did not stop in {:?} seconds",
process_name, pid, STOP_RETRY_TIMEOUT
));
} }
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command { fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
@@ -287,15 +268,6 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
cmd cmd
} }
fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command {
for (var, val) in std::env::vars() {
if var.starts_with("NEON_") {
cmd = cmd.env(var, val);
}
}
cmd
}
/// Add a `pre_exec` to the cmd that, inbetween fork() and exec(), /// Add a `pre_exec` to the cmd that, inbetween fork() and exec(),
/// 1. Claims a pidfile with a fcntl lock on it and /// 1. Claims a pidfile with a fcntl lock on it and
/// 2. Sets up the pidfile's file descriptor so that it (and the lock) /// 2. Sets up the pidfile's file descriptor so that it (and the lock)
@@ -379,7 +351,7 @@ where
} }
} }
pub(crate) fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> { fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
match kill(pid, None) { match kill(pid, None) {
// Process exists, keep waiting // Process exists, keep waiting
Ok(_) => Ok(false), Ok(_) => Ok(false),

View File

@@ -9,25 +9,22 @@ use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
use compute_api::spec::ComputeMode; use compute_api::spec::ComputeMode;
use control_plane::endpoint::ComputeControlPlane; use control_plane::endpoint::ComputeControlPlane;
use control_plane::local_env::{ use control_plane::local_env::{InitForceMode, LocalEnv};
InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf, NeonLocalInitPageserverConf, use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
SafekeeperConf,
};
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode; use control_plane::safekeeper::SafekeeperNode;
use control_plane::storage_controller::{ use control_plane::storage_controller::StorageController;
NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
};
use control_plane::{broker, local_env}; use control_plane::{broker, local_env};
use pageserver_api::config::{ use pageserver_api::controller_api::{
NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy, PlacementPolicy,
};
use pageserver_api::models::{
ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo,
};
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
use pageserver_api::{
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT, DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT, DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
}; };
use pageserver_api::controller_api::{
NodeAvailabilityWrapper, PlacementPolicy, TenantCreateRequest,
};
use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInfo};
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
use postgres_backend::AuthType; use postgres_backend::AuthType;
use postgres_connection::parse_host_port; use postgres_connection::parse_host_port;
use safekeeper_api::{ use safekeeper_api::{
@@ -38,7 +35,6 @@ use std::collections::{BTreeSet, HashMap};
use std::path::PathBuf; use std::path::PathBuf;
use std::process::exit; use std::process::exit;
use std::str::FromStr; use std::str::FromStr;
use std::time::Duration;
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR; use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
use url::Host; use url::Host;
use utils::{ use utils::{
@@ -54,10 +50,48 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
const DEFAULT_BRANCH_NAME: &str = "main"; const DEFAULT_BRANCH_NAME: &str = "main";
project_git_version!(GIT_VERSION); project_git_version!(GIT_VERSION);
const DEFAULT_PG_VERSION: &str = "16"; const DEFAULT_PG_VERSION: &str = "15";
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/"; const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
fn default_conf(num_pageservers: u16) -> String {
let mut template = format!(
r#"
# Default built-in configuration, defined in main.rs
control_plane_api = '{DEFAULT_PAGESERVER_CONTROL_PLANE_API}'
[broker]
listen_addr = '{DEFAULT_BROKER_ADDR}'
[[safekeepers]]
id = {DEFAULT_SAFEKEEPER_ID}
pg_port = {DEFAULT_SAFEKEEPER_PG_PORT}
http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT}
"#,
);
for i in 0..num_pageservers {
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
template += &format!(
r#"
[[pageservers]]
id = {pageserver_id}
listen_pg_addr = '127.0.0.1:{pg_port}'
listen_http_addr = '127.0.0.1:{http_port}'
pg_auth_type = '{trust_auth}'
http_auth_type = '{trust_auth}'
"#,
trust_auth = AuthType::Trust,
)
}
template
}
/// ///
/// Timelines tree element used as a value in the HashMap. /// Timelines tree element used as a value in the HashMap.
/// ///
@@ -90,8 +124,7 @@ fn main() -> Result<()> {
handle_init(sub_args).map(Some) handle_init(sub_args).map(Some)
} else { } else {
// all other commands need an existing config // all other commands need an existing config
let mut env = let mut env = LocalEnv::load_config().context("Error loading config")?;
LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
let original_env = env.clone(); let original_env = env.clone();
let rt = tokio::runtime::Builder::new_current_thread() let rt = tokio::runtime::Builder::new_current_thread()
@@ -102,7 +135,7 @@ fn main() -> Result<()> {
let subcommand_result = match sub_name { let subcommand_result = match sub_name {
"tenant" => rt.block_on(handle_tenant(sub_args, &mut env)), "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
"timeline" => rt.block_on(handle_timeline(sub_args, &mut env)), "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
"start" => rt.block_on(handle_start_all(&env, get_start_timeout(sub_args))), "start" => rt.block_on(handle_start_all(sub_args, &env)),
"stop" => rt.block_on(handle_stop_all(sub_args, &env)), "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
"pageserver" => rt.block_on(handle_pageserver(sub_args, &env)), "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
"storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)), "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
@@ -121,7 +154,7 @@ fn main() -> Result<()> {
}; };
match subcommand_result { match subcommand_result {
Ok(Some(updated_env)) => updated_env.persist_config()?, Ok(Some(updated_env)) => updated_env.persist_config(&updated_env.base_data_dir)?,
Ok(None) => (), Ok(None) => (),
Err(e) => { Err(e) => {
eprintln!("command failed: {e:?}"); eprintln!("command failed: {e:?}");
@@ -310,66 +343,48 @@ fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId
} }
fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> { fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
let num_pageservers = init_match.get_one::<u16>("num-pageservers"); let num_pageservers = init_match
.get_one::<u16>("num-pageservers")
let force = init_match.get_one("force").expect("we set a default value"); .expect("num-pageservers arg has a default");
// Create config file
// Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`. let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
let init_conf: NeonLocalInitConf = if let Some(config_path) =
init_match.get_one::<PathBuf>("config")
{
// User (likely the Python test suite) provided a description of the environment.
if num_pageservers.is_some() {
bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
}
// load and parse the file // load and parse the file
let contents = std::fs::read_to_string(config_path).with_context(|| { std::fs::read_to_string(config_path).with_context(|| {
format!( format!(
"Could not read configuration file '{}'", "Could not read configuration file '{}'",
config_path.display() config_path.display()
) )
})?; })?
toml_edit::de::from_str(&contents)?
} else { } else {
// User (likely interactive) did not provide a description of the environment, give them the default // Built-in default config
NeonLocalInitConf { default_conf(*num_pageservers)
control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
broker: NeonBroker {
listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
},
safekeepers: vec![SafekeeperConf {
id: DEFAULT_SAFEKEEPER_ID,
pg_port: DEFAULT_SAFEKEEPER_PG_PORT,
http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,
..Default::default()
}],
pageservers: (0..num_pageservers.copied().unwrap_or(1))
.map(|i| {
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
NeonLocalInitPageserverConf {
id: pageserver_id,
listen_pg_addr: format!("127.0.0.1:{pg_port}"),
listen_http_addr: format!("127.0.0.1:{http_port}"),
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
other: Default::default(),
}
})
.collect(),
pg_distrib_dir: None,
neon_distrib_dir: None,
default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
storage_controller: None,
control_plane_compute_hook_api: None,
}
}; };
LocalEnv::init(init_conf, force) let pg_version = init_match
.context("materialize initial neon_local environment on disk")?; .get_one::<u32>("pg-version")
Ok(LocalEnv::load_config(&local_env::base_path()) .copied()
.expect("freshly written config should be loadable")) .context("Failed to parse postgres version from the argument string")?;
let mut env =
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
let force = init_match.get_one("force").expect("we set a default value");
env.init(pg_version, force)
.context("Failed to initialize neon repository")?;
// Create remote storage location for default LocalFs remote storage
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
// Initialize pageserver, create initial tenant and timeline.
for ps_conf in &env.pageservers {
PageServerNode::from_env(&env, ps_conf)
.initialize(&pageserver_config_overrides(init_match))
.unwrap_or_else(|e| {
eprintln!("pageserver init failed: {e:?}");
exit(1);
});
}
Ok(env)
} }
/// The default pageserver is the one where CLI tenant/timeline operations are sent by default. /// The default pageserver is the one where CLI tenant/timeline operations are sent by default.
@@ -384,6 +399,15 @@ fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
PageServerNode::from_env(env, ps_conf) PageServerNode::from_env(env, ps_conf)
} }
fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
init_match
.get_many::<String>("pageserver-config-override")
.into_iter()
.flatten()
.map(String::as_str)
.collect()
}
async fn handle_tenant( async fn handle_tenant(
tenant_match: &ArgMatches, tenant_match: &ArgMatches,
env: &mut local_env::LocalEnv, env: &mut local_env::LocalEnv,
@@ -395,54 +419,6 @@ async fn handle_tenant(
println!("{} {:?}", t.id, t.state); println!("{} {:?}", t.id, t.state);
} }
} }
Some(("import", import_match)) => {
let tenant_id = parse_tenant_id(import_match)?.unwrap_or_else(TenantId::generate);
let storage_controller = StorageController::from_env(env);
let create_response = storage_controller.tenant_import(tenant_id).await?;
let shard_zero = create_response
.shards
.first()
.expect("Import response omitted shards");
let attached_pageserver_id = shard_zero.node_id;
let pageserver =
PageServerNode::from_env(env, env.get_pageserver_conf(attached_pageserver_id)?);
println!(
"Imported tenant {tenant_id}, attached to pageserver {attached_pageserver_id}"
);
let timelines = pageserver
.http_client
.list_timelines(shard_zero.shard_id)
.await?;
// Pick a 'main' timeline that has no ancestors, the rest will get arbitrary names
let main_timeline = timelines
.iter()
.find(|t| t.ancestor_timeline_id.is_none())
.expect("No timelines found")
.timeline_id;
let mut branch_i = 0;
for timeline in timelines.iter() {
let branch_name = if timeline.timeline_id == main_timeline {
"main".to_string()
} else {
branch_i += 1;
format!("branch_{branch_i}")
};
println!(
"Importing timeline {tenant_id}/{} as branch {branch_name}",
timeline.timeline_id
);
env.register_branch_mapping(branch_name, tenant_id, timeline.timeline_id)?;
}
}
Some(("create", create_match)) => { Some(("create", create_match)) => {
let tenant_conf: HashMap<_, _> = create_match let tenant_conf: HashMap<_, _> = create_match
.get_many::<String>("config") .get_many::<String>("config")
@@ -602,9 +578,13 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
Some(("import", import_match)) => { Some(("import", import_match)) => {
let tenant_id = get_tenant_id(import_match, env)?; let tenant_id = get_tenant_id(import_match, env)?;
let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided"); let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
let branch_name = import_match let name = import_match
.get_one::<String>("branch-name") .get_one::<String>("node-name")
.ok_or_else(|| anyhow!("No branch name provided"))?; .ok_or_else(|| anyhow!("No node name provided"))?;
let update_catalog = import_match
.get_one::<bool>("update-catalog")
.cloned()
.unwrap_or_default();
// Parse base inputs // Parse base inputs
let base_tarfile = import_match let base_tarfile = import_match
@@ -631,11 +611,24 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
.copied() .copied()
.context("Failed to parse postgres version from the argument string")?; .context("Failed to parse postgres version from the argument string")?;
let mut cplane = ComputeControlPlane::load(env.clone())?;
println!("Importing timeline into pageserver ..."); println!("Importing timeline into pageserver ...");
pageserver pageserver
.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version) .timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)
.await?; .await?;
env.register_branch_mapping(branch_name.to_string(), tenant_id, timeline_id)?; env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
println!("Creating endpoint for imported timeline ...");
cplane.new_endpoint(
name,
tenant_id,
timeline_id,
None,
None,
pg_version,
ComputeMode::Primary,
!update_catalog,
)?;
println!("Done"); println!("Done");
} }
Some(("branch", branch_match)) => { Some(("branch", branch_match)) => {
@@ -798,8 +791,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
.copied() .copied()
.unwrap_or(false); .unwrap_or(false);
let allow_multiple = sub_args.get_flag("allow-multiple");
let mode = match (lsn, hot_standby) { let mode = match (lsn, hot_standby) {
(Some(lsn), false) => ComputeMode::Static(lsn), (Some(lsn), false) => ComputeMode::Static(lsn),
(None, true) => ComputeMode::Replica, (None, true) => ComputeMode::Replica,
@@ -817,9 +808,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
_ => {} _ => {}
} }
if !allow_multiple { cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
}
cplane.new_endpoint( cplane.new_endpoint(
&endpoint_id, &endpoint_id,
@@ -848,15 +837,20 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
let remote_ext_config = sub_args.get_one::<String>("remote-ext-config"); let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");
let allow_multiple = sub_args.get_flag("allow-multiple"); // If --safekeepers argument is given, use only the listed safekeeper nodes.
let safekeepers =
// If --safekeepers argument is given, use only the listed if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
// safekeeper nodes; otherwise all from the env. let mut safekeepers: Vec<NodeId> = Vec::new();
let safekeepers = if let Some(safekeepers) = parse_safekeepers(sub_args)? { for sk_id in safekeepers_str.split(',').map(str::trim) {
safekeepers let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| {
} else { anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list")
env.safekeepers.iter().map(|sk| sk.id).collect() })?);
}; safekeepers.push(sk_id);
}
safekeepers
} else {
env.safekeepers.iter().map(|sk| sk.id).collect()
};
let endpoint = cplane let endpoint = cplane
.endpoints .endpoints
@@ -868,13 +862,11 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
.cloned() .cloned()
.unwrap_or_default(); .unwrap_or_default();
if !allow_multiple { cplane.check_conflicting_endpoints(
cplane.check_conflicting_endpoints( endpoint.mode,
endpoint.mode, endpoint.tenant_id,
endpoint.tenant_id, endpoint.timeline_id,
endpoint.timeline_id, )?;
)?;
}
let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id { let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
let conf = env.get_pageserver_conf(pageserver_id).unwrap(); let conf = env.get_pageserver_conf(pageserver_id).unwrap();
@@ -960,10 +952,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
}) })
.collect::<Vec<_>>() .collect::<Vec<_>>()
}; };
// If --safekeepers argument is given, use only the listed endpoint.reconfigure(pageservers, None).await?;
// safekeeper nodes; otherwise all from the env.
let safekeepers = parse_safekeepers(sub_args)?;
endpoint.reconfigure(pageservers, None, safekeepers).await?;
} }
"stop" => { "stop" => {
let endpoint_id = sub_args let endpoint_id = sub_args
@@ -985,23 +974,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
Ok(()) Ok(())
} }
/// Parse --safekeepers as list of safekeeper ids.
fn parse_safekeepers(sub_args: &ArgMatches) -> Result<Option<Vec<NodeId>>> {
if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
let mut safekeepers: Vec<NodeId> = Vec::new();
for sk_id in safekeepers_str.split(',').map(str::trim) {
let sk_id = NodeId(
u64::from_str(sk_id)
.map_err(|_| anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list"))?,
);
safekeepers.push(sk_id);
}
Ok(Some(safekeepers))
} else {
Ok(None)
}
}
fn handle_mappings(sub_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> { fn handle_mappings(sub_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
let (sub_name, sub_args) = match sub_match.subcommand() { let (sub_name, sub_args) = match sub_match.subcommand() {
Some(ep_subcommand_data) => ep_subcommand_data, Some(ep_subcommand_data) => ep_subcommand_data,
@@ -1047,48 +1019,11 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
)) ))
} }
fn get_start_timeout(args: &ArgMatches) -> &Duration {
let humantime_duration = args
.get_one::<humantime::Duration>("start-timeout")
.expect("invalid value for start-timeout");
humantime_duration.as_ref()
}
fn storage_controller_start_args(args: &ArgMatches) -> NeonStorageControllerStartArgs {
let maybe_instance_id = args.get_one::<u8>("instance-id");
let base_port = args.get_one::<u16>("base-port");
if maybe_instance_id.is_some() && base_port.is_none() {
panic!("storage-controller start specificied instance-id but did not provide base-port");
}
let start_timeout = args
.get_one::<humantime::Duration>("start-timeout")
.expect("invalid value for start-timeout");
NeonStorageControllerStartArgs {
instance_id: maybe_instance_id.copied().unwrap_or(1),
base_port: base_port.copied(),
start_timeout: *start_timeout,
}
}
fn storage_controller_stop_args(args: &ArgMatches) -> NeonStorageControllerStopArgs {
let maybe_instance_id = args.get_one::<u8>("instance-id");
let immediate = args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
NeonStorageControllerStopArgs {
instance_id: maybe_instance_id.copied().unwrap_or(1),
immediate,
}
}
async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
match sub_match.subcommand() { match sub_match.subcommand() {
Some(("start", subcommand_args)) => { Some(("start", subcommand_args)) => {
if let Err(e) = get_pageserver(env, subcommand_args)? if let Err(e) = get_pageserver(env, subcommand_args)?
.start(get_start_timeout(subcommand_args)) .start(&pageserver_config_overrides(subcommand_args))
.await .await
{ {
eprintln!("pageserver start failed: {e}"); eprintln!("pageserver start failed: {e}");
@@ -1116,12 +1051,30 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
exit(1); exit(1);
} }
if let Err(e) = pageserver.start(get_start_timeout(sub_match)).await { if let Err(e) = pageserver
.start(&pageserver_config_overrides(subcommand_args))
.await
{
eprintln!("pageserver start failed: {e}"); eprintln!("pageserver start failed: {e}");
exit(1); exit(1);
} }
} }
Some(("set-state", subcommand_args)) => {
let pageserver = get_pageserver(env, subcommand_args)?;
let scheduling = subcommand_args.get_one("scheduling");
let availability = subcommand_args.get_one("availability");
let storage_controller = StorageController::from_env(env);
storage_controller
.node_configure(NodeConfigureRequest {
node_id: pageserver.conf.id,
scheduling: scheduling.cloned(),
availability: availability.cloned(),
})
.await?;
}
Some(("status", subcommand_args)) => { Some(("status", subcommand_args)) => {
match get_pageserver(env, subcommand_args)?.check_status().await { match get_pageserver(env, subcommand_args)?.check_status().await {
Ok(_) => println!("Page server is up and running"), Ok(_) => println!("Page server is up and running"),
@@ -1144,15 +1097,20 @@ async fn handle_storage_controller(
) -> Result<()> { ) -> Result<()> {
let svc = StorageController::from_env(env); let svc = StorageController::from_env(env);
match sub_match.subcommand() { match sub_match.subcommand() {
Some(("start", start_match)) => { Some(("start", _start_match)) => {
if let Err(e) = svc.start(storage_controller_start_args(start_match)).await { if let Err(e) = svc.start().await {
eprintln!("start failed: {e}"); eprintln!("start failed: {e}");
exit(1); exit(1);
} }
} }
Some(("stop", stop_match)) => { Some(("stop", stop_match)) => {
if let Err(e) = svc.stop(storage_controller_stop_args(stop_match)).await { let immediate = stop_match
.get_one::<String>("stop-mode")
.map(|s| s.as_str())
== Some("immediate");
if let Err(e) = svc.stop(immediate).await {
eprintln!("stop failed: {}", e); eprintln!("stop failed: {}", e);
exit(1); exit(1);
} }
@@ -1199,10 +1157,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
"start" => { "start" => {
let extra_opts = safekeeper_extra_opts(sub_args); let extra_opts = safekeeper_extra_opts(sub_args);
if let Err(e) = safekeeper if let Err(e) = safekeeper.start(extra_opts).await {
.start(extra_opts, get_start_timeout(sub_args))
.await
{
eprintln!("safekeeper start failed: {}", e); eprintln!("safekeeper start failed: {}", e);
exit(1); exit(1);
} }
@@ -1228,10 +1183,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
} }
let extra_opts = safekeeper_extra_opts(sub_args); let extra_opts = safekeeper_extra_opts(sub_args);
if let Err(e) = safekeeper if let Err(e) = safekeeper.start(extra_opts).await {
.start(extra_opts, get_start_timeout(sub_args))
.await
{
eprintln!("safekeeper start failed: {}", e); eprintln!("safekeeper start failed: {}", e);
exit(1); exit(1);
} }
@@ -1244,23 +1196,15 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
Ok(()) Ok(())
} }
async fn handle_start_all( async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
env: &local_env::LocalEnv,
retry_timeout: &Duration,
) -> anyhow::Result<()> {
// Endpoints are not started automatically // Endpoints are not started automatically
broker::start_broker_process(env, retry_timeout).await?; broker::start_broker_process(env).await?;
// Only start the storage controller if the pageserver is configured to need it // Only start the storage controller if the pageserver is configured to need it
if env.control_plane_api.is_some() { if env.control_plane_api.is_some() {
let storage_controller = StorageController::from_env(env); let storage_controller = StorageController::from_env(env);
if let Err(e) = storage_controller if let Err(e) = storage_controller.start().await {
.start(NeonStorageControllerStartArgs::with_default_instance_id(
(*retry_timeout).into(),
))
.await
{
eprintln!("storage_controller start failed: {:#}", e); eprintln!("storage_controller start failed: {:#}", e);
try_stop_all(env, true).await; try_stop_all(env, true).await;
exit(1); exit(1);
@@ -1269,7 +1213,10 @@ async fn handle_start_all(
for ps_conf in &env.pageservers { for ps_conf in &env.pageservers {
let pageserver = PageServerNode::from_env(env, ps_conf); let pageserver = PageServerNode::from_env(env, ps_conf);
if let Err(e) = pageserver.start(retry_timeout).await { if let Err(e) = pageserver
.start(&pageserver_config_overrides(sub_match))
.await
{
eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e); eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
try_stop_all(env, true).await; try_stop_all(env, true).await;
exit(1); exit(1);
@@ -1278,76 +1225,15 @@ async fn handle_start_all(
for node in env.safekeepers.iter() { for node in env.safekeepers.iter() {
let safekeeper = SafekeeperNode::from_env(env, node); let safekeeper = SafekeeperNode::from_env(env, node);
if let Err(e) = safekeeper.start(vec![], retry_timeout).await { if let Err(e) = safekeeper.start(vec![]).await {
eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e); eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
try_stop_all(env, false).await; try_stop_all(env, false).await;
exit(1); exit(1);
} }
} }
neon_start_status_check(env, retry_timeout).await?;
Ok(()) Ok(())
} }
async fn neon_start_status_check(
env: &local_env::LocalEnv,
retry_timeout: &Duration,
) -> anyhow::Result<()> {
const RETRY_INTERVAL: Duration = Duration::from_millis(100);
const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);
if env.control_plane_api.is_none() {
return Ok(());
}
let storcon = StorageController::from_env(env);
let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
let notice_after_retries = retry_timeout.as_millis() / NOTICE_AFTER_RETRIES.as_millis();
println!("\nRunning neon status check");
for retry in 0..retries {
if retry == notice_after_retries {
println!("\nNeon status check has not passed yet, continuing to wait")
}
let mut passed = true;
let mut nodes = storcon.node_list().await?;
let mut pageservers = env.pageservers.clone();
if nodes.len() != pageservers.len() {
continue;
}
nodes.sort_by_key(|ps| ps.id);
pageservers.sort_by_key(|ps| ps.id);
for (idx, pageserver) in pageservers.iter().enumerate() {
let node = &nodes[idx];
if node.id != pageserver.id {
passed = false;
break;
}
if !matches!(node.availability, NodeAvailabilityWrapper::Active) {
passed = false;
break;
}
}
if passed {
println!("\nNeon started and passed status check");
return Ok(());
}
tokio::time::sleep(RETRY_INTERVAL).await;
}
anyhow::bail!("\nNeon passed status check")
}
async fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { async fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
let immediate = let immediate =
sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate"); sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
@@ -1362,7 +1248,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
match ComputeControlPlane::load(env.clone()) { match ComputeControlPlane::load(env.clone()) {
Ok(cplane) => { Ok(cplane) => {
for (_k, node) in cplane.endpoints { for (_k, node) in cplane.endpoints {
if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) { if let Err(e) = node.stop(if immediate { "immediate" } else { "fast " }, false) {
eprintln!("postgres stop failed: {e:#}"); eprintln!("postgres stop failed: {e:#}");
} }
} }
@@ -1390,35 +1276,15 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
eprintln!("neon broker stop failed: {e:#}"); eprintln!("neon broker stop failed: {e:#}");
} }
// Stop all storage controller instances. In the most common case there's only one, if env.control_plane_api.is_some() {
// but iterate though the base data directory in order to discover the instances.
let storcon_instances = env
.storage_controller_instances()
.await
.expect("Must inspect data dir");
for (instance_id, _instance_dir_path) in storcon_instances {
let storage_controller = StorageController::from_env(env); let storage_controller = StorageController::from_env(env);
let stop_args = NeonStorageControllerStopArgs { if let Err(e) = storage_controller.stop(immediate).await {
instance_id, eprintln!("storage controller stop failed: {e:#}");
immediate,
};
if let Err(e) = storage_controller.stop(stop_args).await {
eprintln!("Storage controller instance {instance_id} stop failed: {e:#}");
} }
} }
} }
fn cli() -> Command { fn cli() -> Command {
let timeout_arg = Arg::new("start-timeout")
.long("start-timeout")
.short('t')
.global(true)
.help("timeout until we fail the command, e.g. 30s")
.value_parser(value_parser!(humantime::Duration))
.default_value("10s")
.required(false);
let branch_name_arg = Arg::new("branch-name") let branch_name_arg = Arg::new("branch-name")
.long("branch-name") .long("branch-name")
.help("Name of the branch to be created or used as an alias for other services") .help("Name of the branch to be created or used as an alias for other services")
@@ -1491,6 +1357,13 @@ fn cli() -> Command {
.required(false) .required(false)
.value_name("stop-mode"); .value_name("stop-mode");
let pageserver_config_args = Arg::new("pageserver-config-override")
.long("pageserver-config-override")
.num_args(1)
.action(ArgAction::Append)
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
.required(false);
let remote_ext_config_args = Arg::new("remote-ext-config") let remote_ext_config_args = Arg::new("remote-ext-config")
.long("remote-ext-config") .long("remote-ext-config")
.num_args(1) .num_args(1)
@@ -1524,7 +1397,9 @@ fn cli() -> Command {
let num_pageservers_arg = Arg::new("num-pageservers") let num_pageservers_arg = Arg::new("num-pageservers")
.value_parser(value_parser!(u16)) .value_parser(value_parser!(u16))
.long("num-pageservers") .long("num-pageservers")
.help("How many pageservers to create (default 1)"); .help("How many pageservers to create (default 1)")
.required(false)
.default_value("1");
let update_catalog = Arg::new("update-catalog") let update_catalog = Arg::new("update-catalog")
.value_parser(value_parser!(bool)) .value_parser(value_parser!(bool))
@@ -1538,37 +1413,20 @@ fn cli() -> Command {
.help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`") .help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`")
.required(false); .required(false);
let allow_multiple = Arg::new("allow-multiple")
.help("Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests.")
.long("allow-multiple")
.action(ArgAction::SetTrue)
.required(false);
let instance_id = Arg::new("instance-id")
.long("instance-id")
.help("Identifier used to distinguish storage controller instances (default 1)")
.value_parser(value_parser!(u8))
.required(false);
let base_port = Arg::new("base-port")
.long("base-port")
.help("Base port for the storage controller instance idenfified by instance-id (defaults to pagserver cplane api)")
.value_parser(value_parser!(u16))
.required(false);
Command::new("Neon CLI") Command::new("Neon CLI")
.arg_required_else_help(true) .arg_required_else_help(true)
.version(GIT_VERSION) .version(GIT_VERSION)
.subcommand( .subcommand(
Command::new("init") Command::new("init")
.about("Initialize a new Neon repository, preparing configs for services to start with") .about("Initialize a new Neon repository, preparing configs for services to start with")
.arg(pageserver_config_args.clone())
.arg(num_pageservers_arg.clone()) .arg(num_pageservers_arg.clone())
.arg( .arg(
Arg::new("config") Arg::new("config")
.long("config") .long("config")
.required(false) .required(false)
.value_parser(value_parser!(PathBuf)) .value_parser(value_parser!(PathBuf))
.value_name("config") .value_name("config"),
) )
.arg(pg_version_arg.clone()) .arg(pg_version_arg.clone())
.arg(force_arg) .arg(force_arg)
@@ -1576,7 +1434,6 @@ fn cli() -> Command {
.subcommand( .subcommand(
Command::new("timeline") Command::new("timeline")
.about("Manage timelines") .about("Manage timelines")
.arg_required_else_help(true)
.subcommand(Command::new("list") .subcommand(Command::new("list")
.about("List all timelines, available to this pageserver") .about("List all timelines, available to this pageserver")
.arg(tenant_id_arg.clone())) .arg(tenant_id_arg.clone()))
@@ -1599,7 +1456,8 @@ fn cli() -> Command {
.about("Import timeline from basebackup directory") .about("Import timeline from basebackup directory")
.arg(tenant_id_arg.clone()) .arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone()) .arg(timeline_id_arg.clone())
.arg(branch_name_arg.clone()) .arg(Arg::new("node-name").long("node-name")
.help("Name to assign to the imported timeline"))
.arg(Arg::new("base-tarfile") .arg(Arg::new("base-tarfile")
.long("base-tarfile") .long("base-tarfile")
.value_parser(value_parser!(PathBuf)) .value_parser(value_parser!(PathBuf))
@@ -1615,6 +1473,7 @@ fn cli() -> Command {
.arg(Arg::new("end-lsn").long("end-lsn") .arg(Arg::new("end-lsn").long("end-lsn")
.help("Lsn the basebackup ends at")) .help("Lsn the basebackup ends at"))
.arg(pg_version_arg.clone()) .arg(pg_version_arg.clone())
.arg(update_catalog.clone())
) )
).subcommand( ).subcommand(
Command::new("tenant") Command::new("tenant")
@@ -1637,8 +1496,6 @@ fn cli() -> Command {
.subcommand(Command::new("config") .subcommand(Command::new("config")
.arg(tenant_id_arg.clone()) .arg(tenant_id_arg.clone())
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))) .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false)))
.subcommand(Command::new("import").arg(tenant_id_arg.clone().required(true))
.about("Import a tenant that is present in remote storage, and create branches for its timelines"))
) )
.subcommand( .subcommand(
Command::new("pageserver") Command::new("pageserver")
@@ -1648,7 +1505,7 @@ fn cli() -> Command {
.subcommand(Command::new("status")) .subcommand(Command::new("status"))
.subcommand(Command::new("start") .subcommand(Command::new("start")
.about("Start local pageserver") .about("Start local pageserver")
.arg(timeout_arg.clone()) .arg(pageserver_config_args.clone())
) )
.subcommand(Command::new("stop") .subcommand(Command::new("stop")
.about("Stop local pageserver") .about("Stop local pageserver")
@@ -1656,20 +1513,22 @@ fn cli() -> Command {
) )
.subcommand(Command::new("restart") .subcommand(Command::new("restart")
.about("Restart local pageserver") .about("Restart local pageserver")
.arg(timeout_arg.clone()) .arg(pageserver_config_args.clone())
)
.subcommand(Command::new("set-state")
.arg(Arg::new("availability").value_parser(value_parser!(NodeAvailability)).long("availability").action(ArgAction::Set).help("Availability state: offline,active"))
.arg(Arg::new("scheduling").value_parser(value_parser!(NodeSchedulingPolicy)).long("scheduling").action(ArgAction::Set).help("Scheduling state: draining,pause,filling,active"))
.about("Set scheduling or availability state of pageserver node")
.arg(pageserver_config_args.clone())
) )
) )
.subcommand( .subcommand(
Command::new("storage_controller") Command::new("storage_controller")
.arg_required_else_help(true) .arg_required_else_help(true)
.about("Manage storage_controller") .about("Manage storage_controller")
.subcommand(Command::new("start").about("Start storage controller") .subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
.arg(timeout_arg.clone()) .subcommand(Command::new("stop").about("Stop local pageserver")
.arg(instance_id.clone()) .arg(stop_mode_arg.clone()))
.arg(base_port))
.subcommand(Command::new("stop").about("Stop storage controller")
.arg(stop_mode_arg.clone())
.arg(instance_id))
) )
.subcommand( .subcommand(
Command::new("safekeeper") Command::new("safekeeper")
@@ -1679,7 +1538,6 @@ fn cli() -> Command {
.about("Start local safekeeper") .about("Start local safekeeper")
.arg(safekeeper_id_arg.clone()) .arg(safekeeper_id_arg.clone())
.arg(safekeeper_extra_opt_arg.clone()) .arg(safekeeper_extra_opt_arg.clone())
.arg(timeout_arg.clone())
) )
.subcommand(Command::new("stop") .subcommand(Command::new("stop")
.about("Stop local safekeeper") .about("Stop local safekeeper")
@@ -1691,7 +1549,6 @@ fn cli() -> Command {
.arg(safekeeper_id_arg) .arg(safekeeper_id_arg)
.arg(stop_mode_arg.clone()) .arg(stop_mode_arg.clone())
.arg(safekeeper_extra_opt_arg) .arg(safekeeper_extra_opt_arg)
.arg(timeout_arg.clone())
) )
) )
.subcommand( .subcommand(
@@ -1716,22 +1573,18 @@ fn cli() -> Command {
.arg(pg_version_arg.clone()) .arg(pg_version_arg.clone())
.arg(hot_standby_arg.clone()) .arg(hot_standby_arg.clone())
.arg(update_catalog) .arg(update_catalog)
.arg(allow_multiple.clone())
) )
.subcommand(Command::new("start") .subcommand(Command::new("start")
.about("Start postgres.\n If the endpoint doesn't exist yet, it is created.") .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
.arg(endpoint_id_arg.clone()) .arg(endpoint_id_arg.clone())
.arg(endpoint_pageserver_id_arg.clone()) .arg(endpoint_pageserver_id_arg.clone())
.arg(safekeepers_arg.clone()) .arg(safekeepers_arg)
.arg(remote_ext_config_args) .arg(remote_ext_config_args)
.arg(create_test_user) .arg(create_test_user)
.arg(allow_multiple.clone())
.arg(timeout_arg.clone())
) )
.subcommand(Command::new("reconfigure") .subcommand(Command::new("reconfigure")
.about("Reconfigure the endpoint") .about("Reconfigure the endpoint")
.arg(endpoint_pageserver_id_arg) .arg(endpoint_pageserver_id_arg)
.arg(safekeepers_arg)
.arg(endpoint_id_arg.clone()) .arg(endpoint_id_arg.clone())
.arg(tenant_id_arg.clone()) .arg(tenant_id_arg.clone())
) )
@@ -1779,7 +1632,7 @@ fn cli() -> Command {
.subcommand( .subcommand(
Command::new("start") Command::new("start")
.about("Start page server and safekeepers") .about("Start page server and safekeepers")
.arg(timeout_arg.clone()) .arg(pageserver_config_args)
) )
.subcommand( .subcommand(
Command::new("stop") Command::new("stop")

View File

@@ -1,22 +1,17 @@
//! Code to manage the storage broker //! Code to manage the storage broker
//! //!
//! In the local test environment, the storage broker stores its data directly in //! In the local test environment, the data for each safekeeper is stored in
//! //!
//! ```text //! ```text
//! .neon //! .neon/safekeepers/<safekeeper id>
//! ``` //! ```
use std::time::Duration;
use anyhow::Context; use anyhow::Context;
use camino::Utf8PathBuf; use camino::Utf8PathBuf;
use crate::{background_process, local_env}; use crate::{background_process, local_env};
pub async fn start_broker_process( pub async fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
env: &local_env::LocalEnv,
retry_timeout: &Duration,
) -> anyhow::Result<()> {
let broker = &env.broker; let broker = &env.broker;
let listen_addr = &broker.listen_addr; let listen_addr = &broker.listen_addr;
@@ -32,7 +27,6 @@ pub async fn start_broker_process(
args, args,
[], [],
background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)), background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)),
retry_timeout,
|| async { || async {
let url = broker.client_url(); let url = broker.client_url();
let status_url = url.join("status").with_context(|| { let status_url = url.join("status").with_context(|| {

View File

@@ -499,23 +499,6 @@ impl Endpoint {
.join(",") .join(",")
} }
/// Map safekeepers ids to the actual connection strings.
fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {
let mut safekeeper_connstrings = Vec::new();
if self.mode == ComputeMode::Primary {
for sk_id in sk_ids {
let sk = self
.env
.safekeepers
.iter()
.find(|node| node.id == sk_id)
.ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
}
}
Ok(safekeeper_connstrings)
}
pub async fn start( pub async fn start(
&self, &self,
auth_token: &Option<String>, auth_token: &Option<String>,
@@ -540,7 +523,18 @@ impl Endpoint {
let pageserver_connstring = Self::build_pageserver_connstr(&pageservers); let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
assert!(!pageserver_connstring.is_empty()); assert!(!pageserver_connstring.is_empty());
let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?; let mut safekeeper_connstrings = Vec::new();
if self.mode == ComputeMode::Primary {
for sk_id in safekeepers {
let sk = self
.env
.safekeepers
.iter()
.find(|node| node.id == sk_id)
.ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
}
}
// check for file remote_extensions_spec.json // check for file remote_extensions_spec.json
// if it is present, read it and pass to compute_ctl // if it is present, read it and pass to compute_ctl
@@ -560,7 +554,6 @@ impl Endpoint {
format_version: 1.0, format_version: 1.0,
operation_uuid: None, operation_uuid: None,
features: self.features.clone(), features: self.features.clone(),
swap_size_bytes: None,
cluster: Cluster { cluster: Cluster {
cluster_id: None, // project ID: not used cluster_id: None, // project ID: not used
name: None, // project name: not used name: None, // project name: not used
@@ -598,6 +591,7 @@ impl Endpoint {
remote_extensions, remote_extensions,
pgbouncer_settings: None, pgbouncer_settings: None,
shard_stripe_size: Some(shard_stripe_size), shard_stripe_size: Some(shard_stripe_size),
primary_is_running: None,
}; };
let spec_path = self.endpoint_path().join("spec.json"); let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?; std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
@@ -746,7 +740,6 @@ impl Endpoint {
&self, &self,
mut pageservers: Vec<(Host, u16)>, mut pageservers: Vec<(Host, u16)>,
stripe_size: Option<ShardStripeSize>, stripe_size: Option<ShardStripeSize>,
safekeepers: Option<Vec<NodeId>>,
) -> Result<()> { ) -> Result<()> {
let mut spec: ComputeSpec = { let mut spec: ComputeSpec = {
let spec_path = self.endpoint_path().join("spec.json"); let spec_path = self.endpoint_path().join("spec.json");
@@ -781,12 +774,6 @@ impl Endpoint {
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize); spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
} }
// If safekeepers are not specified, don't change them.
if let Some(safekeepers) = safekeepers {
let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
spec.safekeeper_connstrings = safekeeper_connstrings;
}
let client = reqwest::Client::builder() let client = reqwest::Client::builder()
.timeout(Duration::from_secs(30)) .timeout(Duration::from_secs(30))
.build() .build()
@@ -824,12 +811,11 @@ impl Endpoint {
// cleanup work to do after postgres stops, like syncing safekeepers, // cleanup work to do after postgres stops, like syncing safekeepers,
// etc. // etc.
// //
// If destroying or stop mode is immediate, send it SIGTERM before // If destroying, send it SIGTERM before waiting. Sometimes we do *not*
// waiting. Sometimes we do *not* want this cleanup: tests intentionally // want this cleanup: tests intentionally do stop when majority of
// do stop when majority of safekeepers is down, so sync-safekeepers // safekeepers is down, so sync-safekeepers would hang otherwise. This
// would hang otherwise. This could be a separate flag though. // could be a separate flag though.
let send_sigterm = destroy || mode == "immediate"; self.wait_for_compute_ctl_to_exit(destroy)?;
self.wait_for_compute_ctl_to_exit(send_sigterm)?;
if destroy { if destroy {
println!( println!(
"Destroying postgres data directory '{}'", "Destroying postgres data directory '{}'",

View File

@@ -3,7 +3,7 @@
//! Now it also provides init method which acts like a stub for proper installation //! Now it also provides init method which acts like a stub for proper installation
//! script which will use local paths. //! script which will use local paths.
use anyhow::{bail, Context}; use anyhow::{bail, ensure, Context};
use clap::ValueEnum; use clap::ValueEnum;
use postgres_backend::AuthType; use postgres_backend::AuthType;
@@ -17,17 +17,14 @@ use std::net::Ipv4Addr;
use std::net::SocketAddr; use std::net::SocketAddr;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::process::{Command, Stdio}; use std::process::{Command, Stdio};
use std::time::Duration;
use utils::{ use utils::{
auth::{encode_from_key_file, Claims}, auth::{encode_from_key_file, Claims},
id::{NodeId, TenantId, TenantTimelineId, TimelineId}, id::{NodeId, TenantId, TenantTimelineId, TimelineId},
}; };
use crate::pageserver::PageServerNode;
use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
use crate::safekeeper::SafekeeperNode; use crate::safekeeper::SafekeeperNode;
pub const DEFAULT_PG_VERSION: u32 = 16; pub const DEFAULT_PG_VERSION: u32 = 15;
// //
// This data structures represents neon_local CLI config // This data structures represents neon_local CLI config
@@ -36,107 +33,63 @@ pub const DEFAULT_PG_VERSION: u32 = 16;
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for // to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
// an example. // an example.
// //
#[derive(PartialEq, Eq, Clone, Debug)] #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub struct LocalEnv { pub struct LocalEnv {
// Base directory for all the nodes (the pageserver, safekeepers and // Base directory for all the nodes (the pageserver, safekeepers and
// compute endpoints). // compute endpoints).
// //
// This is not stored in the config file. Rather, this is the path where the // This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the NEON_REPO_DIR env variable which // config file itself is. It is read from the NEON_REPO_DIR env variable or
// must be an absolute path. If the env var is not set, $PWD/.neon is used. // '.neon' if not given.
#[serde(skip)]
pub base_data_dir: PathBuf, pub base_data_dir: PathBuf,
// Path to postgres distribution. It's expected that "bin", "include", // Path to postgres distribution. It's expected that "bin", "include",
// "lib", "share" from postgres distribution are there. If at some point // "lib", "share" from postgres distribution are there. If at some point
// in time we will be able to run against vanilla postgres we may split that // in time we will be able to run against vanilla postgres we may split that
// to four separate paths and match OS-specific installation layout. // to four separate paths and match OS-specific installation layout.
#[serde(default)]
pub pg_distrib_dir: PathBuf, pub pg_distrib_dir: PathBuf,
// Path to pageserver binary. // Path to pageserver binary.
#[serde(default)]
pub neon_distrib_dir: PathBuf, pub neon_distrib_dir: PathBuf,
// Default tenant ID to use with the 'neon_local' command line utility, when // Default tenant ID to use with the 'neon_local' command line utility, when
// --tenant_id is not explicitly specified. // --tenant_id is not explicitly specified.
#[serde(default)]
pub default_tenant_id: Option<TenantId>, pub default_tenant_id: Option<TenantId>,
// used to issue tokens during e.g pg start // used to issue tokens during e.g pg start
#[serde(default)]
pub private_key_path: PathBuf, pub private_key_path: PathBuf,
pub broker: NeonBroker, pub broker: NeonBroker,
// Configuration for the storage controller (1 per neon_local environment)
pub storage_controller: NeonStorageControllerConf,
/// This Vec must always contain at least one pageserver /// This Vec must always contain at least one pageserver
/// Populdated by [`Self::load_config`] from the individual `pageserver.toml`s.
/// NB: not used anymore except for informing users that they need to change their `.neon/config`.
pub pageservers: Vec<PageServerConf>, pub pageservers: Vec<PageServerConf>,
#[serde(default)]
pub safekeepers: Vec<SafekeeperConf>, pub safekeepers: Vec<SafekeeperConf>,
// Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will // Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will
// be propagated into each pageserver's configuration. // be propagated into each pageserver's configuration.
#[serde(default)]
pub control_plane_api: Option<Url>, pub control_plane_api: Option<Url>,
// Control plane upcall API for storage controller. If set, this will be propagated into the // Control plane upcall API for storage controller. If set, this will be propagated into the
// storage controller's configuration. // storage controller's configuration.
#[serde(default)]
pub control_plane_compute_hook_api: Option<Url>, pub control_plane_compute_hook_api: Option<Url>,
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user. /// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
#[serde(default)]
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here, // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error. // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table". // https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
}
/// On-disk state stored in `.neon/config`.
#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct OnDiskConfig {
pub pg_distrib_dir: PathBuf,
pub neon_distrib_dir: PathBuf,
pub default_tenant_id: Option<TenantId>,
pub private_key_path: PathBuf,
pub broker: NeonBroker,
pub storage_controller: NeonStorageControllerConf,
#[serde(
skip_serializing,
deserialize_with = "fail_if_pageservers_field_specified"
)]
pub pageservers: Vec<PageServerConf>,
pub safekeepers: Vec<SafekeeperConf>,
pub control_plane_api: Option<Url>,
pub control_plane_compute_hook_api: Option<Url>,
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>, branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
} }
fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
where
D: serde::Deserializer<'de>,
{
Err(serde::de::Error::custom(
"The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \
Please remove the `pageservers` from your .neon/config.",
))
}
/// The description of the neon_local env to be initialized by `neon_local init --config`.
#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct NeonLocalInitConf {
// TODO: do we need this? Seems unused
pub pg_distrib_dir: Option<PathBuf>,
// TODO: do we need this? Seems unused
pub neon_distrib_dir: Option<PathBuf>,
pub default_tenant_id: TenantId,
pub broker: NeonBroker,
pub storage_controller: Option<NeonStorageControllerConf>,
pub pageservers: Vec<NeonLocalInitPageserverConf>,
pub safekeepers: Vec<SafekeeperConf>,
pub control_plane_api: Option<Option<Url>>,
pub control_plane_compute_hook_api: Option<Option<Url>>,
}
/// Broker config for cluster internal communication. /// Broker config for cluster internal communication.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)] #[serde(default)]
@@ -145,48 +98,6 @@ pub struct NeonBroker {
pub listen_addr: SocketAddr, pub listen_addr: SocketAddr,
} }
/// Broker config for cluster internal communication.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct NeonStorageControllerConf {
/// Heartbeat timeout before marking a node offline
#[serde(with = "humantime_serde")]
pub max_offline: Duration,
#[serde(with = "humantime_serde")]
pub max_warming_up: Duration,
pub start_as_candidate: bool,
/// Database url used when running multiple storage controller instances
pub database_url: Option<SocketAddr>,
/// Threshold for auto-splitting a tenant into shards
pub split_threshold: Option<u64>,
pub max_secondary_lag_bytes: Option<u64>,
}
impl NeonStorageControllerConf {
// Use a shorter pageserver unavailability interval than the default to speed up tests.
const DEFAULT_MAX_OFFLINE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
}
impl Default for NeonStorageControllerConf {
fn default() -> Self {
Self {
max_offline: Self::DEFAULT_MAX_OFFLINE_INTERVAL,
max_warming_up: Self::DEFAULT_MAX_WARMING_UP_INTERVAL,
start_as_candidate: false,
database_url: None,
split_threshold: None,
max_secondary_lag_bytes: None,
}
}
}
// Dummy Default impl to satisfy Deserialize derive. // Dummy Default impl to satisfy Deserialize derive.
impl Default for NeonBroker { impl Default for NeonBroker {
fn default() -> Self { fn default() -> Self {
@@ -202,18 +113,22 @@ impl NeonBroker {
} }
} }
// neon_local needs to know this subset of pageserver configuration.
// For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.
// It can get stale if `pageserver.toml` is changed.
// TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default, deny_unknown_fields)] #[serde(default, deny_unknown_fields)]
pub struct PageServerConf { pub struct PageServerConf {
// node id
pub id: NodeId, pub id: NodeId,
// Pageserver connection settings
pub listen_pg_addr: String, pub listen_pg_addr: String,
pub listen_http_addr: String, pub listen_http_addr: String,
// auth type used for the PG and HTTP ports
pub pg_auth_type: AuthType, pub pg_auth_type: AuthType,
pub http_auth_type: AuthType, pub http_auth_type: AuthType,
pub(crate) virtual_file_io_engine: Option<String>,
pub(crate) get_vectored_impl: Option<String>,
} }
impl Default for PageServerConf { impl Default for PageServerConf {
@@ -224,40 +139,8 @@ impl Default for PageServerConf {
listen_http_addr: String::new(), listen_http_addr: String::new(),
pg_auth_type: AuthType::Trust, pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust, http_auth_type: AuthType::Trust,
} virtual_file_io_engine: None,
} get_vectored_impl: None,
}
/// The toml that can be passed to `neon_local init --config`.
/// This is a subset of the `pageserver.toml` configuration.
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
pub struct NeonLocalInitPageserverConf {
pub id: NodeId,
pub listen_pg_addr: String,
pub listen_http_addr: String,
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
#[serde(flatten)]
pub other: HashMap<String, toml::Value>,
}
impl From<&NeonLocalInitPageserverConf> for PageServerConf {
fn from(conf: &NeonLocalInitPageserverConf) -> Self {
let NeonLocalInitPageserverConf {
id,
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
other: _,
} = conf;
Self {
id: *id,
listen_pg_addr: listen_pg_addr.clone(),
listen_http_addr: listen_http_addr.clone(),
pg_auth_type: *pg_auth_type,
http_auth_type: *http_auth_type,
} }
} }
} }
@@ -273,7 +156,6 @@ pub struct SafekeeperConf {
pub remote_storage: Option<String>, pub remote_storage: Option<String>,
pub backup_threads: Option<u32>, pub backup_threads: Option<u32>,
pub auth_enabled: bool, pub auth_enabled: bool,
pub listen_addr: Option<String>,
} }
impl Default for SafekeeperConf { impl Default for SafekeeperConf {
@@ -287,7 +169,6 @@ impl Default for SafekeeperConf {
remote_storage: None, remote_storage: None,
backup_threads: None, backup_threads: None,
auth_enabled: false, auth_enabled: false,
listen_addr: None,
} }
} }
} }
@@ -340,16 +221,11 @@ impl LocalEnv {
} }
} }
pub fn pg_dir(&self, pg_version: u32, dir_name: &str) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir(pg_version)?.join(dir_name))
}
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> { pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
self.pg_dir(pg_version, "bin") Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
} }
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> { pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
self.pg_dir(pg_version, "lib") Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
} }
pub fn pageserver_bin(&self) -> PathBuf { pub fn pageserver_bin(&self) -> PathBuf {
@@ -399,36 +275,6 @@ impl LocalEnv {
} }
} }
/// Inspect the base data directory and extract the instance id and instance directory path
/// for all storage controller instances
pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {
let mut instances = Vec::default();
let dir = std::fs::read_dir(self.base_data_dir.clone())?;
for dentry in dir {
let dentry = dentry?;
let is_dir = dentry.metadata()?.is_dir();
let filename = dentry.file_name().into_string().unwrap();
let parsed_instance_id = match filename.strip_prefix("storage_controller_") {
Some(suffix) => suffix.parse::<u8>().ok(),
None => None,
};
let is_instance_dir = is_dir && parsed_instance_id.is_some();
if !is_instance_dir {
continue;
}
instances.push((
parsed_instance_id.expect("Checked previously"),
dentry.path(),
));
}
Ok(instances)
}
pub fn register_branch_mapping( pub fn register_branch_mapping(
&mut self, &mut self,
branch_name: String, branch_name: String,
@@ -480,8 +326,44 @@ impl LocalEnv {
.collect() .collect()
} }
/// Construct `Self` from on-disk state. /// Create a LocalEnv from a config file.
pub fn load_config(repopath: &Path) -> anyhow::Result<Self> { ///
/// Unlike 'load_config', this function fills in any defaults that are missing
/// from the config file.
pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
let mut env: LocalEnv = toml::from_str(toml)?;
// Find postgres binaries.
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
// Note that later in the code we assume, that distrib dirs follow the same pattern
// for all postgres versions.
if env.pg_distrib_dir == Path::new("") {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
env.pg_distrib_dir = postgres_bin.into();
} else {
let cwd = env::current_dir()?;
env.pg_distrib_dir = cwd.join("pg_install")
}
}
// Find neon binaries.
if env.neon_distrib_dir == Path::new("") {
env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
}
if env.pageservers.is_empty() {
anyhow::bail!("Configuration must contain at least one pageserver");
}
env.base_data_dir = base_path();
Ok(env)
}
/// Locate and load config
pub fn load_config() -> anyhow::Result<Self> {
let repopath = base_path();
if !repopath.exists() { if !repopath.exists() {
bail!( bail!(
"Neon config is not found in {}. You need to run 'neon_local init' first", "Neon config is not found in {}. You need to run 'neon_local init' first",
@@ -492,140 +374,38 @@ impl LocalEnv {
// TODO: check that it looks like a neon repository // TODO: check that it looks like a neon repository
// load and parse file // load and parse file
let config_file_contents = fs::read_to_string(repopath.join("config"))?; let config = fs::read_to_string(repopath.join("config"))?;
let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?; let mut env: LocalEnv = toml::from_str(config.as_str())?;
let mut env = {
let OnDiskConfig {
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
private_key_path,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
branch_name_mappings,
} = on_disk_config;
LocalEnv {
base_data_dir: repopath.to_owned(),
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
private_key_path,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
branch_name_mappings,
}
};
// The source of truth for pageserver configuration is the pageserver.toml. env.base_data_dir = repopath;
assert!(
env.pageservers.is_empty(),
"we ensure this during deserialization"
);
env.pageservers = {
let iter = std::fs::read_dir(repopath).context("open dir")?;
let mut pageservers = Vec::new();
for res in iter {
let dentry = res?;
const PREFIX: &str = "pageserver_";
let dentry_name = dentry
.file_name()
.into_string()
.ok()
.with_context(|| format!("non-utf8 dentry: {:?}", dentry.path()))
.unwrap();
if !dentry_name.starts_with(PREFIX) {
continue;
}
if !dentry.file_type().context("determine file type")?.is_dir() {
anyhow::bail!("expected a directory, got {:?}", dentry.path());
}
let id = dentry_name[PREFIX.len()..]
.parse::<NodeId>()
.with_context(|| format!("parse id from {:?}", dentry.path()))?;
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
#[derive(serde::Serialize, serde::Deserialize)]
// (allow unknown fields, unlike PageServerConf)
struct PageserverConfigTomlSubset {
listen_pg_addr: String,
listen_http_addr: String,
pg_auth_type: AuthType,
http_auth_type: AuthType,
}
let config_toml_path = dentry.path().join("pageserver.toml");
let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
&std::fs::read_to_string(&config_toml_path)
.with_context(|| format!("read {:?}", config_toml_path))?,
)
.context("parse pageserver.toml")?;
let identity_toml_path = dentry.path().join("identity.toml");
#[derive(serde::Serialize, serde::Deserialize)]
struct IdentityTomlSubset {
id: NodeId,
}
let identity_toml: IdentityTomlSubset = toml_edit::de::from_str(
&std::fs::read_to_string(&identity_toml_path)
.with_context(|| format!("read {:?}", identity_toml_path))?,
)
.context("parse identity.toml")?;
let PageserverConfigTomlSubset {
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
} = config_toml;
let IdentityTomlSubset {
id: identity_toml_id,
} = identity_toml;
let conf = PageServerConf {
id: {
anyhow::ensure!(
identity_toml_id == id,
"id mismatch: identity.toml:id={identity_toml_id} pageserver_(.*) id={id}",
);
id
},
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
};
pageservers.push(conf);
}
pageservers
};
Ok(env) Ok(env)
} }
pub fn persist_config(&self) -> anyhow::Result<()> { pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
Self::persist_config_impl( // Currently, the user first passes a config file with 'neon_local init --config=<path>'
&self.base_data_dir, // We read that in, in `create_config`, and fill any missing defaults. Then it's saved
&OnDiskConfig { // to .neon/config. TODO: We lose any formatting and comments along the way, which is
pg_distrib_dir: self.pg_distrib_dir.clone(), // a bit sad.
neon_distrib_dir: self.neon_distrib_dir.clone(), let mut conf_content = r#"# This file describes a local deployment of the page server
default_tenant_id: self.default_tenant_id, # and safekeeeper node. It is read by the 'neon_local' command-line
private_key_path: self.private_key_path.clone(), # utility.
broker: self.broker.clone(), "#
storage_controller: self.storage_controller.clone(), .to_string();
pageservers: vec![], // it's skip_serializing anyway
safekeepers: self.safekeepers.clone(), // Convert the LocalEnv to a toml file.
control_plane_api: self.control_plane_api.clone(), //
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(), // This could be as simple as this:
branch_name_mappings: self.branch_name_mappings.clone(), //
}, // conf_content += &toml::to_string_pretty(env)?;
) //
} // But it results in a "values must be emitted before tables". I'm not sure
// why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
// Maybe rust reorders the fields to squeeze avoid padding or something?
// In any case, converting to toml::Value first, and serializing that, works.
// See https://github.com/alexcrichton/toml-rs/issues/142
conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;
pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {
let conf_content = &toml::to_string_pretty(config)?;
let target_config_path = base_path.join("config"); let target_config_path = base_path.join("config");
fs::write(&target_config_path, conf_content).with_context(|| { fs::write(&target_config_path, conf_content).with_context(|| {
format!( format!(
@@ -650,13 +430,17 @@ impl LocalEnv {
} }
} }
/// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`]. //
pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> { // Initialize a new Neon repository
let base_path = base_path(); //
assert_ne!(base_path, Path::new("")); pub fn init(&mut self, pg_version: u32, force: &InitForceMode) -> anyhow::Result<()> {
let base_path = &base_path; // check if config already exists
let base_path = &self.base_data_dir;
ensure!(
base_path != Path::new(""),
"repository base path is missing"
);
// create base_path dir
if base_path.exists() { if base_path.exists() {
match force { match force {
InitForceMode::MustNotExist => { InitForceMode::MustNotExist => {
@@ -688,115 +472,74 @@ impl LocalEnv {
} }
} }
} }
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_bin_dir(pg_version)?.display()
);
}
for binary in ["pageserver", "safekeeper"] {
if !self.neon_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{binary}' in neon distrib dir '{}'",
self.neon_distrib_dir.display()
);
}
}
if !base_path.exists() { if !base_path.exists() {
fs::create_dir(base_path)?; fs::create_dir(base_path)?;
} }
let NeonLocalInitConf {
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
} = conf;
// Find postgres binaries.
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
// Note that later in the code we assume, that distrib dirs follow the same pattern
// for all postgres versions.
let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
postgres_bin.into()
} else {
let cwd = env::current_dir().unwrap();
cwd.join("pg_install")
}
});
// Find neon binaries.
let neon_distrib_dir = neon_distrib_dir
.unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());
// Generate keypair for JWT. // Generate keypair for JWT.
// //
// The keypair is only needed if authentication is enabled in any of the // The keypair is only needed if authentication is enabled in any of the
// components. For convenience, we generate the keypair even if authentication // components. For convenience, we generate the keypair even if authentication
// is not enabled, so that you can easily enable it after the initialization // is not enabled, so that you can easily enable it after the initialization
// step. // step. However, if the key generation fails, we treat it as non-fatal if
generate_auth_keys( // authentication was not enabled.
base_path.join("auth_private_key.pem").as_path(), if self.private_key_path == PathBuf::new() {
base_path.join("auth_public_key.pem").as_path(), match generate_auth_keys(
) base_path.join("auth_private_key.pem").as_path(),
.context("generate auth keys")?; base_path.join("auth_public_key.pem").as_path(),
let private_key_path = PathBuf::from("auth_private_key.pem"); ) {
Ok(()) => {
// create the runtime type because the remaining initialization code below needs self.private_key_path = PathBuf::from("auth_private_key.pem");
// a LocalEnv instance op operation }
// TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state Err(e) => {
let env = LocalEnv { if !self.auth_keys_needed() {
base_data_dir: base_path.clone(), eprintln!("Could not generate keypair for JWT authentication: {e}");
pg_distrib_dir, eprintln!("Continuing anyway because authentication was not enabled");
neon_distrib_dir, self.private_key_path = PathBuf::from("auth_private_key.pem");
default_tenant_id: Some(default_tenant_id), } else {
private_key_path, return Err(e);
broker, }
storage_controller: storage_controller.unwrap_or_default(), }
pageservers: pageservers.iter().map(Into::into).collect(), }
safekeepers,
control_plane_api: control_plane_api.unwrap_or_default(),
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
branch_name_mappings: Default::default(),
};
// create endpoints dir
fs::create_dir_all(env.endpoints_path())?;
// create safekeeper dirs
for safekeeper in &env.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
} }
// initialize pageserver state fs::create_dir_all(self.endpoints_path())?;
for (i, ps) in pageservers.into_iter().enumerate() {
let runtime_ps = &env.pageservers[i]; for safekeeper in &self.safekeepers {
assert_eq!(&PageServerConf::from(&ps), runtime_ps); fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
fs::create_dir(env.pageserver_data_dir(ps.id))?;
PageServerNode::from_env(&env, runtime_ps)
.initialize(ps)
.context("pageserver init failed")?;
} }
// setup remote remote location for default LocalFs remote storage self.persist_config(base_path)
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?; }
env.persist_config() fn auth_keys_needed(&self) -> bool {
self.pageservers.iter().any(|ps| {
ps.pg_auth_type == AuthType::NeonJWT || ps.http_auth_type == AuthType::NeonJWT
}) || self.safekeepers.iter().any(|sk| sk.auth_enabled)
} }
} }
pub fn base_path() -> PathBuf { fn base_path() -> PathBuf {
let path = match std::env::var_os("NEON_REPO_DIR") { match std::env::var_os("NEON_REPO_DIR") {
Some(val) => { Some(val) => PathBuf::from(val),
let path = PathBuf::from(val); None => PathBuf::from(".neon"),
if !path.is_absolute() { }
// repeat the env var in the error because our default is always absolute
panic!("NEON_REPO_DIR must be an absolute path, got {path:?}");
}
path
}
None => {
let pwd = std::env::current_dir()
// technically this can fail but it's quite unlikeley
.expect("determine current directory");
let pwd_abs = pwd.canonicalize().expect("canonicalize current directory");
pwd_abs.join(".neon")
}
};
assert!(path.is_absolute());
path
} }
/// Generate a public/private key pair for JWT authentication /// Generate a public/private key pair for JWT authentication
@@ -835,3 +578,31 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
} }
Ok(()) Ok(())
} }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn simple_conf_parsing() {
let simple_conf_toml = include_str!("../simple.conf");
let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
assert!(
simple_conf_parse_result.is_ok(),
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
);
let string_to_replace = "listen_addr = '127.0.0.1:50051'";
let spoiled_url_str = "listen_addr = '!@$XOXO%^&'";
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
assert!(
spoiled_url_toml.contains(spoiled_url_str),
"Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
);
let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
assert!(
spoiled_url_parse_result.is_err(),
"expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
);
}
}

Some files were not shown because too many files have changed in this diff Show More