Compare commits

..

1 Commits

Author SHA1 Message Date
Vlad Lazar
11a97bbfd6 pageserver: handle rel drops correctly in rel size cache 2025-05-14 17:09:01 +02:00
585 changed files with 11197 additions and 29627 deletions

View File

@@ -4,7 +4,6 @@
!Cargo.lock
!Cargo.toml
!Makefile
!postgres.mk
!rust-toolchain.toml
!scripts/ninstall.sh
!docker-compose/run-tests.sh

View File

@@ -49,6 +49,10 @@ inputs:
description: 'A JSON object with project settings'
required: false
default: '{}'
default_endpoint_settings:
description: 'A JSON object with the default endpoint settings'
required: false
default: '{}'
outputs:
dsn:
@@ -135,6 +139,21 @@ runs:
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"scheduling\": \"Essential\"}"
fi
# XXX
# This is a workaround for the default endpoint settings, which currently do not allow some settings in the public API.
# https://github.com/neondatabase/cloud/issues/27108
if [[ -n ${DEFAULT_ENDPOINT_SETTINGS} && ${DEFAULT_ENDPOINT_SETTINGS} != "{}" ]] ; then
PROJECT_DATA=$(curl -X GET \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"scheduling\": \"Essential\"}"
)
NEW_DEFAULT_ENDPOINT_SETTINGS=$(echo ${PROJECT_DATA} | jq -rc ".project.default_endpoint_settings + ${DEFAULT_ENDPOINT_SETTINGS}")
curl -X POST --fail \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}/default_endpoint_settings" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
--data "${NEW_DEFAULT_ENDPOINT_SETTINGS}"
fi
env:
@@ -152,3 +171,4 @@ runs:
PSQL: ${{ inputs.psql_path }}
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
PROJECT_SETTINGS: ${{ inputs.project_settings }}
DEFAULT_ENDPOINT_SETTINGS: ${{ inputs.default_endpoint_settings }}

View File

@@ -38,11 +38,6 @@ on:
required: false
default: 1
type: number
rerun-failed:
description: 'rerun failed tests to ignore flaky tests'
required: false
default: true
type: boolean
defaults:
run:
@@ -104,10 +99,11 @@ jobs:
# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to all "cargo" subcommands.
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
# It also includes --features, if any
#
# CARGO_PROFILE is passed to "cargo build", "cargo test" etc, but not to
# "cargo metadata", because it doesn't accept --release or --debug options.
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
# because "cargo metadata" doesn't accept --release or --debug options
#
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
# corresponding Cargo.toml files for their descriptions.
@@ -116,16 +112,16 @@ jobs:
ARCH: ${{ inputs.arch }}
SANITIZERS: ${{ inputs.sanitizers }}
run: |
CARGO_FLAGS="--locked --features testing"
CARGO_FEATURES="--features testing"
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_PROFILE=""
CARGO_FLAGS="--locked"
elif [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=""
CARGO_PROFILE=""
CARGO_FLAGS="--locked"
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_PROFILE="--release"
CARGO_FLAGS="--locked --release"
fi
if [[ $SANITIZERS == 'enabled' ]]; then
make_vars="WITH_SANITIZERS=yes"
@@ -135,8 +131,8 @@ jobs:
{
echo "cov_prefix=${cov_prefix}"
echo "make_vars=${make_vars}"
echo "CARGO_FEATURES=${CARGO_FEATURES}"
echo "CARGO_FLAGS=${CARGO_FLAGS}"
echo "CARGO_PROFILE=${CARGO_PROFILE}"
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
} >> $GITHUB_ENV
@@ -188,18 +184,34 @@ jobs:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
- name: Build all
# Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables
run: mold -run make ${make_vars} all -j$(nproc) CARGO_BUILD_FLAGS="$CARGO_FLAGS"
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make ${make_vars} postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make ${make_vars} postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make ${make_vars} postgres-v16 -j$(nproc)
- name: Build postgres v17
if: steps.cache_pg_17.outputs.cache-hit != 'true'
run: mold -run make ${make_vars} postgres-v17 -j$(nproc)
- name: Build neon extensions
run: mold -run make ${make_vars} neon-pg-ext -j$(nproc)
- name: Build walproposer-lib
run: mold -run make ${make_vars} walproposer-lib -j$(nproc)
- name: Build unit tests
if: inputs.sanitizers != 'enabled'
- name: Run cargo build
env:
WITH_TESTS: ${{ inputs.sanitizers != 'enabled' && '--tests' || '' }}
run: |
export ASAN_OPTIONS=detect_leaks=0
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_PROFILE --tests
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins ${WITH_TESTS}
# Do install *before* running rust tests because they might recompile the
# binaries with different features/flags.
@@ -211,7 +223,7 @@ jobs:
# Install target binaries
mkdir -p /tmp/neon/bin/
binaries=$(
${cov_prefix} cargo metadata $CARGO_FLAGS --format-version=1 --no-deps |
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
for bin in $binaries; do
@@ -228,7 +240,7 @@ jobs:
mkdir -p /tmp/neon/test_bin/
test_exe_paths=$(
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_PROFILE --message-format=json --no-run |
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
for bin in $test_exe_paths; do
@@ -262,25 +274,29 @@ jobs:
export LD_LIBRARY_PATH
#nextest does not yet support running doctests
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_PROFILE
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
# run all non-pageserver tests
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E '!package(pageserver)'
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
# run pageserver tests
# (When developing new pageserver features gated by config fields, we commonly make the rust
# unit tests sensitive to an environment variable NEON_PAGESERVER_UNIT_TEST_FEATURENAME.
# Then run the nextest invocation below for all relevant combinations. Singling out the
# pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(pageserver)'
# run pageserver tests with different settings
for get_vectored_concurrent_io in sequential sidecar-task; do
for io_engine in std-fs tokio-epoll-uring ; do
for io_mode in buffered direct direct-rw ; do
NEON_PAGESERVER_UNIT_TEST_GET_VECTORED_CONCURRENT_IO=$get_vectored_concurrent_io \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine \
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IO_MODE=$io_mode \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
done
done
done
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_s3)'
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
@@ -289,17 +305,17 @@ jobs:
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_azure)'
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
- name: Install postgres binaries
run: |
# Use tar to copy files matching the pattern, preserving the paths in the destionation
tar c \
pg_install/v* \
build/*/src/test/regress/*.so \
build/*/src/test/regress/pg_regress \
build/*/src/test/isolation/isolationtester \
build/*/src/test/isolation/pg_isolation_regress \
pg_install/build/*/src/test/regress/*.so \
pg_install/build/*/src/test/regress/pg_regress \
pg_install/build/*/src/test/isolation/isolationtester \
pg_install/build/*/src/test/isolation/pg_isolation_regress \
| tar x -C /tmp/neon
- name: Upload Neon artifact
@@ -367,7 +383,7 @@ jobs:
- name: Pytest regression tests
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
uses: ./.github/actions/run-python-test-set
timeout-minutes: ${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 75 || 180 }}
timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 75 || 180 }}
with:
build_type: ${{ inputs.build-type }}
test_selection: regress
@@ -375,20 +391,22 @@ jobs:
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
rerun_failed: ${{ inputs.rerun-failed }}
rerun_failed: ${{ inputs.test-run-count == 1 }}
pg_version: ${{ matrix.pg_version }}
sanitizers: ${{ inputs.sanitizers }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
# Attempt to stop tests gracefully to generate test reports
# until they are forcibly stopped by the stricter `timeout-minutes` limit.
extra_params: --session-timeout=${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
${{ inputs.test-selection != '' && format('-k "{0}"', inputs.test-selection) || '' }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
# Temporary disable this step until we figure out why it's so flaky

View File

@@ -94,6 +94,11 @@ jobs:
run: |
make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)
- name: Get postgres headers ${{ matrix.postgres-version }}
if: steps.cache_pg.outputs.cache-hit != 'true'
run: |
make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
- name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
@@ -105,7 +110,7 @@ jobs:
build-walproposer-lib:
if: |
contains(inputs.pg_versions, 'v17') || inputs.rebuild_everything ||
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
@@ -135,17 +140,11 @@ jobs:
name: pg_install--v17
path: pg_install/v17
# `actions/download-artifact` doesn't preserve permissions:
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
- name: Make pg_install/v*/bin/* executable
run: |
chmod +x pg_install/v*/bin/*
- name: Cache walproposer-lib
id: cache_walproposer_lib
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: build/walproposer-lib
path: pg_install/build/walproposer-lib
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Checkout submodule vendor/postgres-v17
@@ -168,13 +167,13 @@ jobs:
- name: Build walproposer-lib (only for v17)
if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
run:
make walproposer-lib -j$(sysctl -n hw.ncpu) PG_INSTALL_CACHED=1
make walproposer-lib -j$(sysctl -n hw.ncpu)
- name: Upload "build/walproposer-lib" artifact
- name: Upload "pg_install/build/walproposer-lib" artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: build--walproposer-lib
path: build/walproposer-lib
name: pg_install--build--walproposer-lib
path: pg_install/build/walproposer-lib
# The artifact is supposed to be used by the next job in the same workflow,
# so theres no need to store it for too long.
retention-days: 1
@@ -227,11 +226,11 @@ jobs:
name: pg_install--v17
path: pg_install/v17
- name: Download "build/walproposer-lib" artifact
- name: Download "pg_install/build/walproposer-lib" artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: build--walproposer-lib
path: build/walproposer-lib
name: pg_install--build--walproposer-lib
path: pg_install/build/walproposer-lib
# `actions/download-artifact` doesn't preserve permissions:
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss

View File

@@ -58,7 +58,6 @@ jobs:
test-cfg: ${{ inputs.pg-versions }}
test-selection: ${{ inputs.test-selection }}
test-run-count: ${{ fromJson(inputs.run-count) }}
rerun-failed: false
secrets: inherit
create-test-report:

View File

@@ -69,7 +69,7 @@ jobs:
submodules: true
- name: Check for file changes
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
uses: step-security/paths-filter@v3
id: files-changed
with:
token: ${{ secrets.GITHUB_TOKEN }}
@@ -199,28 +199,6 @@ jobs:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
secrets: inherit
validate-compute-manifest:
runs-on: ubuntu-22.04
needs: [ meta, check-permissions ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: '24'
- name: Validate manifest against schema
run: |
make -C compute manifest-schema-validation
build-and-test-locally:
needs: [ meta, build-build-tools-image ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -336,8 +314,7 @@ jobs:
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ github.ref_name == 'main' }}
# test_pageserver_max_throughput_getpage_at_latest_lsn is run in separate workflow periodic_pagebench.yml because it needs snapshots
extra_params: --splits 5 --group ${{ matrix.pytest_split_group }} --ignore=test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
pg_version: v16
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
@@ -346,6 +323,8 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
PAGESERVER_VIRTUAL_FILE_IO_MODE: direct-rw
SYNC_BETWEEN_TESTS: true
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
@@ -670,7 +649,7 @@ jobs:
ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64
compute-node-image-arch:
needs: [ check-permissions, meta ]
needs: [ check-permissions, build-build-tools-image, meta ]
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
permissions:
id-token: write # aws-actions/configure-aws-credentials
@@ -743,6 +722,7 @@ jobs:
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
PG_VERSION=${{ matrix.version.pg }}
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
DEBIAN_VERSION=${{ matrix.version.debian }}
provenance: false
push: true
@@ -762,6 +742,7 @@ jobs:
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
PG_VERSION=${{ matrix.version.pg }}
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
DEBIAN_VERSION=${{ matrix.version.debian }}
provenance: false
push: true
@@ -984,7 +965,7 @@ jobs:
fi
- name: Verify docker-compose example and test extensions
timeout-minutes: 60
timeout-minutes: 20
env:
TAG: >-
${{

View File

@@ -1,151 +0,0 @@
name: Build and Test Fully
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 3 * * *' # run once a day, timezone is utc
workflow_dispatch:
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
tag:
runs-on: [ self-hosted, small ]
container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
outputs:
build-tag: ${{steps.build-tag.outputs.tag}}
steps:
# Need `fetch-depth: 0` to count the number of commits in the branch
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Get build tag
run: |
echo run:$GITHUB_RUN_ID
echo ref:$GITHUB_REF_NAME
echo rev:$(git rev-list --count HEAD)
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'"
echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
fi
shell: bash
id: build-tag
build-build-tools-image:
uses: ./.github/workflows/build-build-tools-image.yml
secrets: inherit
build-and-test-locally:
needs: [ tag, build-build-tools-image ]
strategy:
fail-fast: false
matrix:
arch: [ x64, arm64 ]
build-type: [ debug, release ]
uses: ./.github/workflows/_build-and-test-locally.yml
with:
arch: ${{ matrix.arch }}
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
build-tag: ${{ needs.tag.outputs.build-tag }}
build-type: ${{ matrix.build-type }}
rerun-failed: false
test-cfg: '[{"pg_version":"v14", "lfc_state": "with-lfc"},
{"pg_version":"v15", "lfc_state": "with-lfc"},
{"pg_version":"v16", "lfc_state": "with-lfc"},
{"pg_version":"v17", "lfc_state": "with-lfc"},
{"pg_version":"v14", "lfc_state": "without-lfc"},
{"pg_version":"v15", "lfc_state": "without-lfc"},
{"pg_version":"v16", "lfc_state": "without-lfc"},
{"pg_version":"v17", "lfc_state": "withouts-lfc"}]'
secrets: inherit
create-test-report:
needs: [ build-and-test-locally, build-build-tools-image ]
if: ${{ !cancelled() }}
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
outputs:
report-url: ${{ steps.create-allure-report.outputs.report-url }}
runs-on: [ self-hosted, small ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Create Allure report
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
if: ${{ !cancelled() }}
with:
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
retries: 5
script: |
const report = {
reportUrl: "${{ steps.create-allure-report.outputs.report-url }}",
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
}
const coverage = {}
const script = require("./scripts/comment-test-report.js")
await script({
github,
context,
fetch,
report,
coverage,
})

View File

@@ -79,7 +79,6 @@ jobs:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
build-tag: ${{ needs.tag.outputs.build-tag }}
build-type: ${{ matrix.build-type }}
rerun-failed: false
test-cfg: '[{"pg_version":"v17"}]'
sanitizers: enabled
secrets: inherit

View File

@@ -35,7 +35,7 @@ jobs:
matrix:
pg-version: [16, 17]
runs-on: us-east-2
runs-on: [ self-hosted, small ]
container:
# We use the neon-test-extensions image here as it contains the source code for the extensions.
image: ghcr.io/neondatabase/neon-test-extensions-v${{ matrix.pg-version }}:latest
@@ -71,7 +71,20 @@ jobs:
region_id: ${{ inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ matrix.pg-version }}
project_settings: ${{ steps.project-settings.outputs.settings }}
# We need these settings to get the expected output results.
# We cannot use the environment variables e.g. PGTZ due to
# https://github.com/neondatabase/neon/issues/1287
default_endpoint_settings: >
{
"pg_settings": {
"DateStyle": "Postgres,MDY",
"TimeZone": "America/Los_Angeles",
"compute_query_id": "off",
"neon.allow_unstable_extensions": "on"
}
}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
- name: Run the regression tests
run: /run-tests.sh -r /ext-src

View File

@@ -33,19 +33,11 @@ jobs:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
# test only read-only custom scripts in new branch without database maintenance
- target: new_branch
custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3
test_maintenance: false
# test all custom scripts in new branch with database maintenance
- target: new_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
test_maintenance: true
# test all custom scripts in reuse branch with database maintenance
- target: reuse_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
test_maintenance: true
max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK)
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
permissions:
contents: write
statuses: write
@@ -153,7 +145,6 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Benchmark database maintenance
if: ${{ matrix.test_maintenance }}
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}

View File

@@ -1,175 +0,0 @@
name: large oltp growth
# workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)
on:
# uncomment to run on push for debugging your PR
# push:
# branches: [ bodobolero/increase_large_oltp_workload ]
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 6 * * *' # 06:00 UTC
- cron: '0 8 * * *' # 08:00 UTC
- cron: '0 10 * * *' # 10:00 UTC
- cron: '0 12 * * *' # 12:00 UTC
- cron: '0 14 * * *' # 14:00 UTC
- cron: '0 16 * * *' # 16:00 UTC
workflow_dispatch: # adds ability to run this manually
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow globally because we need dedicated resources which only exist once
group: large-oltp-growth
cancel-in-progress: true
permissions:
contents: read
jobs:
oltp:
strategy:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
# for now only grow the reuse branch, not the other branches.
- target: reuse_branch
custom_scripts:
- grow_action_blocks.sql
- grow_action_kwargs.sql
- grow_device_fingerprint_event.sql
- grow_edges.sql
- grow_hotel_rate_mapping.sql
- grow_ocr_pipeline_results_version.sql
- grow_priceline_raw_response.sql
- grow_relabled_transactions.sql
- grow_state_values.sql
- grow_values.sql
- grow_vertices.sql
- update_accounting_coding_body_tracking_category_selection.sql
- update_action_blocks.sql
- update_action_kwargs.sql
- update_denormalized_approval_workflow.sql
- update_device_fingerprint_event.sql
- update_edges.sql
- update_heron_transaction_enriched_log.sql
- update_heron_transaction_enrichment_requests.sql
- update_hotel_rate_mapping.sql
- update_incoming_webhooks.sql
- update_manual_transaction.sql
- update_ml_receipt_matching_log.sql
- update_ocr_pipeine_results_version.sql
- update_orc_pipeline_step_results.sql
- update_orc_pipeline_step_results_version.sql
- update_priceline_raw_response.sql
- update_quickbooks_transactions.sql
- update_raw_finicity_transaction.sql
- update_relabeled_transactions.sql
- update_state_values.sql
- update_stripe_authorization_event_log.sql
- update_transaction.sql
- update_values.sql
- update_vertices.sql
max-parallel: 1 # we want to run each growth workload sequentially (for now there is just one)
permissions:
contents: write
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "1h"
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ join(matrix.custom_scripts, ' ') }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: 16 # pre-determined by pre-determined project
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
PLATFORM: ${{ matrix.target }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Configure AWS credentials # necessary to download artefacts
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Set up Connection String
id: set-up-connstr
run: |
case "${{ matrix.target }}" in
reuse_branch)
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
;;
*)
echo >&2 "Unknown target=${{ matrix.target }}"
exit 1
;;
esac
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
- name: pgbench with custom-scripts
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance
run_in_parallel: false
save_perf_report: true
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth
pg_version: ${{ env.PG_VERSION }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Periodic large oltp tenant growth increase: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -53,7 +53,7 @@ jobs:
submodules: true
- name: Check for Postgres changes
uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3
uses: step-security/paths-filter@v3
id: files_changed
with:
token: ${{ github.token }}
@@ -63,10 +63,8 @@ jobs:
- name: Filter out only v-string for build matrix
id: postgres_changes
env:
CHANGES: ${{ steps.files_changed.outputs.changes }}
run: |
v_strings_only_as_json_array=$(echo ${CHANGES} | jq '.[]|select(test("v\\d+"))' | jq --slurp -c)
v_strings_only_as_json_array=$(echo ${{ steps.files_changed.outputs.chnages }} | jq '.[]|select(test("v\\d+"))' | jq --slurp -c)
echo "changes=${v_strings_only_as_json_array}" | tee -a "${GITHUB_OUTPUT}"
check-macos-build:

View File

@@ -1,4 +1,4 @@
name: Periodic pagebench performance test on unit-perf hetzner runner
name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
on:
schedule:
@@ -8,7 +8,7 @@ on:
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 */4 * * *' # Runs every 4 hours
- cron: '0 */3 * * *' # Runs every 3 hours
workflow_dispatch: # Allows manual triggering of the workflow
inputs:
commit_hash:
@@ -16,11 +16,6 @@ on:
description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'
required: false
default: ''
recreate_snapshots:
type: boolean
description: 'Recreate snapshots - !!!WARNING!!! We should only recreate snapshots if the previous ones are no longer compatible. Otherwise benchmarking results are not comparable across runs.'
required: false
default: false
defaults:
run:
@@ -34,13 +29,13 @@ permissions:
contents: read
jobs:
run_periodic_pagebench_test:
trigger_bench_on_ec2_machine_in_eu_central_1:
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
runs-on: [ self-hosted, unit-perf ]
runs-on: [ self-hosted, small ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
@@ -49,13 +44,10 @@ jobs:
options: --init
timeout-minutes: 360 # Set the timeout to 6 hours
env:
API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
RUN_ID: ${{ github.run_id }}
DEFAULT_PG_VERSION: 16
BUILD_TYPE: release
RUST_BACKTRACE: 1
# NEON_ENV_BUILDER_USE_OVERLAYFS_FOR_SNAPSHOTS: 1 - doesn't work without root in container
S3_BUCKET: neon-github-public-dev
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
AWS_DEFAULT_REGION : "eu-central-1"
AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
steps:
# we don't need the neon source code because we run everything remotely
# however we still need the local github actions to run the allure step below
@@ -64,194 +56,99 @@ jobs:
with:
egress-policy: audit
- name: Set up the environment which depends on $RUNNER_TEMP on nvme drive
id: set-env
shell: bash -euxo pipefail {0}
run: |
{
echo "NEON_DIR=${RUNNER_TEMP}/neon"
echo "NEON_BIN=${RUNNER_TEMP}/neon/bin"
echo "POSTGRES_DISTRIB_DIR=${RUNNER_TEMP}/neon/pg_install"
echo "LD_LIBRARY_PATH=${RUNNER_TEMP}/neon/pg_install/v${DEFAULT_PG_VERSION}/lib"
echo "BACKUP_DIR=${RUNNER_TEMP}/instance_store/saved_snapshots"
echo "TEST_OUTPUT=${RUNNER_TEMP}/neon/test_output"
echo "PERF_REPORT_DIR=${RUNNER_TEMP}/neon/test_output/perf-report-local"
echo "ALLURE_DIR=${RUNNER_TEMP}/neon/test_output/allure-results"
echo "ALLURE_RESULTS_DIR=${RUNNER_TEMP}/neon/test_output/allure-results/results"
} >> "$GITHUB_ENV"
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
echo "allure_results_dir=${RUNNER_TEMP}/neon/test_output/allure-results/results" >> "$GITHUB_OUTPUT"
- name: Show my own (github runner) external IP address - usefull for IP allowlisting
run: curl https://ifconfig.me
- uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
- name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC machine)
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
role-duration-seconds: 18000 # max 5 hours (needed in case commit hash is still being built)
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
role-duration-seconds: 3600
- name: Start EC2 instance and wait for the instance to boot up
run: |
aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-running --instance-ids $AWS_INSTANCE_ID
sleep 60 # sleep some time to allow cloudinit and our API server to start up
- name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
run: |
public_ip=$(aws ec2 describe-instances --instance-ids $AWS_INSTANCE_ID --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
echo "Public IP of the EC2 instance: $public_ip"
echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> $GITHUB_ENV
- name: Determine commit hash
id: commit_hash
shell: bash -euxo pipefail {0}
env:
INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
run: |
if [[ -z "${INPUT_COMMIT_HASH}" ]]; then
COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')
echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"
if [ -z "$INPUT_COMMIT_HASH" ]; then
echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
echo "COMMIT_HASH_TYPE=latest" >> $GITHUB_ENV
else
COMMIT_HASH="${INPUT_COMMIT_HASH}"
echo "COMMIT_HASH=$COMMIT_HASH" >> $GITHUB_ENV
echo "commit_hash=$COMMIT_HASH" >> "$GITHUB_OUTPUT"
echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
echo "COMMIT_HASH_TYPE=manual" >> $GITHUB_ENV
fi
- name: Checkout the neon repository at given commit hash
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ steps.commit_hash.outputs.commit_hash }}
# does not reuse ./.github/actions/download because we need to download the artifact for the given commit hash
# example artifact
# s3://neon-github-public-dev/artifacts/48b870bc078bd2c450eb7b468e743b9c118549bf/15036827400/1/neon-Linux-X64-release-artifact.tar.zst /instance_store/artifacts/neon-Linux-release-artifact.tar.zst
- name: Determine artifact S3_KEY for given commit hash and download and extract artifact
id: artifact_prefix
shell: bash -euxo pipefail {0}
env:
ARCHIVE: ${{ runner.temp }}/downloads/neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst
COMMIT_HASH: ${{ env.COMMIT_HASH }}
COMMIT_HASH_TYPE: ${{ env.COMMIT_HASH_TYPE }}
- name: Start Bench with run_id
run: |
attempt=0
max_attempts=24 # 5 minutes * 24 = 2 hours
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\", \"neonRepoCommitHashType\": \"${COMMIT_HASH_TYPE}\"}"
while [[ $attempt -lt $max_attempts ]]; do
# the following command will fail until the artifacts are available ...
S3_KEY=$(aws s3api list-objects-v2 --bucket "$S3_BUCKET" --prefix "artifacts/$COMMIT_HASH/" \
| jq -r '.Contents[]?.Key' \
| grep "neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst" \
| sort --version-sort \
| tail -1) || true # ... thus ignore errors from the command
if [[ -n "${S3_KEY}" ]]; then
echo "Artifact found: $S3_KEY"
echo "S3_KEY=$S3_KEY" >> $GITHUB_ENV
- name: Poll Test Status
id: poll_step
run: |
status=""
while [[ "$status" != "failure" && "$status" != "success" ]]; do
response=$(curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY")
echo "Response: $response"
set +x
status=$(echo $response | jq -r '.status')
echo "Test status: $status"
if [[ "$status" == "failure" ]]; then
echo "Test failed"
exit 1 # Fail the job step if status is failure
elif [[ "$status" == "success" || "$status" == "null" ]]; then
break
elif [[ "$status" == "too_many_runs" ]]; then
echo "Too many runs already running"
echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
exit 1
fi
# Increment attempt counter and sleep for 5 minutes
attempt=$((attempt + 1))
echo "Attempt $attempt of $max_attempts to find artifacts in S3 bucket s3://$S3_BUCKET/artifacts/$COMMIT_HASH failed. Retrying in 5 minutes..."
sleep 300 # Sleep for 5 minutes
sleep 60 # Poll every 60 seconds
done
if [[ -z "${S3_KEY}" ]]; then
echo "Error: artifact not found in S3 bucket s3://$S3_BUCKET/artifacts/$COMMIT_HASH" after 2 hours
else
mkdir -p $(dirname $ARCHIVE)
time aws s3 cp --only-show-errors s3://$S3_BUCKET/${S3_KEY} ${ARCHIVE}
mkdir -p ${NEON_DIR}
time tar -xf ${ARCHIVE} -C ${NEON_DIR}
rm -f ${ARCHIVE}
fi
- name: Download snapshots from S3
if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.recreate_snapshots == 'false' || github.event.inputs.recreate_snapshots == '' }}
id: download_snapshots
shell: bash -euxo pipefail {0}
- name: Retrieve Test Logs
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
# Download the snapshots from S3
mkdir -p ${TEST_OUTPUT}
mkdir -p $BACKUP_DIR
cd $BACKUP_DIR
mkdir parts
cd parts
PART=$(aws s3api list-objects-v2 --bucket $S3_BUCKET --prefix performance/pagebench/ \
| jq -r '.Contents[]?.Key' \
| grep -E 'shared-snapshots-[0-9]{4}-[0-9]{2}-[0-9]{2}' \
| sort \
| tail -1)
echo "Latest PART: $PART"
if [[ -z "$PART" ]]; then
echo "ERROR: No matching S3 key found" >&2
exit 1
fi
S3_KEY=$(dirname $PART)
time aws s3 cp --only-show-errors --recursive s3://${S3_BUCKET}/$S3_KEY/ .
cd $TEST_OUTPUT
time cat $BACKUP_DIR/parts/* | zstdcat | tar --extract --preserve-permissions
rm -rf ${BACKUP_DIR}
curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
-H 'accept: application/gzip' \
-H "Authorization: Bearer $API_KEY" \
--output "test_log_${GITHUB_RUN_ID}.gz"
- name: Cache poetry deps
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-bookworm-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
# we need high number of open files for pagebench
- name: show ulimits
shell: bash -euxo pipefail {0}
- name: Unzip Test Log and Print it into this job's log
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
ulimit -a
- name: Run pagebench testcase
shell: bash -euxo pipefail {0}
env:
CI: false # need to override this env variable set by github to enforce using snapshots
run: |
export PLATFORM=hetzner-unit-perf-${COMMIT_HASH_TYPE}
# report the commit hash of the neon repository in the revision of the test results
export GITHUB_SHA=${COMMIT_HASH}
rm -rf ${PERF_REPORT_DIR}
rm -rf ${ALLURE_RESULTS_DIR}
mkdir -p ${PERF_REPORT_DIR}
mkdir -p ${ALLURE_RESULTS_DIR}
PARAMS="--alluredir=${ALLURE_RESULTS_DIR} --tb=short --verbose -rA"
EXTRA_PARAMS="--out-dir ${PERF_REPORT_DIR} --durations-path $TEST_OUTPUT/benchmark_durations.json"
# run only two selected tests
# environment set by parent:
# RUST_BACKTRACE=1 DEFAULT_PG_VERSION=16 BUILD_TYPE=release
./scripts/pytest ${PARAMS} test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_characterize_throughput_with_n_tenants ${EXTRA_PARAMS}
./scripts/pytest ${PARAMS} test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py::test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant ${EXTRA_PARAMS}
- name: upload the performance metrics to the Neon performance database which is used by grafana dashboards to display the results
shell: bash -euxo pipefail {0}
run: |
export REPORT_FROM="$PERF_REPORT_DIR"
export GITHUB_SHA=${COMMIT_HASH}
time ./scripts/generate_and_push_perf_report.sh
- name: Upload test results
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-store
with:
report-dir: ${{ steps.set-env.outputs.allure_results_dir }}
unique-key: ${{ env.BUILD_TYPE }}-${{ env.DEFAULT_PG_VERSION }}-${{ runner.arch }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
gzip -d "test_log_${GITHUB_RUN_ID}.gz"
cat "test_log_${GITHUB_RUN_ID}"
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Upload snapshots
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.recreate_snapshots != 'false' && github.event.inputs.recreate_snapshots != '' }}
id: upload_snapshots
shell: bash -euxo pipefail {0}
run: |
mkdir -p $BACKUP_DIR
cd $TEST_OUTPUT
tar --create --preserve-permissions --file - shared-snapshots | zstd -o $BACKUP_DIR/shared_snapshots.tar.zst
cd $BACKUP_DIR
mkdir parts
split -b 1G shared_snapshots.tar.zst ./parts/shared_snapshots.tar.zst.part.
SNAPSHOT_DATE=$(date +%F) # YYYY-MM-DD
cd parts
time aws s3 cp --recursive . s3://${S3_BUCKET}/performance/pagebench/shared-snapshots-${SNAPSHOT_DATE}/
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
@@ -260,22 +157,26 @@ jobs:
slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
- name: Cleanup Test Resources
if: always()
shell: bash -euxo pipefail {0}
env:
ARCHIVE: ${{ runner.temp }}/downloads/neon-${{ runner.os }}-${{ runner.arch }}-release-artifact.tar.zst
run: |
# Cleanup the test resources
if [[ -d "${BACKUP_DIR}" ]]; then
rm -rf ${BACKUP_DIR}
fi
if [[ -d "${TEST_OUTPUT}" ]]; then
rm -rf ${TEST_OUTPUT}
fi
if [[ -d "${NEON_DIR}" ]]; then
rm -rf ${NEON_DIR}
fi
rm -rf $(dirname $ARCHIVE)
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d ''
- name: Assume AWS OIDC role that allows to manage (start/stop/describe... EC machine)
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
with:
aws-region: eu-central-1
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_MANAGE_BENCHMARK_EC2_VMS_ARN }}
role-duration-seconds: 3600
- name: Stop EC2 instance and wait for the instance to be stopped
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
aws ec2 stop-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-stopped --instance-ids $AWS_INSTANCE_ID

View File

@@ -34,7 +34,7 @@ jobs:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
id: python-src
with:
files: |
@@ -45,7 +45,7 @@ jobs:
poetry.lock
pyproject.toml
- uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
id: rust-src
with:
files: |

View File

@@ -1,84 +0,0 @@
name: Periodic proxy performance test on unit-perf hetzner runner
on:
push: # TODO: remove after testing
branches:
- test-proxy-bench # Runs on pushes to branches starting with test-proxy-bench
# schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
# - cron: '0 5 * * *' # Runs at 5 UTC once a day
workflow_dispatch: # adds an ability to run this manually
defaults:
run:
shell: bash -euo pipefail {0}
concurrency:
group: ${{ github.workflow }}
cancel-in-progress: false
permissions:
contents: read
jobs:
run_periodic_proxybench_test:
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
runs-on: [self-hosted, unit-perf]
timeout-minutes: 60 # 1h timeout
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
steps:
- name: Checkout proxy-bench Repo
uses: actions/checkout@v4
with:
repository: neondatabase/proxy-bench
path: proxy-bench
- name: Set up the environment which depends on $RUNNER_TEMP on nvme drive
id: set-env
shell: bash -euxo pipefail {0}
run: |
PROXY_BENCH_PATH=$(realpath ./proxy-bench)
{
echo "PROXY_BENCH_PATH=$PROXY_BENCH_PATH"
echo "NEON_DIR=${RUNNER_TEMP}/neon"
echo "TEST_OUTPUT=${PROXY_BENCH_PATH}/test_output"
echo ""
} >> "$GITHUB_ENV"
- name: Run proxy-bench
run: ${PROXY_BENCH_PATH}/run.sh
- name: Ingest Bench Results # neon repo script
if: always()
run: |
mkdir -p $TEST_OUTPUT
python $NEON_DIR/scripts/proxy_bench_results_ingest.py --out $TEST_OUTPUT
- name: Push Metrics to Proxy perf database
if: always()
env:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PROXY_TEST_RESULT_CONNSTR }}"
REPORT_FROM: $TEST_OUTPUT
run: $NEON_DIR/scripts/generate_and_push_perf_report.sh
- name: Docker cleanup
if: always()
run: docker compose down
- name: Notify Failure
if: failure()
run: echo "Proxy bench job failed" && exit 1

1
.gitignore vendored
View File

@@ -1,5 +1,4 @@
/artifact_cache
/build
/pg_install
/target
/tmp_check

414
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,6 @@ members = [
"pageserver/ctl",
"pageserver/client",
"pageserver/pagebench",
"pageserver/page_api",
"proxy",
"safekeeper",
"safekeeper/client",
@@ -22,11 +21,8 @@ members = [
"libs/http-utils",
"libs/pageserver_api",
"libs/postgres_ffi",
"libs/postgres_ffi_types",
"libs/postgres_versioninfo",
"libs/safekeeper_api",
"libs/desim",
"libs/neon-shmem",
"libs/utils",
"libs/consumption_metrics",
"libs/postgres_backend",
@@ -73,8 +69,8 @@ aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2", features = ["sign-http"] }
aws-types = "1.3"
axum = { version = "0.8.1", features = ["ws"] }
axum-extra = { version = "0.10.0", features = ["typed-header", "query"] }
base64 = "0.22"
axum-extra = { version = "0.10.0", features = ["typed-header"] }
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.71"
bit_field = "0.10.2"
@@ -131,7 +127,7 @@ md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
memoffset = "0.9"
nix = { version = "0.30.1", features = ["dir", "fs", "mman", "process", "socket", "signal", "poll"] }
nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
# Do not update to >= 7.0.0, at least. The update will have a significant impact
# on compute startup metrics (start_postgres_ms), >= 25% degradation.
notify = "6.0.0"
@@ -151,8 +147,7 @@ pin-project-lite = "0.2"
pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] }
procfs = "0.16"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
prost = "0.13.5"
prost-types = "0.13.5"
prost = "0.13"
rand = "0.8"
redis = { version = "0.29.2", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
@@ -174,9 +169,8 @@ sentry = { version = "0.37", default-features = false, features = ["backtrace",
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_path_to_error = "0.1"
serde_with = { version = "3", features = [ "base64" ] }
serde_with = { version = "2.0", features = [ "base64" ] }
serde_assert = "0.5.0"
serde_repr = "0.1.20"
sha2 = "0.10.2"
signal-hook = "0.3"
smallvec = "1.11"
@@ -200,11 +194,10 @@ tokio-postgres-rustls = "0.12.0"
tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
tokio-stream = "0.1"
tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8"
toml_edit = "0.22"
tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "gzip", "prost", "router", "server", "tls-ring", "tls-native-roots", "zstd"] }
tonic-reflection = { version = "0.13.1", features = ["server"] }
tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
tower = { version = "0.5.2", default-features = false }
tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }
@@ -251,7 +244,6 @@ azure_storage_blobs = { git = "https://github.com/neondatabase/azure-sdk-for-rus
## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" }
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
desim = { version = "0.1", path = "./libs/desim" }
endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
http-utils = { version = "0.1", path = "./libs/http-utils/" }
metrics = { version = "0.1", path = "./libs/metrics/" }
@@ -259,26 +251,23 @@ pageserver = { path = "./pageserver" }
pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
pageserver_client = { path = "./pageserver/client" }
pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
pageserver_page_api = { path = "./pageserver/page_api" }
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
postgres_ffi_types = { version = "0.1", path = "./libs/postgres_ffi_types/" }
postgres_versioninfo = { version = "0.1", path = "./libs/postgres_versioninfo/" }
postgres_initdb = { path = "./libs/postgres_initdb" }
posthog_client_lite = { version = "0.1", path = "./libs/posthog_client_lite" }
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
safekeeper_client = { path = "./safekeeper/client" }
desim = { version = "0.1", path = "./libs/desim" }
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
storage_controller_client = { path = "./storage_controller/client" }
tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
utils = { version = "0.1", path = "./libs/utils/" }
vm_monitor = { version = "0.1", path = "./libs/vm_monitor/" }
wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
walproposer = { version = "0.1", path = "./libs/walproposer/" }
wal_decoder = { version = "0.1", path = "./libs/wal_decoder" }
## Common library dependency
workspace_hack = { version = "0.1", path = "./workspace_hack/" }
@@ -288,7 +277,7 @@ criterion = "0.5.1"
rcgen = "0.13"
rstest = "0.18"
camino-tempfile = "1.0.2"
tonic-build = "0.13.1"
tonic-build = "0.12"
[patch.crates-io]

View File

@@ -5,6 +5,8 @@
ARG REPOSITORY=ghcr.io/neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG DEFAULT_PG_VERSION=17
ARG STABLE_PG_VERSION=16
ARG DEBIAN_VERSION=bookworm
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
@@ -40,12 +42,12 @@ COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot postgres.mk postgres.mk
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
ENV BUILD_TYPE=release
RUN set -e \
&& mold -run make -j $(nproc) -s neon-pg-ext \
&& rm -rf pg_install/build \
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
# Prepare cargo-chef recipe
@@ -61,11 +63,14 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
WORKDIR /home/nonroot
ARG GIT_VERSION=local
ARG BUILD_TAG
ARG STABLE_PG_VERSION
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
COPY --from=plan /home/nonroot/recipe.json recipe.json
ARG ADDITIONAL_RUSTFLAGS=""
@@ -92,6 +97,7 @@ RUN set -e \
# Build final image
#
FROM $BASE_IMAGE_SHA
ARG DEFAULT_PG_VERSION
WORKDIR /data
RUN set -e \
@@ -101,20 +107,9 @@ RUN set -e \
libreadline-dev \
libseccomp-dev \
ca-certificates \
# System postgres for use with client libraries (e.g. in storage controller)
postgresql-15 \
openssl \
unzip \
curl \
&& ARCH=$(uname -m) \
&& if [ "$ARCH" = "x86_64" ]; then \
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"; \
elif [ "$ARCH" = "aarch64" ]; then \
curl "https://awscli.amazonaws.com/awscli-exe-linux-aarch64.zip" -o "awscliv2.zip"; \
else \
echo "Unsupported architecture: $ARCH" && exit 1; \
fi \
&& unzip awscliv2.zip \
&& ./aws/install \
&& rm -rf aws awscliv2.zip \
&& rm -f /etc/apt/apt.conf.d/80-retries \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \

244
Makefile
View File

@@ -1,21 +1,8 @@
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package
# managers.
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
# Supported PostgreSQL versions
POSTGRES_VERSIONS = v17 v16 v15 v14
# CARGO_BUILD_FLAGS: Extra flags to pass to `cargo build`. `--locked`
# and `--features testing` are popular examples.
#
# CARGO_PROFILE: Set to override the cargo profile to use. By default,
# it is derived from BUILD_TYPE.
# All intermediate build artifacts are stored here.
BUILD_DIR := build
ICU_PREFIX_DIR := /usr/local/icu
#
@@ -29,12 +16,12 @@ ifeq ($(BUILD_TYPE),release)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
PG_CFLAGS += -O2 -g3 $(CFLAGS)
PG_LDFLAGS = $(LDFLAGS)
CARGO_PROFILE ?= --profile=release
# Unfortunately, `--profile=...` is a nightly feature
CARGO_BUILD_FLAGS += --release
else ifeq ($(BUILD_TYPE),debug)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
PG_CFLAGS += -O0 -g3 $(CFLAGS)
PG_LDFLAGS = $(LDFLAGS)
CARGO_PROFILE ?= --profile=dev
else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
@@ -98,29 +85,141 @@ CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
# Top level Makefile to build Neon and PostgreSQL
#
.PHONY: all
all: neon postgres-install neon-pg-ext
all: neon postgres neon-pg-ext
### Neon Rust bits
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon
neon: postgres-headers-install walproposer-lib cargo-target-dir
neon: postgres-headers walproposer-lib cargo-target-dir
+@echo "Compiling Neon"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE)
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
.PHONY: cargo-target-dir
cargo-target-dir:
# https://github.com/rust-lang/cargo/issues/14281
mkdir -p target
test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG
### PostgreSQL parts
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now.
#
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
mkdir -p $(POSTGRES_INSTALL_DIR)
test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
+@echo "Configuring Postgres $* build"
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
exit 1; }
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
VERSION=$*; \
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
# nicer alias to run 'configure'
# Note: I've been unable to use templates for this part of our configuration.
# I'm not sure why it wouldn't work, but this is the only place (apart from
# the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used.
.PHONY: postgres-configure-v17
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
.PHONY: postgres-configure-v16
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
.PHONY: postgres-configure-v15
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
.PHONY: postgres-configure-v14
postgres-configure-v14: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
.PHONY: postgres-headers-%
postgres-headers-%: postgres-configure-%
+@echo "Installing PostgreSQL $* headers"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/include MAKELEVEL=0 install
# Compile and install PostgreSQL
.PHONY: postgres-%
postgres-%: postgres-configure-% \
postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
+@echo "Compiling PostgreSQL $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 install
+@echo "Compiling libpq $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq install
+@echo "Compiling pg_prewarm $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install
+@echo "Compiling pg_buffercache $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install
+@echo "Compiling pg_visibility $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
+@echo "Compiling pageinspect $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
+@echo "Compiling pg_trgm $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
+@echo "Compiling amcheck $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
+@echo "Compiling test_decoding $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
.PHONY: postgres-clean-%
postgres-clean-%:
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect clean
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq clean
.PHONY: postgres-check-%
postgres-check-%: postgres-%
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 check
.PHONY: neon-pg-ext-%
neon-pg-ext-%: postgres-install-%
+@echo "Compiling neon-specific Postgres extensions for $*"
mkdir -p $(BUILD_DIR)/pgxn-$*
neon-pg-ext-%: postgres-%
+@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(BUILD_DIR)/pgxn-$*\
-f $(ROOT_PROJECT_DIR)/pgxn/Makefile install
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_rmgr $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
+@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
+@echo "Compiling neon_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
.PHONY: neon-pg-clean-ext-%
neon-pg-clean-ext-%:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile clean
# Build walproposer as a static library. walproposer source code is located
# in the pgxn/neon directory.
@@ -134,15 +233,15 @@ neon-pg-ext-%: postgres-install-%
.PHONY: walproposer-lib
walproposer-lib: neon-pg-ext-v17
+@echo "Compiling walproposer-lib"
mkdir -p $(BUILD_DIR)/walproposer-lib
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
-C $(BUILD_DIR)/walproposer-lib \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(BUILD_DIR)/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(BUILD_DIR)/walproposer-lib
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgport.a \
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
pg_strong_random.o
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
checksum_helper.o \
cryptohash_openssl.o \
hmac_openssl.o \
@@ -150,18 +249,69 @@ walproposer-lib: neon-pg-ext-v17
parse_manifest.o \
scram-common.o
ifeq ($(UNAME_S),Linux)
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
pg_crc32c.o
endif
# Shorthand to call neon-pg-ext-% target for all Postgres versions
.PHONY: walproposer-lib-clean
walproposer-lib-clean:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
.PHONY: neon-pg-ext
neon-pg-ext: $(foreach pg_version,$(POSTGRES_VERSIONS),neon-pg-ext-$(pg_version))
neon-pg-ext: \
neon-pg-ext-v14 \
neon-pg-ext-v15 \
neon-pg-ext-v16 \
neon-pg-ext-v17
.PHONY: neon-pg-clean-ext
neon-pg-clean-ext: \
neon-pg-clean-ext-v14 \
neon-pg-clean-ext-v15 \
neon-pg-clean-ext-v16 \
neon-pg-clean-ext-v17
# shorthand to build all Postgres versions
.PHONY: postgres
postgres: \
postgres-v14 \
postgres-v15 \
postgres-v16 \
postgres-v17
.PHONY: postgres-headers
postgres-headers: \
postgres-headers-v14 \
postgres-headers-v15 \
postgres-headers-v16 \
postgres-headers-v17
.PHONY: postgres-clean
postgres-clean: \
postgres-clean-v14 \
postgres-clean-v15 \
postgres-clean-v16 \
postgres-clean-v17
.PHONY: postgres-check
postgres-check: \
postgres-check-v14 \
postgres-check-v15 \
postgres-check-v16 \
postgres-check-v17
# This doesn't remove the effects of 'configure'.
.PHONY: clean
clean: postgres-clean neon-pg-clean-ext
$(MAKE) -C compute clean
$(CARGO_CMD_PREFIX) cargo clean
# This removes everything
.PHONY: distclean
distclean:
$(RM) -r $(POSTGRES_INSTALL_DIR) $(BUILD_DIR)
$(RM) -r $(POSTGRES_INSTALL_DIR)
$(CARGO_CMD_PREFIX) cargo clean
.PHONY: fmt
@@ -170,7 +320,7 @@ fmt:
postgres-%-pg-bsd-indent: postgres-%
+@echo "Compiling pg_bsd_indent"
$(MAKE) -C $(BUILD_DIR)/$*/src/tools/pg_bsd_indent/
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/
# Create typedef list for the core. Note that generally it should be combined with
# buildfarm one to cover platform specific stuff.
@@ -189,7 +339,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
cat $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/typedefs.list |\
cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list
+@echo note: you might want to run it on selected files/dirs instead.
INDENT=$(BUILD_DIR)/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
INDENT=$(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
@@ -200,28 +350,12 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-C $(BUILD_DIR)/neon-v17 \
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
.PHONY: setup-pre-commit-hook
setup-pre-commit-hook:
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
# Targets for building PostgreSQL are defined in postgres.mk.
#
# But if the caller has indicated that PostgreSQL is already
# installed, by setting the PG_INSTALL_CACHED variable, skip it.
ifdef PG_INSTALL_CACHED
postgres-install: skip-install
$(foreach pg_version,$(POSTGRES_VERSIONS),postgres-install-$(pg_version)): skip-install
postgres-headers-install:
+@echo "Skipping installation of PostgreSQL headers because PG_INSTALL_CACHED is set"
skip-install:
+@echo "Skipping PostgreSQL installation because PG_INSTALL_CACHED is set"
else
include postgres.mk
endif

View File

@@ -155,7 +155,7 @@ RUN set -e \
# Keep the version the same as in compute/compute-node.Dockerfile and
# test_runner/regress/test_compute_metrics.py.
ENV SQL_EXPORTER_VERSION=0.17.3
ENV SQL_EXPORTER_VERSION=0.17.0
RUN curl -fsSL \
"https://github.com/burningalchemist/sql_exporter/releases/download/${SQL_EXPORTER_VERSION}/sql_exporter-${SQL_EXPORTER_VERSION}.linux-$(case "$(uname -m)" in x86_64) echo amd64;; aarch64) echo arm64;; esac).tar.gz" \
--output sql_exporter.tar.gz \
@@ -165,7 +165,6 @@ RUN curl -fsSL \
&& rm sql_exporter.tar.gz
# protobuf-compiler (protoc)
# Keep the version the same as in compute/compute-node.Dockerfile
ENV PROTOC_VERSION=25.1
RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
&& unzip -q protoc.zip -d protoc \
@@ -180,7 +179,7 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
&& mv s5cmd /usr/local/bin/s5cmd
# LLVM
ENV LLVM_VERSION=20
ENV LLVM_VERSION=19
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION}/ llvm-toolchain-${DEBIAN_VERSION}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
&& apt update \
@@ -293,7 +292,7 @@ WORKDIR /home/nonroot
# Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.88.0
ENV RUSTC_VERSION=1.86.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
ARG RUSTFILT_VERSION=0.2.1
@@ -311,13 +310,13 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
. "$HOME/.cargo/env" && \
cargo --version && rustup --version && \
rustup component add llvm-tools rustfmt clippy && \
cargo install rustfilt --version ${RUSTFILT_VERSION} --locked && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} --locked && \
cargo install cargo-deny --version ${CARGO_DENY_VERSION} --locked && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} --locked && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} --locked && \
cargo install cargo-chef --version ${CARGO_CHEF_VERSION} --locked && \
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} --locked \
cargo install rustfilt --version ${RUSTFILT_VERSION} && \
cargo install cargo-hakari --version ${CARGO_HAKARI_VERSION} && \
cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
cargo install cargo-hack --version ${CARGO_HACK_VERSION} && \
cargo install cargo-nextest --version ${CARGO_NEXTEST_VERSION} && \
cargo install cargo-chef --locked --version ${CARGO_CHEF_VERSION} && \
cargo install diesel_cli --version ${CARGO_DIESEL_CLI_VERSION} \
--features postgres-bundled --no-default-features && \
rm -rf /home/nonroot/.cargo/registry && \
rm -rf /home/nonroot/.cargo/git

3
compute/.gitignore vendored
View File

@@ -3,6 +3,3 @@ etc/neon_collector.yml
etc/neon_collector_autoscaling.yml
etc/sql_exporter.yml
etc/sql_exporter_autoscaling.yml
# Node.js dependencies
node_modules/

View File

@@ -22,7 +22,7 @@ sql_exporter.yml: $(jsonnet_files)
--output-file etc/$@ \
--tla-str collector_name=neon_collector \
--tla-str collector_file=neon_collector.yml \
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter&pgaudit.log=none' \
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter' \
etc/sql_exporter.jsonnet
sql_exporter_autoscaling.yml: $(jsonnet_files)
@@ -30,7 +30,7 @@ sql_exporter_autoscaling.yml: $(jsonnet_files)
--output-file etc/$@ \
--tla-str collector_name=neon_collector_autoscaling \
--tla-str collector_file=neon_collector_autoscaling.yml \
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling&pgaudit.log=none' \
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling' \
etc/sql_exporter.jsonnet
.PHONY: clean
@@ -48,11 +48,3 @@ jsonnetfmt-test:
.PHONY: jsonnetfmt-format
jsonnetfmt-format:
jsonnetfmt --in-place $(jsonnet_files)
.PHONY: manifest-schema-validation
manifest-schema-validation: node_modules
node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
node_modules: package.json
npm install
touch node_modules

View File

@@ -77,6 +77,9 @@
# build_and_test.yml github workflow for how that's done.
ARG PG_VERSION
ARG REPOSITORY=ghcr.io/neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG BUILD_TAG
ARG DEBIAN_VERSION=bookworm
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
@@ -115,9 +118,6 @@ ARG EXTENSIONS=all
FROM $BASE_IMAGE_SHA AS build-deps
ARG DEBIAN_VERSION
# Keep in sync with build-tools.Dockerfile
ENV PROTOC_VERSION=25.1
# Use strict mode for bash to catch errors early
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
@@ -149,17 +149,8 @@ RUN case $DEBIAN_VERSION in \
ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip g++ \
libclang-dev \
jsonnet \
$VERSION_INSTALLS \
&& apt clean && rm -rf /var/lib/apt/lists/* \
&& useradd -ms /bin/bash nonroot -b /home \
# Install protoc from binary release, since Debian's versions are too old.
&& curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
&& unzip -q protoc.zip -d protoc \
&& mv protoc/bin/protoc /usr/local/bin/protoc \
&& mv protoc/include/google /usr/local/include/google \
&& rm -rf protoc.zip protoc
&& apt clean && rm -rf /var/lib/apt/lists/*
#########################################################################################
#
@@ -180,6 +171,9 @@ RUN cd postgres && \
eval $CONFIGURE_CMD && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
# Enable some of contrib extensions
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
@@ -303,7 +297,6 @@ RUN ./autogen.sh && \
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
make staged-install && \
cd extensions/postgis && \
make clean && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -589,38 +582,6 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
#########################################################################################
#
# Layer "online_advisor-build"
# compile online_advisor extension
#
#########################################################################################
FROM build-deps AS online_advisor-src
ARG PG_VERSION
# online_advisor supports all Postgres version starting from PG14, but prior to PG17 has to be included in preload_shared_libraries
# last release 1.0 - May 15, 2025
WORKDIR /ext-src
RUN case "${PG_VERSION:?}" in \
"v17") \
;; \
*) \
echo "skipping the version of online_advistor for $PG_VERSION" && exit 0 \
;; \
esac && \
wget https://github.com/knizhnik/online_advisor/archive/refs/tags/1.0.tar.gz -O online_advisor.tar.gz && \
echo "37dcadf8f7cc8d6cc1f8831276ee245b44f1b0274f09e511e47a67738ba9ed0f online_advisor.tar.gz" | sha256sum --check && \
mkdir online_advisor-src && cd online_advisor-src && tar xzf ../online_advisor.tar.gz --strip-components=1 -C .
FROM pg-build AS online_advisor-build
COPY --from=online_advisor-src /ext-src/ /ext-src/
WORKDIR /ext-src/
RUN if [ -d online_advisor-src ]; then \
cd online_advisor-src && \
make -j install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/online_advisor.control; \
fi
#########################################################################################
#
# Layer "pg_hashids-build"
@@ -1063,10 +1024,17 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
#########################################################################################
#
# Layer "build-deps with Rust toolchain installed"
# Layer "pg build with nonroot user and cargo installed"
# This layer is base and common for layers with `pgrx`
#
#########################################################################################
FROM build-deps AS build-deps-with-cargo
FROM pg-build AS pg-build-nonroot-with-cargo
ARG PG_VERSION
RUN apt update && \
apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
apt clean && rm -rf /var/lib/apt/lists/* && \
useradd -ms /bin/bash nonroot -b /home
ENV HOME=/home/nonroot
ENV PATH="/home/nonroot/.cargo/bin:$PATH"
@@ -1081,29 +1049,13 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init
#########################################################################################
#
# Layer "pg-build with Rust toolchain installed"
# This layer is base and common for layers with `pgrx`
#
#########################################################################################
FROM pg-build AS pg-build-with-cargo
ARG PG_VERSION
ENV HOME=/home/nonroot
ENV PATH="/home/nonroot/.cargo/bin:$PATH"
USER nonroot
WORKDIR /home/nonroot
COPY --from=build-deps-with-cargo /home/nonroot /home/nonroot
#########################################################################################
#
# Layer "rust extensions"
# This layer is used to build `pgrx` deps
#
#########################################################################################
FROM pg-build-with-cargo AS rust-extensions-build
FROM pg-build-nonroot-with-cargo AS rust-extensions-build
ARG PG_VERSION
RUN case "${PG_VERSION:?}" in \
@@ -1125,7 +1077,7 @@ USER root
# and eventually get merged with `rust-extensions-build`
#
#########################################################################################
FROM pg-build-with-cargo AS rust-extensions-build-pgrx12
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx12
ARG PG_VERSION
RUN cargo install --locked --version 0.12.9 cargo-pgrx && \
@@ -1142,7 +1094,7 @@ USER root
# and eventually get merged with `rust-extensions-build`
#
#########################################################################################
FROM pg-build-with-cargo AS rust-extensions-build-pgrx14
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx14
ARG PG_VERSION
RUN cargo install --locked --version 0.14.1 cargo-pgrx && \
@@ -1159,12 +1111,10 @@ USER root
FROM build-deps AS pgrag-src
ARG PG_VERSION
WORKDIR /ext-src
COPY compute/patches/onnxruntime.patch .
WORKDIR /ext-src
RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
patch -p1 < /ext-src/onnxruntime.patch && \
echo "#nothing to test here" > neon-test.sh
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.2.tar.gz -O pgrag.tar.gz && \
@@ -1179,7 +1129,7 @@ COPY --from=pgrag-src /ext-src/ /ext-src/
# Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise
WORKDIR /ext-src/onnxruntime-src
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
python3 python3-pip python3-venv && \
python3 python3-pip python3-venv protobuf-compiler && \
apt clean && rm -rf /var/lib/apt/lists/* && \
python3 -m venv venv && \
. venv/bin/activate && \
@@ -1198,14 +1148,14 @@ RUN cd exts/rag && \
RUN cd exts/rag_bge_small_en_v15 && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/bge_small_en_v15.onnx \
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
cargo pgrx install --release --features remote_onnx && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control
RUN cd exts/rag_jina_reranker_v1_tiny_en && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
REMOTE_ONNX_URL=http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/pgrag-data/jina_reranker_v1_tiny_en.onnx \
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
cargo pgrx install --release --features remote_onnx && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_jina_reranker_v1_tiny_en.control
@@ -1574,20 +1524,20 @@ ARG PG_VERSION
WORKDIR /ext-src
RUN case "${PG_VERSION}" in \
"v14") \
export PGAUDIT_VERSION=1.6.3 \
export PGAUDIT_CHECKSUM=37a8f5a7cc8d9188e536d15cf0fdc457fcdab2547caedb54442c37f124110919 \
export PGAUDIT_VERSION=1.6.2 \
export PGAUDIT_CHECKSUM=1f350d70a0cbf488c0f2b485e3a5c9b11f78ad9e3cbb95ef6904afa1eb3187eb \
;; \
"v15") \
export PGAUDIT_VERSION=1.7.1 \
export PGAUDIT_CHECKSUM=e9c8e6e092d82b2f901d72555ce0fe7780552f35f8985573796cd7e64b09d4ec \
export PGAUDIT_VERSION=1.7.0 \
export PGAUDIT_CHECKSUM=8f4a73e451c88c567e516e6cba7dc1e23bc91686bb6f1f77f8f3126d428a8bd8 \
;; \
"v16") \
export PGAUDIT_VERSION=16.1 \
export PGAUDIT_CHECKSUM=3bae908ab70ba0c6f51224009dbcfff1a97bd6104c6273297a64292e1b921fee \
export PGAUDIT_VERSION=16.0 \
export PGAUDIT_CHECKSUM=d53ef985f2d0b15ba25c512c4ce967dce07b94fd4422c95bd04c4c1a055fe738 \
;; \
"v17") \
export PGAUDIT_VERSION=17.1 \
export PGAUDIT_CHECKSUM=9c5f37504d393486cc75d2ced83f75f5899be64fa85f689d6babb833b4361e6c \
export PGAUDIT_VERSION=17.0 \
export PGAUDIT_CHECKSUM=7d0d08d030275d525f36cd48b38c6455f1023da863385badff0cec44965bfd8c \
;; \
*) \
echo "pgaudit is not supported on this PostgreSQL version" && exit 1;; \
@@ -1638,7 +1588,18 @@ FROM pg-build AS neon-ext-build
ARG PG_VERSION
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute
RUN make -j $(getconf _NPROCESSORS_ONLN) \
-C pgxn/neon \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
-C pgxn/neon_utils \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
-C pgxn/neon_test_utils \
-s install && \
make -j $(getconf _NPROCESSORS_ONLN) \
-C pgxn/neon_rmgr \
-s install
#########################################################################################
#
@@ -1687,7 +1648,6 @@ COPY --from=pg_jsonschema-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_graphql-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_tiktoken-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hypopg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=online_advisor-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_hashids-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rum-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgtap-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1728,7 +1688,7 @@ FROM extensions-${EXTENSIONS} AS neon-pg-ext-build
# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries
#
#########################################################################################
FROM build-deps-with-cargo AS compute-tools
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
@@ -1738,7 +1698,7 @@ COPY --chown=nonroot . .
RUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \
--mount=type=cache,uid=1000,target=/home/nonroot/.cargo/git \
--mount=type=cache,uid=1000,target=/home/nonroot/target \
cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \
mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \
mkdir target-bin && \
cp target/release-line-debug-size-lto/compute_ctl \
target/release-line-debug-size-lto/fast_import \
@@ -1791,17 +1751,17 @@ ARG TARGETARCH
RUN if [ "$TARGETARCH" = "amd64" ]; then\
postgres_exporter_sha256='59aa4a7bb0f7d361f5e05732f5ed8c03cc08f78449cef5856eadec33a627694b';\
pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\
sql_exporter_sha256='9a41127a493e8bfebfe692bf78c7ed2872a58a3f961ee534d1b0da9ae584aaab';\
sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\
else\
postgres_exporter_sha256='d1dedea97f56c6d965837bfd1fbb3e35a3b4a4556f8cccee8bd513d8ee086124';\
pgbouncer_exporter_sha256='217c4afd7e6492ae904055bc14fe603552cf9bac458c063407e991d68c519da3';\
sql_exporter_sha256='530e6afc77c043497ed965532c4c9dfa873bc2a4f0b3047fad367715c0081d6a';\
sql_exporter_sha256='11918b00be6e2c3a67564adfdb2414fdcbb15a5db76ea17d1d1a944237a893c6';\
fi\
&& curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.17.1/postgres_exporter-0.17.1.linux-${TARGETARCH}.tar.gz\
| tar xzf - --strip-components=1 -C.\
&& curl -sL https://github.com/prometheus-community/pgbouncer_exporter/releases/download/v0.10.2/pgbouncer_exporter-0.10.2.linux-${TARGETARCH}.tar.gz\
| tar xzf - --strip-components=1 -C.\
&& curl -sL https://github.com/burningalchemist/sql_exporter/releases/download/0.17.3/sql_exporter-0.17.3.linux-${TARGETARCH}.tar.gz\
&& curl -sL https://github.com/burningalchemist/sql_exporter/releases/download/0.17.0/sql_exporter-0.17.0.linux-${TARGETARCH}.tar.gz\
| tar xzf - --strip-components=1 -C.\
&& echo "${postgres_exporter_sha256} postgres_exporter" | sha256sum -c -\
&& echo "${pgbouncer_exporter_sha256} pgbouncer_exporter" | sha256sum -c -\
@@ -1832,11 +1792,10 @@ RUN rm /usr/local/pgsql/lib/lib*.a
# Preprocess the sql_exporter configuration files
#
#########################################################################################
FROM build-deps AS sql_exporter_preprocessor
FROM $REPOSITORY/$IMAGE:$TAG AS sql_exporter_preprocessor
ARG PG_VERSION
USER nonroot
WORKDIR /home/nonroot
COPY --chown=nonroot compute compute
@@ -1850,27 +1809,12 @@ RUN make PG_VERSION="${PG_VERSION:?}" -C compute
FROM pg-build AS extension-tests
ARG PG_VERSION
# This is required for the PostGIS test
RUN apt-get update && case $DEBIAN_VERSION in \
bullseye) \
apt-get install -y libproj19 libgdal28 time; \
;; \
bookworm) \
apt-get install -y libgdal32 libproj25 time; \
;; \
*) \
echo "Unknown Debian version ${DEBIAN_VERSION}" && exit 1 \
;; \
esac
COPY docker-compose/ext-src/ /ext-src/
COPY --from=pg-build /postgres /postgres
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=postgis-build /ext-src/postgis-src /ext-src/postgis-src
COPY --from=postgis-build /sfcgal/* /usr
#COPY --from=postgis-src /ext-src/ /ext-src/
COPY --from=plv8-src /ext-src/ /ext-src/
COPY --from=h3-pg-src /ext-src/h3-pg-src /ext-src/h3-pg-src
#COPY --from=h3-pg-src /ext-src/ /ext-src/
COPY --from=postgresql-unit-src /ext-src/ /ext-src/
COPY --from=pgvector-src /ext-src/ /ext-src/
COPY --from=pgjwt-src /ext-src/ /ext-src/
@@ -1879,7 +1823,6 @@ COPY --from=pgjwt-src /ext-src/ /ext-src/
COPY --from=pg_graphql-src /ext-src/ /ext-src/
#COPY --from=pg_tiktoken-src /ext-src/ /ext-src/
COPY --from=hypopg-src /ext-src/ /ext-src/
COPY --from=online_advisor-src /ext-src/ /ext-src/
COPY --from=pg_hashids-src /ext-src/ /ext-src/
COPY --from=rum-src /ext-src/ /ext-src/
COPY --from=pgtap-src /ext-src/ /ext-src/
@@ -1909,7 +1852,6 @@ COPY compute/patches/pg_repack.patch /ext-src
RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /ext-src/pg_repack.patch
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
RUN echo /usr/local/pgsql/lib > /etc/ld.so.conf.d/00-neon.conf && /sbin/ldconfig
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
&& apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
ENV PATH=/usr/local/pgsql/bin:$PATH

View File

@@ -21,8 +21,6 @@ unix_socket_dir=/tmp/
unix_socket_mode=0777
; required for pgbouncer_exporter
ignore_startup_parameters=extra_float_digits
; pidfile for graceful termination
pidfile=/tmp/pgbouncer.pid
;; Disable connection logging. It produces a lot of logs that no one looks at,
;; and we can get similar log entries from the proxy too. We had incidents in

View File

@@ -1,209 +0,0 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Neon Compute Manifest Schema",
"description": "Schema for Neon compute node configuration manifest",
"type": "object",
"properties": {
"pg_settings": {
"type": "object",
"properties": {
"common": {
"type": "object",
"properties": {
"client_connection_check_interval": {
"type": "string",
"description": "Check for client disconnection interval in milliseconds"
},
"effective_io_concurrency": {
"type": "string",
"description": "Effective IO concurrency setting"
},
"fsync": {
"type": "string",
"enum": ["on", "off"],
"description": "Whether to force fsync to disk"
},
"hot_standby": {
"type": "string",
"enum": ["on", "off"],
"description": "Whether hot standby is enabled"
},
"idle_in_transaction_session_timeout": {
"type": "string",
"description": "Timeout for idle transactions in milliseconds"
},
"listen_addresses": {
"type": "string",
"description": "Addresses to listen on"
},
"log_connections": {
"type": "string",
"enum": ["on", "off"],
"description": "Whether to log connections"
},
"log_disconnections": {
"type": "string",
"enum": ["on", "off"],
"description": "Whether to log disconnections"
},
"log_temp_files": {
"type": "string",
"description": "Size threshold for logging temporary files in KB"
},
"log_error_verbosity": {
"type": "string",
"enum": ["terse", "verbose", "default"],
"description": "Error logging verbosity level"
},
"log_min_error_statement": {
"type": "string",
"description": "Minimum error level for statement logging"
},
"maintenance_io_concurrency": {
"type": "string",
"description": "Maintenance IO concurrency setting"
},
"max_connections": {
"type": "string",
"description": "Maximum number of connections"
},
"max_replication_flush_lag": {
"type": "string",
"description": "Maximum replication flush lag"
},
"max_replication_slots": {
"type": "string",
"description": "Maximum number of replication slots"
},
"max_replication_write_lag": {
"type": "string",
"description": "Maximum replication write lag"
},
"max_wal_senders": {
"type": "string",
"description": "Maximum number of WAL senders"
},
"max_wal_size": {
"type": "string",
"description": "Maximum WAL size"
},
"neon.unstable_extensions": {
"type": "string",
"description": "List of unstable extensions"
},
"neon.protocol_version": {
"type": "string",
"description": "Neon protocol version"
},
"password_encryption": {
"type": "string",
"description": "Password encryption method"
},
"restart_after_crash": {
"type": "string",
"enum": ["on", "off"],
"description": "Whether to restart after crash"
},
"superuser_reserved_connections": {
"type": "string",
"description": "Number of reserved connections for superuser"
},
"synchronous_standby_names": {
"type": "string",
"description": "Names of synchronous standby servers"
},
"wal_keep_size": {
"type": "string",
"description": "WAL keep size"
},
"wal_level": {
"type": "string",
"description": "WAL level"
},
"wal_log_hints": {
"type": "string",
"enum": ["on", "off"],
"description": "Whether to log hints in WAL"
},
"wal_sender_timeout": {
"type": "string",
"description": "WAL sender timeout in milliseconds"
}
},
"required": [
"client_connection_check_interval",
"effective_io_concurrency",
"fsync",
"hot_standby",
"idle_in_transaction_session_timeout",
"listen_addresses",
"log_connections",
"log_disconnections",
"log_temp_files",
"log_error_verbosity",
"log_min_error_statement",
"maintenance_io_concurrency",
"max_connections",
"max_replication_flush_lag",
"max_replication_slots",
"max_replication_write_lag",
"max_wal_senders",
"max_wal_size",
"neon.unstable_extensions",
"neon.protocol_version",
"password_encryption",
"restart_after_crash",
"superuser_reserved_connections",
"synchronous_standby_names",
"wal_keep_size",
"wal_level",
"wal_log_hints",
"wal_sender_timeout"
]
},
"replica": {
"type": "object",
"properties": {
"hot_standby": {
"type": "string",
"enum": ["on", "off"],
"description": "Whether hot standby is enabled for replicas"
}
},
"required": ["hot_standby"]
},
"per_version": {
"type": "object",
"patternProperties": {
"^1[4-7]$": {
"type": "object",
"properties": {
"common": {
"type": "object",
"properties": {
"io_combine_limit": {
"type": "string",
"description": "IO combine limit"
}
}
},
"replica": {
"type": "object",
"properties": {
"recovery_prefetch": {
"type": "string",
"enum": ["on", "off"],
"description": "Whether to enable recovery prefetch for PostgreSQL replicas"
}
}
}
}
}
}
}
},
"required": ["common", "replica", "per_version"]
}
},
"required": ["pg_settings"]
}

View File

@@ -1,121 +0,0 @@
pg_settings:
# Common settings for primaries and replicas of all versions.
common:
# Check for client disconnection every 1 minute. By default, Postgres will detect the
# loss of the connection only at the next interaction with the socket, when it waits
# for, receives or sends data, so it will likely waste resources till the end of the
# query execution. There should be no drawbacks in setting this for everyone, so enable
# it by default. If anyone will complain, we can allow editing it.
# https://www.postgresql.org/docs/16/runtime-config-connection.html#GUC-CLIENT-CONNECTION-CHECK-INTERVAL
client_connection_check_interval: "60000" # 1 minute
# ---- IO ----
effective_io_concurrency: "20"
maintenance_io_concurrency: "100"
fsync: "off"
hot_standby: "off"
# We allow users to change this if needed, but by default we
# just don't want to see long-lasting idle transactions, as they
# prevent activity monitor from suspending projects.
idle_in_transaction_session_timeout: "300000" # 5 minutes
listen_addresses: "*"
# --- LOGGING ---- helps investigations
log_connections: "on"
log_disconnections: "on"
# 1GB, unit is KB
log_temp_files: "1048576"
# Disable dumping customer data to logs, both to increase data privacy
# and to reduce the amount the logs.
log_error_verbosity: "terse"
log_min_error_statement: "panic"
max_connections: "100"
# --- WAL ---
# - flush lag is the max amount of WAL that has been generated but not yet stored
# to disk in the page server. A smaller value means less delay after a pageserver
# restart, but if you set it too small you might again need to slow down writes if the
# pageserver cannot flush incoming WAL to disk fast enough. This must be larger
# than the pageserver's checkpoint interval, currently 1 GB! Otherwise you get a
# a deadlock where the compute node refuses to generate more WAL before the
# old WAL has been uploaded to S3, but the pageserver is waiting for more WAL
# to be generated before it is uploaded to S3.
max_replication_flush_lag: "10GB"
max_replication_slots: "10"
# Backpressure configuration:
# - write lag is the max amount of WAL that has been generated by Postgres but not yet
# processed by the page server. Making this smaller reduces the worst case latency
# of a GetPage request, if you request a page that was recently modified. On the other
# hand, if this is too small, the compute node might need to wait on a write if there is a
# hiccup in the network or page server so that the page server has temporarily fallen
# behind.
#
# Previously it was set to 500 MB, but it caused compute being unresponsive under load
# https://github.com/neondatabase/neon/issues/2028
max_replication_write_lag: "500MB"
max_wal_senders: "10"
# A Postgres checkpoint is cheap in storage, as doesn't involve any significant amount
# of real I/O. Only the SLRU buffers and some other small files are flushed to disk.
# However, as long as we have full_page_writes=on, page updates after a checkpoint
# include full-page images which bloats the WAL. So may want to bump max_wal_size to
# reduce the WAL bloating, but at the same it will increase pg_wal directory size on
# compute and can lead to out of disk error on k8s nodes.
max_wal_size: "1024"
wal_keep_size: "0"
wal_level: "replica"
# Reduce amount of WAL generated by default.
wal_log_hints: "off"
# - without wal_sender_timeout set we don't get feedback messages,
# required for backpressure.
wal_sender_timeout: "10000"
# We have some experimental extensions, which we don't want users to install unconsciously.
# To install them, users would need to set the `neon.allow_unstable_extensions` setting.
# There are two of them currently:
# - `pgrag` - https://github.com/neondatabase-labs/pgrag - extension is actually called just `rag`,
# and two dependencies:
# - `rag_bge_small_en_v15`
# - `rag_jina_reranker_v1_tiny_en`
# - `pg_mooncake` - https://github.com/Mooncake-Labs/pg_mooncake/
neon.unstable_extensions: "rag,rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en,pg_mooncake,anon"
neon.protocol_version: "3"
password_encryption: "scram-sha-256"
# This is important to prevent Postgres from trying to perform
# a local WAL redo after backend crash. It should exit and let
# the systemd or k8s to do a fresh startup with compute_ctl.
restart_after_crash: "off"
# By default 3. We have the following persistent connections in the VM:
# * compute_activity_monitor (from compute_ctl)
# * postgres-exporter (metrics collector; it has 2 connections)
# * sql_exporter (metrics collector; we have 2 instances [1 for us & users; 1 for autoscaling])
# * vm-monitor (to query & change file cache size)
# i.e. total of 6. Let's reserve 7, so there's still at least one left over.
superuser_reserved_connections: "7"
synchronous_standby_names: "walproposer"
replica:
hot_standby: "on"
per_version:
17:
common:
# PostgreSQL 17 has a new IO system called "read stream", which can combine IOs up to some
# size. It still has some issues with readahead, though, so we default to disabled/
# "no combining of IOs" to make sure we get the maximum prefetch depth.
# See also: https://github.com/neondatabase/neon/pull/9860
io_combine_limit: "1"
replica:
# prefetching of blocks referenced in WAL doesn't make sense for us
# Neon hot standby ignores pages that are not in the shared_buffers
recovery_prefetch: "off"
16:
common: {}
replica:
# prefetching of blocks referenced in WAL doesn't make sense for us
# Neon hot standby ignores pages that are not in the shared_buffers
recovery_prefetch: "off"
15:
common: {}
replica:
# prefetching of blocks referenced in WAL doesn't make sense for us
# Neon hot standby ignores pages that are not in the shared_buffers
recovery_prefetch: "off"
14:
common: {}
replica: {}

View File

@@ -1,37 +0,0 @@
{
"name": "neon-compute",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "neon-compute",
"dependencies": {
"@sourcemeta/jsonschema": "9.3.4"
}
},
"node_modules/@sourcemeta/jsonschema": {
"version": "9.3.4",
"resolved": "https://registry.npmjs.org/@sourcemeta/jsonschema/-/jsonschema-9.3.4.tgz",
"integrity": "sha512-hkujfkZAIGXUs4U//We9faZW8LZ4/H9LqagRYsFSulH/VLcKPNhZyCTGg7AhORuzm27zqENvKpnX4g2FzudYFw==",
"cpu": [
"x64",
"arm64"
],
"license": "AGPL-3.0",
"os": [
"darwin",
"linux",
"win32"
],
"bin": {
"jsonschema": "cli.js"
},
"engines": {
"node": ">=16"
},
"funding": {
"url": "https://github.com/sponsors/sourcemeta"
}
}
}
}

View File

@@ -1,7 +0,0 @@
{
"name": "neon-compute",
"private": true,
"dependencies": {
"@sourcemeta/jsonschema": "9.3.4"
}
}

View File

@@ -1,15 +0,0 @@
diff --git a/cmake/deps.txt b/cmake/deps.txt
index d213b09034..229de2ebf0 100644
--- a/cmake/deps.txt
+++ b/cmake/deps.txt
@@ -22,7 +22,9 @@ dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b3132
# it contains changes on top of 3.4.0 which are required to fix build issues.
# Until the 3.4.1 release this is the best option we have.
# Issue link: https://gitlab.com/libeigen/eigen/-/issues/2744
-eigen;https://gitlab.com/libeigen/eigen/-/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;be8be39fdbc6e60e94fa7870b280707069b5b81a
+# Moved to github mirror to avoid gitlab issues.Add commentMore actions
+# Issue link: https://github.com/bazelbuild/bazel-central-registry/issues/4355
+eigen;https://github.com/eigen-mirror/eigen/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;61418a349000ba7744a3ad03cf5071f22ebf860a
flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v23.5.26.zip;59422c3b5e573dd192fead2834d25951f1c1670c
fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1

View File

@@ -7,7 +7,7 @@ index 255e616..1c6edb7 100644
RelationGetRelationName(index));
+#ifdef NEON_SMGR
+ smgr_start_unlogged_build(RelationGetSmgr(index));
+ smgr_start_unlogged_build(index->rd_smgr);
+#endif
+
initRumState(&buildstate.rumstate, index);
@@ -18,7 +18,7 @@ index 255e616..1c6edb7 100644
rumUpdateStats(index, &buildstate.buildStats, buildstate.rumstate.isBuild);
+#ifdef NEON_SMGR
+ smgr_finish_unlogged_build_phase_1(RelationGetSmgr(index));
+ smgr_finish_unlogged_build_phase_1(index->rd_smgr);
+#endif
+
/*
@@ -29,7 +29,7 @@ index 255e616..1c6edb7 100644
}
+#ifdef NEON_SMGR
+ smgr_end_unlogged_build(RelationGetSmgr(index));
+ smgr_end_unlogged_build(index->rd_smgr);
+#endif
+
/*

View File

@@ -26,7 +26,7 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: pgbouncer-exporter
user: postgres
sysvInitAction: respawn
@@ -59,7 +59,7 @@ files:
# the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
# resolve host" log messages that they generate.
Defaults !fqdn
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
# regardless of hostname (ALL)

View File

@@ -26,7 +26,7 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: pgbouncer-exporter
user: postgres
sysvInitAction: respawn
@@ -59,7 +59,7 @@ files:
# the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
# resolve host" log messages that they generate.
Defaults !fqdn
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
# regardless of hostname (ALL)

View File

@@ -38,7 +38,6 @@ once_cell.workspace = true
opentelemetry.workspace = true
opentelemetry_sdk.workspace = true
p256 = { version = "0.13", features = ["pem"] }
pageserver_page_api.workspace = true
postgres.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["json"] }
@@ -54,7 +53,6 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tokio-util.workspace = true
tokio-stream.workspace = true
tonic.workspace = true
tower-otel.workspace = true
tracing.workspace = true
tracing-opentelemetry.workspace = true
@@ -66,7 +64,6 @@ uuid.workspace = true
walkdir.workspace = true
x509-cert.workspace = true
postgres_versioninfo.workspace = true
postgres_initdb.workspace = true
compute_api.workspace = true
utils.workspace = true

View File

@@ -36,13 +36,11 @@
use std::ffi::OsString;
use std::fs::File;
use std::process::exit;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use std::sync::mpsc;
use std::thread;
use std::time::Duration;
use anyhow::{Context, Result, bail};
use anyhow::{Context, Result};
use clap::Parser;
use compute_api::responses::ComputeConfig;
use compute_tools::compute::{
@@ -59,15 +57,31 @@ use tracing::{error, info};
use url::Url;
use utils::failpoint_support;
#[derive(Debug, Parser)]
// Compatibility hack: if the control plane specified any remote-ext-config
// use the default value for extension storage proxy gateway.
// Remove this once the control plane is updated to pass the gateway URL
fn parse_remote_ext_base_url(arg: &str) -> Result<String> {
const FALLBACK_PG_EXT_GATEWAY_BASE_URL: &str =
"http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local";
Ok(if arg.starts_with("http") {
arg
} else {
FALLBACK_PG_EXT_GATEWAY_BASE_URL
}
.to_owned())
}
#[derive(Parser)]
#[command(rename_all = "kebab-case")]
struct Cli {
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
pub pgbin: String,
/// The base URL for the remote extension storage proxy gateway.
#[arg(short = 'r', long, value_parser = Self::parse_remote_ext_base_url)]
pub remote_ext_base_url: Option<Url>,
/// Should be in the form of `http(s)://<gateway-hostname>[:<port>]`.
#[arg(short = 'r', long, value_parser = parse_remote_ext_base_url, alias = "remote-ext-config")]
pub remote_ext_base_url: Option<String>,
/// The port to bind the external listening HTTP server to. Clients running
/// outside the compute will talk to the compute through this port. Keep
@@ -122,33 +136,6 @@ struct Cli {
requires = "compute-id"
)]
pub control_plane_uri: Option<String>,
/// Interval in seconds for collecting installed extensions statistics
#[arg(long, default_value = "3600")]
pub installed_extensions_collection_interval: u64,
/// Run in development mode, skipping VM-specific operations like process termination
#[arg(long, action = clap::ArgAction::SetTrue)]
pub dev: bool,
}
impl Cli {
/// Parse a URL from an argument. By default, this isn't necessary, but we
/// want to do some sanity checking.
fn parse_remote_ext_base_url(value: &str) -> Result<Url> {
// Remove extra trailing slashes, and add one. We use Url::join() later
// when downloading remote extensions. If the base URL is something like
// http://example.com/pg-ext-s3-gateway, and join() is called with
// something like "xyz", the resulting URL is http://example.com/xyz.
let value = value.trim_end_matches('/').to_owned() + "/";
let url = Url::parse(&value)?;
if url.query_pairs().count() != 0 {
bail!("parameters detected in remote extensions base URL")
}
Ok(url)
}
}
fn main() -> Result<()> {
@@ -165,7 +152,7 @@ fn main() -> Result<()> {
.build()?;
let _rt_guard = runtime.enter();
runtime.block_on(init(cli.dev))?;
runtime.block_on(init())?;
// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
@@ -192,9 +179,6 @@ fn main() -> Result<()> {
cgroup: cli.cgroup,
#[cfg(target_os = "linux")]
vm_monitor_addr: cli.vm_monitor_addr,
installed_extensions_collection_interval: Arc::new(AtomicU64::new(
cli.installed_extensions_collection_interval,
)),
},
config,
)?;
@@ -206,13 +190,13 @@ fn main() -> Result<()> {
deinit_and_exit(exit_code);
}
async fn init(dev_mode: bool) -> Result<()> {
async fn init() -> Result<()> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL).await?;
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
thread::spawn(move || {
for sig in signals.forever() {
handle_exit_signal(sig, dev_mode);
handle_exit_signal(sig);
}
});
@@ -271,16 +255,15 @@ fn deinit_and_exit(exit_code: Option<i32>) -> ! {
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
/// to prevent leakage. TODO: it is better to convert compute_ctl to async and
/// wait for termination which would be easy then.
fn handle_exit_signal(sig: i32, dev_mode: bool) {
fn handle_exit_signal(sig: i32) {
info!("received {sig} termination signal");
forward_termination_signal(dev_mode);
forward_termination_signal();
exit(1);
}
#[cfg(test)]
mod test {
use clap::{CommandFactory, Parser};
use url::Url;
use clap::CommandFactory;
use super::Cli;
@@ -290,41 +273,16 @@ mod test {
}
#[test]
fn verify_remote_ext_base_url() {
let cli = Cli::parse_from([
"compute_ctl",
"--pgdata=test",
"--connstr=test",
"--compute-id=test",
"--remote-ext-base-url",
"https://example.com/subpath",
]);
assert_eq!(
cli.remote_ext_base_url.unwrap(),
Url::parse("https://example.com/subpath/").unwrap()
);
fn parse_pg_ext_gateway_base_url() {
let arg = "http://pg-ext-s3-gateway2";
let result = super::parse_remote_ext_base_url(arg).unwrap();
assert_eq!(result, arg);
let cli = Cli::parse_from([
"compute_ctl",
"--pgdata=test",
"--connstr=test",
"--compute-id=test",
"--remote-ext-base-url",
"https://example.com//",
]);
let arg = "pg-ext-s3-gateway";
let result = super::parse_remote_ext_base_url(arg).unwrap();
assert_eq!(
cli.remote_ext_base_url.unwrap(),
Url::parse("https://example.com").unwrap()
result,
"http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local"
);
Cli::try_parse_from([
"compute_ctl",
"--pgdata=test",
"--connstr=test",
"--compute-id=test",
"--remote-ext-base-url",
"https://example.com?hello=world",
])
.expect_err("URL parameters are not allowed");
}
}

View File

@@ -29,7 +29,7 @@ use anyhow::{Context, bail};
use aws_config::BehaviorVersion;
use camino::{Utf8Path, Utf8PathBuf};
use clap::{Parser, Subcommand};
use compute_tools::extension_server::get_pg_version;
use compute_tools::extension_server::{PostgresMajorVersion, get_pg_version};
use nix::unistd::Pid;
use std::ops::Not;
use tracing::{Instrument, error, info, info_span, warn};
@@ -179,8 +179,12 @@ impl PostgresProcess {
.await
.context("create pgdata directory")?;
let pg_version = get_pg_version(self.pgbin.as_ref());
let pg_version = match get_pg_version(self.pgbin.as_ref()) {
PostgresMajorVersion::V14 => 14,
PostgresMajorVersion::V15 => 15,
PostgresMajorVersion::V16 => 16,
PostgresMajorVersion::V17 => 17,
};
postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
superuser: initdb_user,
locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
@@ -335,8 +339,6 @@ async fn run_dump_restore(
destination_connstring: String,
) -> Result<(), anyhow::Error> {
let dumpdir = workdir.join("dumpdir");
let num_jobs = num_cpus::get().to_string();
info!("using {num_jobs} jobs for dump/restore");
let common_args = [
// schema mapping (prob suffices to specify them on one side)
@@ -352,7 +354,7 @@ async fn run_dump_restore(
"directory".to_string(),
// concurrency
"--jobs".to_string(),
num_jobs,
num_cpus::get().to_string(),
// progress updates
"--verbose".to_string(),
];
@@ -482,8 +484,10 @@ async fn cmd_pgdata(
};
let superuser = "cloud_admin";
let destination_connstring =
format!("host=localhost port={pg_port} user={superuser} dbname=neondb");
let destination_connstring = format!(
"host=localhost port={} user={} dbname=neondb",
pg_port, superuser
);
let pgdata_dir = workdir.join("pgdata");
let mut proc = PostgresProcess::new(pgdata_dir.clone(), pg_bin_dir.clone(), pg_lib_dir.clone());

View File

@@ -69,7 +69,7 @@ impl clap::builder::TypedValueParser for S3Uri {
S3Uri::from_str(value_str).map_err(|e| {
clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("Failed to parse S3 URI: {e}"),
format!("Failed to parse S3 URI: {}", e),
)
})
}

View File

@@ -22,7 +22,7 @@ pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<Cat
spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {e}");
eprintln!("connection error: {}", e);
}
});
@@ -119,7 +119,7 @@ pub async fn get_database_schema(
_ => {
let mut lines = stderr_reader.lines();
if let Some(line) = lines.next_line().await? {
if line.contains(&format!("FATAL: database \"{dbname}\" does not exist")) {
if line.contains(&format!("FATAL: database \"{}\" does not exist", dbname)) {
return Err(SchemaDumpError::DatabaseDoesNotExist);
}
warn!("pg_dump stderr: {}", line)

View File

@@ -3,10 +3,10 @@ use chrono::{DateTime, Utc};
use compute_api::privilege::Privilege;
use compute_api::responses::{
ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus, LfcOffloadState,
LfcPrewarmState, TlsConfig,
LfcPrewarmState,
};
use compute_api::spec::{
ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent,
ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent,
};
use futures::StreamExt;
use futures::future::join_all;
@@ -15,28 +15,25 @@ use itertools::Itertools;
use nix::sys::signal::{Signal, kill};
use nix::unistd::Pid;
use once_cell::sync::Lazy;
use pageserver_page_api::{self as page_api, BaseBackupCompression};
use postgres;
use postgres::NoTls;
use postgres::error::SqlState;
use remote_storage::{DownloadError, RemotePath};
use std::collections::{HashMap, HashSet};
use std::net::SocketAddr;
use std::os::unix::fs::{PermissionsExt, symlink};
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Condvar, Mutex, RwLock};
use std::time::{Duration, Instant};
use std::{env, fs};
use tokio::spawn;
use tracing::{Instrument, debug, error, info, instrument, warn};
use url::Url;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use utils::measured_stream::MeasuredReader;
use utils::pid_file;
use utils::shard::{ShardCount, ShardIndex, ShardNumber};
use crate::configurator::launch_configurator;
use crate::disk_quota::set_disk_quota;
@@ -46,7 +43,6 @@ use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
use crate::metrics::COMPUTE_CTL_UP;
use crate::monitor::launch_monitor;
use crate::pg_helpers::*;
use crate::pgbouncer::*;
use crate::rsyslog::{
PostgresLogsRsyslogConfig, configure_audit_rsyslog, configure_postgres_logs_export,
launch_pgaudit_gc,
@@ -70,7 +66,6 @@ pub static BUILD_TAG: Lazy<String> = Lazy::new(|| {
.unwrap_or(BUILD_TAG_DEFAULT)
.to_string()
});
const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;
/// Static configuration params that don't change after startup. These mostly
/// come from the CLI args, or are derived from them.
@@ -101,10 +96,7 @@ pub struct ComputeNodeParams {
pub internal_http_port: u16,
/// the address of extension storage proxy gateway
pub remote_ext_base_url: Option<Url>,
/// Interval for installed extensions collection
pub installed_extensions_collection_interval: Arc<AtomicU64>,
pub remote_ext_base_url: Option<String>,
}
/// Compute node info shared across several `compute_ctl` threads.
@@ -127,9 +119,6 @@ pub struct ComputeNode {
// key: ext_archive_name, value: started download time, download_completed?
pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
pub compute_ctl_config: ComputeCtlConfig,
/// Handle to the extension stats collection task
extension_stats_task: Mutex<Option<tokio::task::JoinHandle<()>>>,
}
// store some metrics about download size that might impact startup time
@@ -168,10 +157,6 @@ pub struct ComputeState {
pub lfc_prewarm_state: LfcPrewarmState,
pub lfc_offload_state: LfcOffloadState,
/// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if
/// mode == ComputeMode::Primary. None otherwise
pub terminate_flush_lsn: Option<Lsn>,
pub metrics: ComputeMetrics,
}
@@ -187,7 +172,6 @@ impl ComputeState {
metrics: ComputeMetrics::default(),
lfc_prewarm_state: LfcPrewarmState::default(),
lfc_offload_state: LfcOffloadState::default(),
terminate_flush_lsn: None,
}
}
@@ -223,50 +207,10 @@ pub struct ParsedSpec {
pub pageserver_connstr: String,
pub safekeeper_connstrings: Vec<String>,
pub storage_auth_token: Option<String>,
/// k8s dns name and port
pub endpoint_storage_addr: Option<String>,
pub endpoint_storage_addr: Option<SocketAddr>,
pub endpoint_storage_token: Option<String>,
}
impl ParsedSpec {
pub fn validate(&self) -> Result<(), String> {
// Only Primary nodes are using safekeeper_connstrings, and at the moment
// this method only validates that part of the specs.
if self.spec.mode != ComputeMode::Primary {
return Ok(());
}
// While it seems like a good idea to check for an odd number of entries in
// the safekeepers connection string, changes to the list of safekeepers might
// incur appending a new server to a list of 3, in which case a list of 4
// entries is okay in production.
//
// Still we want unique entries, and at least one entry in the vector
if self.safekeeper_connstrings.is_empty() {
return Err(String::from("safekeeper_connstrings is empty"));
}
// check for uniqueness of the connection strings in the set
let mut connstrings = self.safekeeper_connstrings.clone();
connstrings.sort();
let mut previous = &connstrings[0];
for current in connstrings.iter().skip(1) {
// duplicate entry?
if current == previous {
return Err(format!(
"duplicate entry in safekeeper_connstrings: {current}!",
));
}
previous = current;
}
Ok(())
}
}
impl TryFrom<ComputeSpec> for ParsedSpec {
type Error = String;
fn try_from(spec: ComputeSpec) -> Result<Self, String> {
@@ -296,7 +240,6 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
} else {
spec.safekeeper_connstrings.clone()
};
let storage_auth_token = spec.storage_auth_token.clone();
let tenant_id: TenantId = if let Some(tenant_id) = spec.tenant_id {
tenant_id
@@ -319,16 +262,19 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
.or(Err("invalid timeline id"))?
};
let endpoint_storage_addr: Option<String> = spec
let endpoint_storage_addr: Option<SocketAddr> = spec
.endpoint_storage_addr
.clone()
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_addr"));
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_addr"))
.unwrap_or_default()
.parse()
.ok();
let endpoint_storage_token = spec
.endpoint_storage_token
.clone()
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_token"));
let res = ParsedSpec {
Ok(ParsedSpec {
spec,
pageserver_connstr,
safekeeper_connstrings,
@@ -337,11 +283,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
timeline_id,
endpoint_storage_addr,
endpoint_storage_token,
};
// Now check validity of the parsed specification
res.validate()?;
Ok(res)
})
}
}
@@ -408,11 +350,14 @@ impl ComputeNode {
// that can affect `compute_ctl` and prevent it from properly configuring the database schema.
// Unset them via connection string options before connecting to the database.
// N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0 -c pgaudit.log=none";
//
// TODO(ololobus): we currently pass `-c default_transaction_read_only=off` from control plane
// as well. After rolling out this code, we can remove this parameter from control plane.
// In the meantime, double-passing is fine, the last value is applied.
// See: <https://github.com/neondatabase/cloud/blob/133dd8c4dbbba40edfbad475bf6a45073ca63faf/goapp/controlplane/internal/pkg/compute/provisioner/provisioner_common.go#L70>
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
let options = match conn_conf.get_options() {
// Allow the control plane to override any options set by the
// compute
Some(options) => format!("{EXTRA_OPTIONS} {options}"),
Some(options) => format!("{} {}", options, EXTRA_OPTIONS),
None => EXTRA_OPTIONS.to_string(),
};
conn_conf.options(&options);
@@ -432,7 +377,6 @@ impl ComputeNode {
state_changed: Condvar::new(),
ext_download_progress: RwLock::new(HashMap::new()),
compute_ctl_config: config.compute_ctl_config,
extension_stats_task: Mutex::new(None),
})
}
@@ -448,7 +392,7 @@ impl ComputeNode {
// because QEMU will already have its memory allocated from the host, and
// the necessary binaries will already be cached.
if cli_spec.is_none() {
this.prewarm_postgres_vm_memory()?;
this.prewarm_postgres()?;
}
// Set the up metric with Empty status before starting the HTTP server.
@@ -520,9 +464,6 @@ impl ComputeNode {
None
};
// Terminate the extension stats collection task
this.terminate_extension_stats_task();
// Terminate the vm_monitor so it releases the file watcher on
// /sys/fs/cgroup/neon-postgres.
// Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -544,21 +485,12 @@ impl ComputeNode {
// Reap the postgres process
delay_exit |= this.cleanup_after_postgres_exit()?;
// /terminate returns LSN. If we don't sleep at all, connection will break and we
// won't get result. If we sleep too much, tests will take significantly longer
// and Github Action run will error out
let sleep_duration = if delay_exit {
Duration::from_secs(30)
} else {
Duration::from_millis(300)
};
// If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error.
if delay_exit {
info!("giving control plane 30s to collect the error before shutdown");
std::thread::sleep(Duration::from_secs(30));
}
std::thread::sleep(sleep_duration);
Ok(exit_code)
}
@@ -667,8 +599,6 @@ impl ComputeNode {
});
}
let tls_config = self.tls_config(&pspec.spec);
// If there are any remote extensions in shared_preload_libraries, start downloading them
if pspec.spec.remote_extensions.is_some() {
let (this, spec) = (self.clone(), pspec.spec.clone());
@@ -725,7 +655,7 @@ impl ComputeNode {
info!("tuning pgbouncer");
let pgbouncer_settings = pgbouncer_settings.clone();
let tls_config = tls_config.clone();
let tls_config = self.compute_ctl_config.tls.clone();
// Spawn a background task to do the tuning,
// so that we don't block the main thread that starts Postgres.
@@ -744,10 +674,7 @@ impl ComputeNode {
// Spawn a background task to do the configuration,
// so that we don't block the main thread that starts Postgres.
let mut local_proxy = local_proxy.clone();
local_proxy.tls = tls_config.clone();
let local_proxy = local_proxy.clone();
let _handle = tokio::spawn(async move {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
@@ -768,18 +695,25 @@ impl ComputeNode {
let log_directory_path = Path::new(&self.params.pgdata).join("log");
let log_directory_path = log_directory_path.to_string_lossy().to_string();
// Add project_id,endpoint_id to identify the logs.
// Add project_id,endpoint_id tag to identify the logs.
//
// These ids are passed from cplane,
let endpoint_id = pspec.spec.endpoint_id.as_deref().unwrap_or("");
let project_id = pspec.spec.project_id.as_deref().unwrap_or("");
// for backwards compatibility (old computes that don't have them),
// we set them to None.
// TODO: Clean up this code when all computes have them.
let tag: Option<String> = match (
pspec.spec.project_id.as_deref(),
pspec.spec.endpoint_id.as_deref(),
) {
(Some(project_id), Some(endpoint_id)) => {
Some(format!("{project_id}/{endpoint_id}"))
}
(Some(project_id), None) => Some(format!("{project_id}/None")),
(None, Some(endpoint_id)) => Some(format!("None,{endpoint_id}")),
(None, None) => None,
};
configure_audit_rsyslog(
log_directory_path.clone(),
endpoint_id,
project_id,
&remote_endpoint,
)?;
configure_audit_rsyslog(log_directory_path.clone(), tag, &remote_endpoint)?;
// Launch a background task to clean up the audit logs
launch_pgaudit_gc(log_directory_path);
@@ -815,7 +749,17 @@ impl ComputeNode {
let conf = self.get_tokio_conn_conf(None);
tokio::task::spawn(async {
let _ = installed_extensions(conf).await;
let res = get_installed_extensions(conf).await;
match res {
Ok(extensions) => {
info!(
"[NEON_EXT_STAT] {}",
serde_json::to_string(&extensions)
.expect("failed to serialize extensions list")
);
}
Err(err) => error!("could not get installed extensions: {err:?}"),
}
});
}
@@ -845,11 +789,8 @@ impl ComputeNode {
// Log metrics so that we can search for slow operations in logs
info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");
// Spawn the extension stats background task
self.spawn_extension_stats_task();
if pspec.spec.autoprewarm {
self.prewarm_lfc(None);
if pspec.spec.prewarm_lfc_on_startup {
self.prewarm_lfc();
}
Ok(())
}
@@ -930,25 +871,20 @@ impl ComputeNode {
// Maybe sync safekeepers again, to speed up next startup
let compute_state = self.state.lock().unwrap().clone();
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
let lsn = if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
info!("syncing safekeepers on shutdown");
let storage_auth_token = pspec.storage_auth_token.clone();
let lsn = self.sync_safekeepers(storage_auth_token)?;
info!(%lsn, "synced safekeepers");
Some(lsn)
} else {
info!("not primary, not syncing safekeepers");
None
};
info!("synced safekeepers at lsn {lsn}");
}
let mut delay_exit = false;
let mut state = self.state.lock().unwrap();
state.terminate_flush_lsn = lsn;
if let ComputeStatus::TerminationPending { mode } = state.status {
if state.status == ComputeStatus::TerminationPending {
state.status = ComputeStatus::Terminated;
self.state_changed.notify_all();
// we were asked to terminate gracefully, don't exit to avoid restart
delay_exit = mode == compute_api::responses::TerminateMode::Fast
delay_exit = true
}
drop(state);
@@ -1005,80 +941,13 @@ impl ComputeNode {
Ok(())
}
/// Fetches a basebackup from the Pageserver using the compute state's Pageserver connstring and
/// unarchives it to `pgdata` directory, replacing any existing contents.
// Get basebackup from the libpq connection to pageserver using `connstr` and
// unarchive it to `pgdata` directory overriding all its previous content.
#[instrument(skip_all, fields(%lsn))]
fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
let spec = compute_state.pspec.as_ref().expect("spec must be set");
let start_time = Instant::now();
let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
let started = Instant::now();
let (connected, size) = match PageserverProtocol::from_connstring(shard0_connstr)? {
PageserverProtocol::Libpq => self.try_get_basebackup_libpq(spec, lsn)?,
PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
};
let mut state = self.state.lock().unwrap();
state.metrics.pageserver_connect_micros =
connected.duration_since(started).as_micros() as u64;
state.metrics.basebackup_bytes = size as u64;
state.metrics.basebackup_ms = started.elapsed().as_millis() as u64;
Ok(())
}
/// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
/// the connection was established, and the (compressed) size of the basebackup.
fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
let shard0_connstr = spec
.pageserver_connstr
.split(',')
.next()
.unwrap()
.to_string();
let shard_index = match spec.pageserver_connstr.split(',').count() as u8 {
0 | 1 => ShardIndex::unsharded(),
count => ShardIndex::new(ShardNumber(0), ShardCount(count)),
};
let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {
let mut client = page_api::Client::new(
shard0_connstr,
spec.tenant_id,
spec.timeline_id,
shard_index,
spec.storage_auth_token.clone(),
None, // NB: base backups use payload compression
)
.await?;
let connected = Instant::now();
let reader = client
.get_base_backup(page_api::GetBaseBackupRequest {
lsn: (lsn != Lsn(0)).then_some(lsn),
compression: BaseBackupCompression::Gzip,
replica: spec.spec.mode != ComputeMode::Primary,
full: false,
})
.await?;
anyhow::Ok((reader, connected))
})?;
let mut reader = MeasuredReader::new(tokio_util::io::SyncIoBridge::new(reader));
// Set `ignore_zeros` so that unpack() reads the entire stream and doesn't just stop at the
// end-of-archive marker. If the server errors, the tar::Builder drop handler will write an
// end-of-archive marker before the error is emitted, and we would not see the error.
let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut reader));
ar.set_ignore_zeros(true);
ar.unpack(&self.params.pgdata)?;
Ok((connected, reader.get_byte_count()))
}
/// Fetches a basebackup via libpq. The connstring must use postgresql://. Returns the timestamp
/// when the connection was established, and the (compressed) size of the basebackup.
fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
let mut config = postgres::Config::from_str(shard0_connstr)?;
@@ -1092,14 +961,16 @@ impl ComputeNode {
}
config.application_name("compute_ctl");
config.options(&format!(
"-c neon.compute_mode={}",
spec.spec.mode.to_type_str()
));
if let Some(spec) = &compute_state.pspec {
config.options(&format!(
"-c neon.compute_mode={}",
spec.spec.mode.to_type_str()
));
}
// Connect to pageserver
let mut client = config.connect(NoTls)?;
let connected = Instant::now();
let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;
let basebackup_cmd = match lsn {
Lsn(0) => {
@@ -1136,13 +1007,16 @@ impl ComputeNode {
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
// The tar::Builder drop handler will write an end-of-archive marker
// before emitting the error, and we would not see it otherwise.
let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
ar.set_ignore_zeros(true);
ar.unpack(&self.params.pgdata)?;
Ok((connected, measured_reader.get_byte_count()))
// Report metrics
let mut state = self.state.lock().unwrap();
state.metrics.pageserver_connect_micros = pageserver_connect_micros;
state.metrics.basebackup_bytes = measured_reader.get_byte_count() as u64;
state.metrics.basebackup_ms = start_time.elapsed().as_millis() as u64;
Ok(())
}
// Gets the basebackup in a retry loop
@@ -1195,7 +1069,7 @@ impl ComputeNode {
let sk_configs = sk_connstrs.into_iter().map(|connstr| {
// Format connstr
let id = connstr.clone();
let connstr = format!("postgresql://no_user@{connstr}");
let connstr = format!("postgresql://no_user@{}", connstr);
let options = format!(
"-c timeline_id={} tenant_id={}",
pspec.timeline_id, pspec.tenant_id
@@ -1341,15 +1215,13 @@ impl ComputeNode {
let spec = &pspec.spec;
let pgdata_path = Path::new(&self.params.pgdata);
let tls_config = self.tls_config(&pspec.spec);
// Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?;
config::write_postgres_conf(
pgdata_path,
&pspec.spec,
self.params.internal_http_port,
tls_config,
&self.compute_ctl_config.tls,
)?;
// Syncing safekeepers is only safe with primary nodes: if a primary
@@ -1445,8 +1317,8 @@ impl ComputeNode {
}
/// Start and stop a postgres process to warm up the VM for startup.
pub fn prewarm_postgres_vm_memory(&self) -> Result<()> {
info!("prewarming VM memory");
pub fn prewarm_postgres(&self) -> Result<()> {
info!("prewarming");
// Create pgdata
let pgdata = &format!("{}.warmup", self.params.pgdata);
@@ -1488,7 +1360,7 @@ impl ComputeNode {
kill(pm_pid, Signal::SIGQUIT)?;
info!("sent SIGQUIT signal");
pg.wait()?;
info!("done prewarming vm memory");
info!("done prewarming");
// clean up
let _ok = fs::remove_dir_all(pgdata);
@@ -1558,7 +1430,7 @@ impl ComputeNode {
let (mut client, connection) = conf.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {e}");
eprintln!("connection error: {}", e);
}
});
@@ -1674,24 +1546,14 @@ impl ComputeNode {
.clone(),
);
let mut tls_config = None::<TlsConfig>;
if spec.features.contains(&ComputeFeature::TlsExperimental) {
tls_config = self.compute_ctl_config.tls.clone();
}
self.update_installed_extensions_collection_interval(&spec);
let max_concurrent_connections = self.max_service_connections(compute_state, &spec);
// Merge-apply spec & changes to PostgreSQL state.
self.apply_spec_sql(spec.clone(), conf.clone(), max_concurrent_connections)?;
if let Some(local_proxy) = &spec.clone().local_proxy_config {
let mut local_proxy = local_proxy.clone();
local_proxy.tls = tls_config.clone();
info!("configuring local_proxy");
local_proxy::configure(&local_proxy).context("apply_config local_proxy")?;
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}
// Run migrations separately to not hold up cold starts
@@ -1703,7 +1565,7 @@ impl ComputeNode {
Ok((mut client, connection)) => {
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {e}");
eprintln!("connection error: {}", e);
}
});
if let Err(e) = handle_migrations(&mut client).await {
@@ -1743,15 +1605,11 @@ impl ComputeNode {
pub fn reconfigure(&self) -> Result<()> {
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
let tls_config = self.tls_config(&spec);
self.update_installed_extensions_collection_interval(&spec);
if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
info!("tuning pgbouncer");
let pgbouncer_settings = pgbouncer_settings.clone();
let tls_config = tls_config.clone();
let tls_config = self.compute_ctl_config.tls.clone();
// Spawn a background task to do the tuning,
// so that we don't block the main thread that starts Postgres.
@@ -1769,7 +1627,7 @@ impl ComputeNode {
// Spawn a background task to do the configuration,
// so that we don't block the main thread that starts Postgres.
let mut local_proxy = local_proxy.clone();
local_proxy.tls = tls_config.clone();
local_proxy.tls = self.compute_ctl_config.tls.clone();
tokio::spawn(async move {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
@@ -1787,7 +1645,7 @@ impl ComputeNode {
pgdata_path,
&spec,
self.params.internal_http_port,
tls_config,
&self.compute_ctl_config.tls,
)?;
if !spec.skip_pg_catalog_updates {
@@ -1885,7 +1743,7 @@ impl ComputeNode {
// exit loop
ComputeStatus::Failed
| ComputeStatus::TerminationPending { .. }
| ComputeStatus::TerminationPending
| ComputeStatus::Terminated => break 'cert_update,
// wait
@@ -1907,14 +1765,6 @@ impl ComputeNode {
}
}
pub fn tls_config(&self, spec: &ComputeSpec) -> &Option<TlsConfig> {
if spec.features.contains(&ComputeFeature::TlsExperimental) {
&self.compute_ctl_config.tls
} else {
&None::<TlsConfig>
}
}
/// Update the `last_active` in the shared state, but ensure that it's a more recent one.
pub fn update_last_active(&self, last_active: Option<DateTime<Utc>>) {
let mut state = self.state.lock().unwrap();
@@ -2009,7 +1859,7 @@ impl ComputeNode {
let (client, connection) = connect_result.unwrap();
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {e}");
eprintln!("connection error: {}", e);
}
});
let result = client
@@ -2178,7 +2028,7 @@ LIMIT 100",
db_client
.simple_query(&query)
.await
.with_context(|| format!("Failed to execute query: {query}"))?;
.with_context(|| format!("Failed to execute query: {}", query))?;
}
Ok(())
@@ -2205,7 +2055,7 @@ LIMIT 100",
let version: Option<ExtVersion> = db_client
.query_opt(version_query, &[&ext_name])
.await
.with_context(|| format!("Failed to execute query: {version_query}"))?
.with_context(|| format!("Failed to execute query: {}", version_query))?
.map(|row| row.get(0));
// sanitize the inputs as postgres idents.
@@ -2220,14 +2070,14 @@ LIMIT 100",
db_client
.simple_query(&query)
.await
.with_context(|| format!("Failed to execute query: {query}"))?;
.with_context(|| format!("Failed to execute query: {}", query))?;
} else {
let query =
format!("CREATE EXTENSION IF NOT EXISTS {ext_name} WITH VERSION {quoted_version}");
db_client
.simple_query(&query)
.await
.with_context(|| format!("Failed to execute query: {query}"))?;
.with_context(|| format!("Failed to execute query: {}", query))?;
}
Ok(ext_version)
@@ -2349,155 +2199,14 @@ LIMIT 100",
info!("Pageserver config changed");
}
}
pub fn spawn_extension_stats_task(&self) {
// Cancel any existing task
if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
handle.abort();
}
let conf = self.tokio_conn_conf.clone();
let atomic_interval = self.params.installed_extensions_collection_interval.clone();
let mut installed_extensions_collection_interval =
2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst);
info!(
"[NEON_EXT_SPAWN] Spawning background installed extensions worker with Timeout: {}",
installed_extensions_collection_interval
);
let handle = tokio::spawn(async move {
// An initial sleep is added to ensure that two collections don't happen at the same time.
// The first collection happens during compute startup.
tokio::time::sleep(tokio::time::Duration::from_secs(
installed_extensions_collection_interval,
))
.await;
let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(
installed_extensions_collection_interval,
));
loop {
interval.tick().await;
let _ = installed_extensions(conf.clone()).await;
// Acquire a read lock on the compute spec and then update the interval if necessary
interval = tokio::time::interval(tokio::time::Duration::from_secs(std::cmp::max(
installed_extensions_collection_interval,
2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst),
)));
installed_extensions_collection_interval = interval.period().as_secs();
}
});
// Store the new task handle
*self.extension_stats_task.lock().unwrap() = Some(handle);
}
fn terminate_extension_stats_task(&self) {
if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
handle.abort();
}
}
fn update_installed_extensions_collection_interval(&self, spec: &ComputeSpec) {
// Update the interval for collecting installed extensions statistics
// If the value is -1, we never suspend so set the value to default collection.
// If the value is 0, it means default, we will just continue to use the default.
if spec.suspend_timeout_seconds == -1 || spec.suspend_timeout_seconds == 0 {
info!(
"[NEON_EXT_INT_UPD] Spec Timeout: {}, New Timeout: {}",
spec.suspend_timeout_seconds, DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL
);
self.params.installed_extensions_collection_interval.store(
DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL,
std::sync::atomic::Ordering::SeqCst,
);
} else {
info!(
"[NEON_EXT_INT_UPD] Spec Timeout: {}",
spec.suspend_timeout_seconds
);
self.params.installed_extensions_collection_interval.store(
spec.suspend_timeout_seconds as u64,
std::sync::atomic::Ordering::SeqCst,
);
}
}
}
pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
let res = get_installed_extensions(conf).await;
match res {
Ok(extensions) => {
info!(
"[NEON_EXT_STAT] {}",
serde_json::to_string(&extensions).expect("failed to serialize extensions list")
);
}
Err(err) => error!("could not get installed extensions: {err:?}"),
}
Ok(())
}
pub fn forward_termination_signal(dev_mode: bool) {
pub fn forward_termination_signal() {
let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);
if ss_pid != 0 {
let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);
kill(ss_pid, Signal::SIGTERM).ok();
}
if !dev_mode {
// Terminate pgbouncer with SIGKILL
match pid_file::read(PGBOUNCER_PIDFILE.into()) {
Ok(pid_file::PidFileRead::LockedByOtherProcess(pid)) => {
info!("sending SIGKILL to pgbouncer process pid: {}", pid);
if let Err(e) = kill(pid, Signal::SIGKILL) {
error!("failed to terminate pgbouncer: {}", e);
}
}
// pgbouncer does not lock the pid file, so we read and kill the process directly
Ok(pid_file::PidFileRead::NotHeldByAnyProcess(_)) => {
if let Ok(pid_str) = std::fs::read_to_string(PGBOUNCER_PIDFILE) {
if let Ok(pid) = pid_str.trim().parse::<i32>() {
info!(
"sending SIGKILL to pgbouncer process pid: {} (from unlocked pid file)",
pid
);
if let Err(e) = kill(Pid::from_raw(pid), Signal::SIGKILL) {
error!("failed to terminate pgbouncer: {}", e);
}
}
} else {
info!("pgbouncer pid file exists but process not running");
}
}
Ok(pid_file::PidFileRead::NotExist) => {
info!("pgbouncer pid file not found, process may not be running");
}
Err(e) => {
error!("error reading pgbouncer pid file: {}", e);
}
}
// Terminate local_proxy
match pid_file::read("/etc/local_proxy/pid".into()) {
Ok(pid_file::PidFileRead::LockedByOtherProcess(pid)) => {
info!("sending SIGTERM to local_proxy process pid: {}", pid);
if let Err(e) = kill(pid, Signal::SIGTERM) {
error!("failed to terminate local_proxy: {}", e);
}
}
Ok(pid_file::PidFileRead::NotHeldByAnyProcess(_)) => {
info!("local_proxy PID file exists but process not running");
}
Ok(pid_file::PidFileRead::NotExist) => {
info!("local_proxy PID file not found, process may not be running");
}
Err(e) => {
error!("error reading local_proxy PID file: {}", e);
}
}
} else {
info!("Skipping pgbouncer and local_proxy termination because in dev mode");
}
let pg_pid = PG_PID.load(Ordering::SeqCst);
if pg_pid != 0 {
let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
@@ -2530,21 +2239,3 @@ impl<T: 'static> JoinSetExt<T> for tokio::task::JoinSet<T> {
})
}
}
#[cfg(test)]
mod tests {
use std::fs::File;
use super::*;
#[test]
fn duplicate_safekeeper_connstring() {
let file = File::open("tests/cluster_spec.json").unwrap();
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
match ParsedSpec::try_from(spec.clone()) {
Ok(_p) => panic!("Failed to detect duplicate entry"),
Err(e) => assert!(e.starts_with("duplicate entry in safekeeper_connstrings:")),
};
}
}

View File

@@ -25,16 +25,11 @@ struct EndpointStoragePair {
}
const KEY: &str = "lfc_state";
impl EndpointStoragePair {
/// endpoint_id is set to None while prewarming from other endpoint, see replica promotion
/// If not None, takes precedence over pspec.spec.endpoint_id
fn from_spec_and_endpoint(
pspec: &crate::compute::ParsedSpec,
endpoint_id: Option<String>,
) -> Result<Self> {
let endpoint_id = endpoint_id.as_ref().or(pspec.spec.endpoint_id.as_ref());
let Some(ref endpoint_id) = endpoint_id else {
bail!("pspec.endpoint_id missing, other endpoint_id not provided")
impl TryFrom<&crate::compute::ParsedSpec> for EndpointStoragePair {
type Error = anyhow::Error;
fn try_from(pspec: &crate::compute::ParsedSpec) -> Result<Self, Self::Error> {
let Some(ref endpoint_id) = pspec.spec.endpoint_id else {
bail!("pspec.endpoint_id missing")
};
let Some(ref base_uri) = pspec.endpoint_storage_addr else {
bail!("pspec.endpoint_storage_addr missing")
@@ -89,7 +84,7 @@ impl ComputeNode {
}
/// Returns false if there is a prewarm request ongoing, true otherwise
pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
pub fn prewarm_lfc(self: &Arc<Self>) -> bool {
crate::metrics::LFC_PREWARM_REQUESTS.inc();
{
let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
@@ -102,7 +97,7 @@ impl ComputeNode {
let cloned = self.clone();
spawn(async move {
let Err(err) = cloned.prewarm_impl(from_endpoint).await else {
let Err(err) = cloned.prewarm_impl().await else {
cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Completed;
return;
};
@@ -114,14 +109,13 @@ impl ComputeNode {
true
}
/// from_endpoint: None for endpoint managed by this compute_ctl
fn endpoint_storage_pair(&self, from_endpoint: Option<String>) -> Result<EndpointStoragePair> {
fn endpoint_storage_pair(&self) -> Result<EndpointStoragePair> {
let state = self.state.lock().unwrap();
EndpointStoragePair::from_spec_and_endpoint(state.pspec.as_ref().unwrap(), from_endpoint)
state.pspec.as_ref().unwrap().try_into()
}
async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<()> {
let EndpointStoragePair { url, token } = self.endpoint_storage_pair(from_endpoint)?;
async fn prewarm_impl(&self) -> Result<()> {
let EndpointStoragePair { url, token } = self.endpoint_storage_pair()?;
info!(%url, "requesting LFC state from endpoint storage");
let request = Client::new().get(&url).bearer_auth(token);
@@ -179,7 +173,7 @@ impl ComputeNode {
}
async fn offload_lfc_impl(&self) -> Result<()> {
let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
let EndpointStoragePair { url, token } = self.endpoint_storage_pair()?;
info!(%url, "requesting LFC state from postgres");
let mut compressed = Vec::new();

View File

@@ -51,7 +51,7 @@ pub fn write_postgres_conf(
// Write the postgresql.conf content from the spec file as is.
if let Some(conf) = &spec.cluster.postgresql_conf {
writeln!(file, "{conf}")?;
writeln!(file, "{}", conf)?;
}
// Add options for connecting to storage
@@ -70,7 +70,7 @@ pub fn write_postgres_conf(
);
// If generation is given, prepend sk list with g#number:
if let Some(generation) = spec.safekeepers_generation {
write!(neon_safekeepers_value, "g#{generation}:")?;
write!(neon_safekeepers_value, "g#{}:", generation)?;
}
neon_safekeepers_value.push_str(&spec.safekeeper_connstrings.join(","));
writeln!(
@@ -109,8 +109,8 @@ pub fn write_postgres_conf(
tls::update_key_path_blocking(pgdata_path, tls_config);
// these are the default, but good to be explicit.
writeln!(file, "ssl_cert_file = '{SERVER_CRT}'")?;
writeln!(file, "ssl_key_file = '{SERVER_KEY}'")?;
writeln!(file, "ssl_cert_file = '{}'", SERVER_CRT)?;
writeln!(file, "ssl_key_file = '{}'", SERVER_KEY)?;
}
// Locales
@@ -191,7 +191,8 @@ pub fn write_postgres_conf(
}
writeln!(
file,
"shared_preload_libraries='{libs}{extra_shared_preload_libraries}'"
"shared_preload_libraries='{}{}'",
libs, extra_shared_preload_libraries
)?;
} else {
// Typically, this should be unreacheable,
@@ -243,7 +244,8 @@ pub fn write_postgres_conf(
}
writeln!(
file,
"shared_preload_libraries='{libs}{extra_shared_preload_libraries}'"
"shared_preload_libraries='{}{}'",
libs, extra_shared_preload_libraries
)?;
} else {
// Typically, this should be unreacheable,
@@ -261,7 +263,7 @@ pub fn write_postgres_conf(
}
}
writeln!(file, "neon.extension_server_port={extension_server_port}")?;
writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
if spec.drop_subscriptions_before_start {
writeln!(file, "neon.disable_logical_replication_subscribers=true")?;
@@ -289,7 +291,7 @@ where
{
let path = pgdata_path.join("compute_ctl_temp_override.conf");
let mut file = File::create(path)?;
write!(file, "{options}")?;
write!(file, "{}", options)?;
let res = exec();

View File

@@ -2,24 +2,10 @@
module(load="imfile")
# Input configuration for log files in the specified directory
# The messages can be multiline. The start of the message is a timestamp
# in "%Y-%m-%d %H:%M:%S.%3N GMT" (so timezone hardcoded).
# Replace log_directory with the directory containing the log files
input(type="imfile" File="{log_directory}/*.log"
Tag="pgaudit_log" Severity="info" Facility="local5"
startmsg.regex="^[[:digit:]]{{4}}-[[:digit:]]{{2}}-[[:digit:]]{{2}} [[:digit:]]{{2}}:[[:digit:]]{{2}}:[[:digit:]]{{2}}.[[:digit:]]{{3}} GMT,")
# Replace {log_directory} with the directory containing the log files
input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0")
# the directory to store rsyslog state files
global(workDirectory="/var/log/rsyslog")
# Construct json, endpoint_id and project_id as additional metadata
set $.json_log!endpoint_id = "{endpoint_id}";
set $.json_log!project_id = "{project_id}";
set $.json_log!msg = $msg;
# Template suitable for rfc5424 syslog format
template(name="PgAuditLog" type="string"
string="<%PRI%>1 %TIMESTAMP:::date-rfc3339% %HOSTNAME% - - - - %$.json_log%")
# Forward to remote syslog receiver (@@<hostname>:<port>;format
local5.info @@{remote_endpoint};PgAuditLog
# Forward logs to remote syslog server
*.* @@{remote_endpoint}

View File

@@ -74,20 +74,19 @@ More specifically, here is an example ext_index.json
use std::path::Path;
use std::str;
use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
use anyhow::{Context, Result, bail};
use bytes::Bytes;
use compute_api::spec::RemoteExtSpec;
use postgres_versioninfo::PgMajorVersion;
use regex::Regex;
use remote_storage::*;
use reqwest::StatusCode;
use tar::Archive;
use tracing::info;
use tracing::log::warn;
use url::Url;
use zstd::stream::read::Decoder;
use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
fn get_pg_config(argument: &str, pgbin: &str) -> String {
// gives the result of `pg_config [argument]`
// where argument is a flag like `--version` or `--sharedir`
@@ -106,7 +105,7 @@ fn get_pg_config(argument: &str, pgbin: &str) -> String {
.to_string()
}
pub fn get_pg_version(pgbin: &str) -> PgMajorVersion {
pub fn get_pg_version(pgbin: &str) -> PostgresMajorVersion {
// pg_config --version returns a (platform specific) human readable string
// such as "PostgreSQL 15.4". We parse this to v14/v15/v16 etc.
let human_version = get_pg_config("--version", pgbin);
@@ -114,11 +113,25 @@ pub fn get_pg_version(pgbin: &str) -> PgMajorVersion {
}
pub fn get_pg_version_string(pgbin: &str) -> String {
get_pg_version(pgbin).v_str()
match get_pg_version(pgbin) {
PostgresMajorVersion::V14 => "v14",
PostgresMajorVersion::V15 => "v15",
PostgresMajorVersion::V16 => "v16",
PostgresMajorVersion::V17 => "v17",
}
.to_owned()
}
fn parse_pg_version(human_version: &str) -> PgMajorVersion {
use PgMajorVersion::*;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum PostgresMajorVersion {
V14,
V15,
V16,
V17,
}
fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
use PostgresMajorVersion::*;
// Normal releases have version strings like "PostgreSQL 15.4". But there
// are also pre-release versions like "PostgreSQL 17devel" or "PostgreSQL
// 16beta2" or "PostgreSQL 17rc1". And with the --with-extra-version
@@ -129,10 +142,10 @@ fn parse_pg_version(human_version: &str) -> PgMajorVersion {
.captures(human_version)
{
Some(captures) if captures.len() == 2 => match &captures["major"] {
"14" => return PG14,
"15" => return PG15,
"16" => return PG16,
"17" => return PG17,
"14" => return V14,
"15" => return V15,
"16" => return V16,
"17" => return V17,
_ => {}
},
_ => {}
@@ -145,7 +158,7 @@ fn parse_pg_version(human_version: &str) -> PgMajorVersion {
pub async fn download_extension(
ext_name: &str,
ext_path: &RemotePath,
remote_ext_base_url: &Url,
remote_ext_base_url: &str,
pgbin: &str,
) -> Result<u64> {
info!("Download extension {:?} from {:?}", ext_name, ext_path);
@@ -257,14 +270,10 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
}
// Do request to extension storage proxy, e.g.,
// curl http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local/latest/v15/extensions/anon.tar.zst
// curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst
// using HTTP GET and return the response body as bytes.
async fn download_extension_tar(remote_ext_base_url: &Url, ext_path: &str) -> Result<Bytes> {
let uri = remote_ext_base_url.join(ext_path).with_context(|| {
format!(
"failed to create the remote extension URI for {ext_path} using {remote_ext_base_url}"
)
})?;
async fn download_extension_tar(remote_ext_base_url: &str, ext_path: &str) -> Result<Bytes> {
let uri = format!("{}/{}", remote_ext_base_url, ext_path);
let filename = Path::new(ext_path)
.file_name()
.unwrap_or_else(|| std::ffi::OsStr::new("unknown"))
@@ -274,7 +283,7 @@ async fn download_extension_tar(remote_ext_base_url: &Url, ext_path: &str) -> Re
info!("Downloading extension file '{}' from uri {}", filename, uri);
match do_extension_server_request(uri).await {
match do_extension_server_request(&uri).await {
Ok(resp) => {
info!("Successfully downloaded remote extension data {}", ext_path);
REMOTE_EXT_REQUESTS_TOTAL
@@ -293,10 +302,13 @@ async fn download_extension_tar(remote_ext_base_url: &Url, ext_path: &str) -> Re
// Do a single remote extensions server request.
// Return result or (error message + stringified status code) in case of any failures.
async fn do_extension_server_request(uri: Url) -> Result<Bytes, (String, String)> {
async fn do_extension_server_request(uri: &str) -> Result<Bytes, (String, String)> {
let resp = reqwest::get(uri).await.map_err(|e| {
(
format!("could not perform remote extensions server request: {e:?}"),
format!(
"could not perform remote extensions server request: {:?}",
e
),
UNKNOWN_HTTP_STATUS.to_string(),
)
})?;
@@ -306,7 +318,7 @@ async fn do_extension_server_request(uri: Url) -> Result<Bytes, (String, String)
StatusCode::OK => match resp.bytes().await {
Ok(resp) => Ok(resp),
Err(e) => Err((
format!("could not read remote extensions server response: {e:?}"),
format!("could not read remote extensions server response: {:?}", e),
// It's fine to return and report error with status as 200 OK,
// because we still failed to read the response.
status.to_string(),
@@ -317,7 +329,10 @@ async fn do_extension_server_request(uri: Url) -> Result<Bytes, (String, String)
status.to_string(),
)),
_ => Err((
format!("unexpected remote extensions server response status code: {status}"),
format!(
"unexpected remote extensions server response status code: {}",
status
),
status.to_string(),
)),
}
@@ -329,25 +344,25 @@ mod tests {
#[test]
fn test_parse_pg_version() {
use postgres_versioninfo::PgMajorVersion::*;
assert_eq!(parse_pg_version("PostgreSQL 15.4"), PG15);
assert_eq!(parse_pg_version("PostgreSQL 15.14"), PG15);
use super::PostgresMajorVersion::*;
assert_eq!(parse_pg_version("PostgreSQL 15.4"), V15);
assert_eq!(parse_pg_version("PostgreSQL 15.14"), V15);
assert_eq!(
parse_pg_version("PostgreSQL 15.4 (Ubuntu 15.4-0ubuntu0.23.04.1)"),
PG15
V15
);
assert_eq!(parse_pg_version("PostgreSQL 14.15"), PG14);
assert_eq!(parse_pg_version("PostgreSQL 14.0"), PG14);
assert_eq!(parse_pg_version("PostgreSQL 14.15"), V14);
assert_eq!(parse_pg_version("PostgreSQL 14.0"), V14);
assert_eq!(
parse_pg_version("PostgreSQL 14.9 (Debian 14.9-1.pgdg120+1"),
PG14
V14
);
assert_eq!(parse_pg_version("PostgreSQL 16devel"), PG16);
assert_eq!(parse_pg_version("PostgreSQL 16beta1"), PG16);
assert_eq!(parse_pg_version("PostgreSQL 16rc2"), PG16);
assert_eq!(parse_pg_version("PostgreSQL 16extra"), PG16);
assert_eq!(parse_pg_version("PostgreSQL 16devel"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16beta1"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16rc2"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16extra"), V16);
}
#[test]

View File

@@ -48,9 +48,11 @@ impl JsonResponse {
/// Create an error response related to the compute being in an invalid state
pub(self) fn invalid_status(status: ComputeStatus) -> Response {
Self::error(
Self::create_response(
StatusCode::PRECONDITION_FAILED,
format!("invalid compute status: {status}"),
&GenericAPIError {
error: format!("invalid compute status: {status}"),
},
)
}
}

View File

@@ -22,7 +22,7 @@ pub(in crate::http) async fn configure(
State(compute): State<Arc<ComputeNode>>,
request: Json<ConfigurationRequest>,
) -> Response {
let pspec = match ParsedSpec::try_from(request.0.spec) {
let pspec = match ParsedSpec::try_from(request.spec.clone()) {
Ok(p) => p,
Err(e) => return JsonResponse::error(StatusCode::BAD_REQUEST, e),
};
@@ -65,7 +65,7 @@ pub(in crate::http) async fn configure(
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {err:?}");
let msg = format!("compute configuration failed: {:?}", err);
return Err(msg);
}
}

View File

@@ -2,7 +2,6 @@ use crate::compute_prewarm::LfcPrewarmStateWithProgress;
use crate::http::JsonResponse;
use axum::response::{IntoResponse, Response};
use axum::{Json, http::StatusCode};
use axum_extra::extract::OptionalQuery;
use compute_api::responses::LfcOffloadState;
type Compute = axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>;
@@ -17,16 +16,8 @@ pub(in crate::http) async fn offload_state(compute: Compute) -> Json<LfcOffloadS
Json(compute.lfc_offload_state())
}
#[derive(serde::Deserialize)]
pub struct PrewarmQuery {
pub from_endpoint: String,
}
pub(in crate::http) async fn prewarm(
compute: Compute,
OptionalQuery(query): OptionalQuery<PrewarmQuery>,
) -> Response {
if compute.prewarm_lfc(query.map(|q| q.from_endpoint)) {
pub(in crate::http) async fn prewarm(compute: Compute) -> Response {
if compute.prewarm_lfc() {
StatusCode::ACCEPTED.into_response()
} else {
JsonResponse::error(

View File

@@ -1,42 +1,32 @@
use crate::compute::{ComputeNode, forward_termination_signal};
use crate::http::JsonResponse;
use axum::extract::State;
use axum::response::Response;
use axum_extra::extract::OptionalQuery;
use compute_api::responses::{ComputeStatus, TerminateResponse};
use http::StatusCode;
use serde::Deserialize;
use std::sync::Arc;
use axum::extract::State;
use axum::response::{IntoResponse, Response};
use compute_api::responses::ComputeStatus;
use http::StatusCode;
use tokio::task;
use tracing::info;
#[derive(Deserialize, Default)]
pub struct TerminateQuery {
mode: compute_api::responses::TerminateMode,
}
use crate::compute::{ComputeNode, forward_termination_signal};
use crate::http::JsonResponse;
/// Terminate the compute.
pub(in crate::http) async fn terminate(
State(compute): State<Arc<ComputeNode>>,
OptionalQuery(terminate): OptionalQuery<TerminateQuery>,
) -> Response {
let mode = terminate.unwrap_or_default().mode;
pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
{
let mut state = compute.state.lock().unwrap();
if state.status == ComputeStatus::Terminated {
return JsonResponse::success(StatusCode::CREATED, state.terminate_flush_lsn);
return StatusCode::CREATED.into_response();
}
if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
return JsonResponse::invalid_status(state.status);
}
state.set_status(
ComputeStatus::TerminationPending { mode },
&compute.state_changed,
);
state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
drop(state);
}
forward_termination_signal(false);
forward_termination_signal();
info!("sent signal and notified waiters");
// Spawn a blocking thread to wait for compute to become Terminated.
@@ -44,7 +34,7 @@ pub(in crate::http) async fn terminate(
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
let lsn = task::spawn_blocking(move || {
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Terminated {
state = c.state_changed.wait(state).unwrap();
@@ -54,10 +44,11 @@ pub(in crate::http) async fn terminate(
state.status
);
}
state.terminate_flush_lsn
})
.await
.unwrap();
info!("terminated Postgres");
JsonResponse::success(StatusCode::OK, TerminateResponse { lsn })
StatusCode::OK.into_response()
}

View File

@@ -43,7 +43,7 @@ pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExten
let (mut client, connection) = conf.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {e}");
eprintln!("connection error: {}", e);
}
});
@@ -57,7 +57,7 @@ pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExten
let (client, connection) = conf.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {e}");
eprintln!("connection error: {}", e);
}
});

View File

@@ -22,7 +22,6 @@ mod migration;
pub mod monitor;
pub mod params;
pub mod pg_helpers;
pub mod pgbouncer;
pub mod rsyslog;
pub mod spec;
mod spec_apply;

View File

@@ -4,9 +4,7 @@ use std::thread;
use std::time::{Duration, SystemTime};
use anyhow::{Result, bail};
use compute_api::spec::{ComputeMode, PageserverProtocol};
use itertools::Itertools as _;
use pageserver_page_api as page_api;
use compute_api::spec::ComputeMode;
use postgres::{NoTls, SimpleQueryMessage};
use tracing::{info, warn};
use utils::id::{TenantId, TimelineId};
@@ -78,17 +76,25 @@ fn acquire_lsn_lease_with_retry(
loop {
// Note: List of pageservers is dynamic, need to re-read configs before each attempt.
let (connstrings, auth) = {
let configs = {
let state = compute.state.lock().unwrap();
let spec = state.pspec.as_ref().expect("spec must be set");
(
spec.pageserver_connstr.clone(),
spec.storage_auth_token.clone(),
)
let conn_strings = spec.pageserver_connstr.split(',');
conn_strings
.map(|connstr| {
let mut config = postgres::Config::from_str(connstr).expect("Invalid connstr");
if let Some(storage_auth_token) = &spec.storage_auth_token {
config.password(storage_auth_token.clone());
}
config
})
.collect::<Vec<_>>()
};
let result =
try_acquire_lsn_lease(&connstrings, auth.as_deref(), tenant_id, timeline_id, lsn);
let result = try_acquire_lsn_lease(tenant_id, timeline_id, lsn, &configs);
match result {
Ok(Some(res)) => {
return Ok(res);
@@ -110,104 +116,68 @@ fn acquire_lsn_lease_with_retry(
}
}
/// Tries to acquire LSN leases on all Pageserver shards.
/// Tries to acquire an LSN lease through PS page_service API.
fn try_acquire_lsn_lease(
connstrings: &str,
auth: Option<&str>,
tenant_id: TenantId,
timeline_id: TimelineId,
lsn: Lsn,
configs: &[postgres::Config],
) -> Result<Option<SystemTime>> {
let connstrings = connstrings.split(',').collect_vec();
let shard_count = connstrings.len();
let mut leases = Vec::new();
for (shard_number, &connstring) in connstrings.iter().enumerate() {
let tenant_shard_id = match shard_count {
0 | 1 => TenantShardId::unsharded(tenant_id),
shard_count => TenantShardId {
tenant_id,
shard_number: ShardNumber(shard_number as u8),
shard_count: ShardCount::new(shard_count as u8),
},
fn get_valid_until(
config: &postgres::Config,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<Option<SystemTime>> {
let mut client = config.connect(NoTls)?;
let cmd = format!("lease lsn {} {} {} ", tenant_shard_id, timeline_id, lsn);
let res = client.simple_query(&cmd)?;
let msg = match res.first() {
Some(msg) => msg,
None => bail!("empty response"),
};
let row = match msg {
SimpleQueryMessage::Row(row) => row,
_ => bail!("error parsing lsn lease response"),
};
let lease = match PageserverProtocol::from_connstring(connstring)? {
PageserverProtocol::Libpq => {
acquire_lsn_lease_libpq(connstring, auth, tenant_shard_id, timeline_id, lsn)?
}
PageserverProtocol::Grpc => {
acquire_lsn_lease_grpc(connstring, auth, tenant_shard_id, timeline_id, lsn)?
}
};
leases.push(lease);
// Note: this will be None if a lease is explicitly not granted.
let valid_until_str = row.get("valid_until");
let valid_until = valid_until_str.map(|s| {
SystemTime::UNIX_EPOCH
.checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
.expect("Time larger than max SystemTime could handle")
});
Ok(valid_until)
}
Ok(leases.into_iter().min().flatten())
}
let shard_count = configs.len();
/// Acquires an LSN lease on a single shard, using the libpq API. The connstring must use a
/// postgresql:// scheme.
fn acquire_lsn_lease_libpq(
connstring: &str,
auth: Option<&str>,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<Option<SystemTime>> {
let mut config = postgres::Config::from_str(connstring)?;
if let Some(auth) = auth {
config.password(auth);
}
let mut client = config.connect(NoTls)?;
let cmd = format!("lease lsn {tenant_shard_id} {timeline_id} {lsn} ");
let res = client.simple_query(&cmd)?;
let msg = match res.first() {
Some(msg) => msg,
None => bail!("empty response"),
};
let row = match msg {
SimpleQueryMessage::Row(row) => row,
_ => bail!("error parsing lsn lease response"),
let valid_until = if shard_count > 1 {
configs
.iter()
.enumerate()
.map(|(shard_number, config)| {
let tenant_shard_id = TenantShardId {
tenant_id,
shard_count: ShardCount::new(shard_count as u8),
shard_number: ShardNumber(shard_number as u8),
};
get_valid_until(config, tenant_shard_id, timeline_id, lsn)
})
.collect::<Result<Vec<Option<SystemTime>>>>()?
.into_iter()
.min()
.unwrap()
} else {
get_valid_until(
&configs[0],
TenantShardId::unsharded(tenant_id),
timeline_id,
lsn,
)?
};
// Note: this will be None if a lease is explicitly not granted.
let valid_until_str = row.get("valid_until");
let valid_until = valid_until_str.map(|s| {
SystemTime::UNIX_EPOCH
.checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
.expect("Time larger than max SystemTime could handle")
});
Ok(valid_until)
}
/// Acquires an LSN lease on a single shard, using the gRPC API. The connstring must use a
/// grpc:// scheme.
fn acquire_lsn_lease_grpc(
connstring: &str,
auth: Option<&str>,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<Option<SystemTime>> {
tokio::runtime::Handle::current().block_on(async move {
let mut client = page_api::Client::new(
connstring.to_string(),
tenant_shard_id.tenant_id,
timeline_id,
tenant_shard_id.to_index(),
auth.map(String::from),
None,
)
.await?;
let req = page_api::LeaseLsnRequest { lsn };
match client.lease_lsn(req).await {
Ok(expires) => Ok(Some(expires)),
// Lease couldn't be acquired because the LSN has been garbage collected.
Err(err) if err.code() == tonic::Code::FailedPrecondition => Ok(None),
Err(err) => Err(err.into()),
}
})
}

View File

@@ -13,12 +13,6 @@ use crate::metrics::{PG_CURR_DOWNTIME_MS, PG_TOTAL_DOWNTIME_MS};
const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
/// Struct to store runtime state of the compute monitor thread.
/// In theory, this could be a part of `Compute`, but i)
/// this state is expected to be accessed only by single thread,
/// so we don't need to care about locking; ii) `Compute` is
/// already quite big. Thus, it seems to be a good idea to keep
/// all the activity/health monitoring parts here.
struct ComputeMonitor {
compute: Arc<ComputeNode>,
@@ -76,38 +70,12 @@ impl ComputeMonitor {
)
}
/// Check if compute is in some terminal or soon-to-be-terminal
/// state, then return `true`, signalling the caller that it
/// should exit gracefully. Otherwise, return `false`.
fn check_interrupts(&mut self) -> bool {
let compute_status = self.compute.get_status();
if matches!(
compute_status,
ComputeStatus::Terminated
| ComputeStatus::TerminationPending { .. }
| ComputeStatus::Failed
) {
info!(
"compute is in {} status, stopping compute monitor",
compute_status
);
return true;
}
false
}
/// Spin in a loop and figure out the last activity time in the Postgres.
/// Then update it in the shared state. This function currently never
/// errors out explicitly, but there is a graceful termination path.
/// Every time we receive an error trying to check Postgres, we use
/// [`ComputeMonitor::check_interrupts()`] because it could be that
/// compute is being terminated already, then we can exit gracefully
/// to not produce errors' noise in the log.
/// Then update it in the shared state. This function never errors out.
/// NB: the only expected panic is at `Mutex` unwrap(), all other errors
/// should be handled gracefully.
#[instrument(skip_all)]
pub fn run(&mut self) -> anyhow::Result<()> {
pub fn run(&mut self) {
// Suppose that `connstr` doesn't change
let connstr = self.compute.params.connstr.clone();
let conf = self
@@ -125,10 +93,6 @@ impl ComputeMonitor {
info!("starting compute monitor for {}", connstr);
loop {
if self.check_interrupts() {
break;
}
match &mut client {
Ok(cli) => {
if cli.is_closed() {
@@ -136,10 +100,6 @@ impl ComputeMonitor {
downtime_info = self.downtime_info(),
"connection to Postgres is closed, trying to reconnect"
);
if self.check_interrupts() {
break;
}
self.report_down();
// Connection is closed, reconnect and try again.
@@ -151,19 +111,15 @@ impl ComputeMonitor {
self.compute.update_last_active(self.last_active);
}
Err(e) => {
error!(
downtime_info = self.downtime_info(),
"could not check Postgres: {}", e
);
if self.check_interrupts() {
break;
}
// Although we have many places where we can return errors in `check()`,
// normally it shouldn't happen. I.e., we will likely return error if
// connection got broken, query timed out, Postgres returned invalid data, etc.
// In all such cases it's suspicious, so let's report this as downtime.
self.report_down();
error!(
downtime_info = self.downtime_info(),
"could not check Postgres: {}", e
);
// Reconnect to Postgres just in case. During tests, I noticed
// that queries in `check()` can fail with `connection closed`,
@@ -180,10 +136,6 @@ impl ComputeMonitor {
downtime_info = self.downtime_info(),
"could not connect to Postgres: {}, retrying", e
);
if self.check_interrupts() {
break;
}
self.report_down();
// Establish a new connection and try again.
@@ -195,9 +147,6 @@ impl ComputeMonitor {
self.last_checked = Utc::now();
thread::sleep(MONITOR_CHECK_INTERVAL);
}
// Graceful termination path
Ok(())
}
#[instrument(skip_all)]
@@ -480,10 +429,7 @@ pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
.spawn(move || {
let span = span!(Level::INFO, "compute_monitor");
let _enter = span.enter();
match monitor.run() {
Ok(_) => info!("compute monitor thread terminated gracefully"),
Err(err) => error!("compute monitor thread terminated abnormally {:?}", err),
}
monitor.run();
})
.expect("cannot launch compute monitor thread")
}

View File

@@ -36,9 +36,9 @@ pub fn escape_literal(s: &str) -> String {
let res = s.replace('\'', "''").replace('\\', "\\\\");
if res.contains('\\') {
format!("E'{res}'")
format!("E'{}'", res)
} else {
format!("'{res}'")
format!("'{}'", res)
}
}
@@ -46,7 +46,7 @@ pub fn escape_literal(s: &str) -> String {
/// with `'{}'` is not required, as it returns a ready-to-use config string.
pub fn escape_conf_value(s: &str) -> String {
let res = s.replace('\'', "''").replace('\\', "\\\\");
format!("'{res}'")
format!("'{}'", res)
}
pub trait GenericOptionExt {
@@ -213,10 +213,8 @@ impl Escaping for PgIdent {
// Find the first suitable tag that is not present in the string.
// Postgres' max role/DB name length is 63 bytes, so even in the
// worst case it won't take long. Outer tag is always `tag + "x"`,
// so if `tag` is not present in the string, `outer_tag` is not
// present in the string either.
while self.contains(&tag.to_string()) {
// worst case it won't take long.
while self.contains(&format!("${tag}$")) || self.contains(&format!("${outer_tag}$")) {
tag += "x";
outer_tag = tag.clone() + "x";
}
@@ -446,7 +444,7 @@ pub async fn tune_pgbouncer(
let mut pgbouncer_connstr =
"host=localhost port=6432 dbname=pgbouncer user=postgres sslmode=disable".to_string();
if let Ok(pass) = std::env::var("PGBOUNCER_PASSWORD") {
pgbouncer_connstr.push_str(format!(" password={pass}").as_str());
pgbouncer_connstr.push_str(format!(" password={}", pass).as_str());
}
pgbouncer_connstr
};
@@ -464,7 +462,7 @@ pub async fn tune_pgbouncer(
Ok((client, connection)) => {
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {e}");
eprintln!("connection error: {}", e);
}
});
break client;

View File

@@ -1 +0,0 @@
pub const PGBOUNCER_PIDFILE: &str = "/tmp/pgbouncer.pid";

View File

@@ -27,40 +27,6 @@ fn get_rsyslog_pid() -> Option<String> {
}
}
fn wait_for_rsyslog_pid() -> Result<String, anyhow::Error> {
const MAX_WAIT: Duration = Duration::from_secs(5);
const INITIAL_SLEEP: Duration = Duration::from_millis(2);
let mut sleep_duration = INITIAL_SLEEP;
let start = std::time::Instant::now();
let mut attempts = 1;
for attempt in 1.. {
attempts = attempt;
match get_rsyslog_pid() {
Some(pid) => return Ok(pid),
None => {
if start.elapsed() >= MAX_WAIT {
break;
}
info!(
"rsyslogd is not running, attempt {}. Sleeping for {} ms",
attempt,
sleep_duration.as_millis()
);
std::thread::sleep(sleep_duration);
sleep_duration *= 2;
}
}
}
Err(anyhow::anyhow!(
"rsyslogd is not running after waiting for {} seconds and {} attempts",
attempts,
start.elapsed().as_secs()
))
}
// Restart rsyslogd to apply the new configuration.
// This is necessary, because there is no other way to reload the rsyslog configuration.
//
@@ -70,29 +36,27 @@ fn wait_for_rsyslog_pid() -> Result<String, anyhow::Error> {
// TODO: test it properly
//
fn restart_rsyslog() -> Result<()> {
let old_pid = get_rsyslog_pid().context("rsyslogd is not running")?;
info!("rsyslogd is running with pid: {}, restart it", old_pid);
// kill it to restart
let _ = Command::new("pkill")
.arg("rsyslogd")
.output()
.context("Failed to restart rsyslogd")?;
// ensure rsyslogd is running
wait_for_rsyslog_pid()?;
.context("Failed to stop rsyslogd")?;
Ok(())
}
pub fn configure_audit_rsyslog(
log_directory: String,
endpoint_id: &str,
project_id: &str,
tag: Option<String>,
remote_endpoint: &str,
) -> Result<()> {
let config_content: String = format!(
include_str!("config_template/compute_audit_rsyslog_template.conf"),
log_directory = log_directory,
endpoint_id = endpoint_id,
project_id = project_id,
tag = tag.unwrap_or("".to_string()),
remote_endpoint = remote_endpoint
);
@@ -167,11 +131,15 @@ pub fn configure_postgres_logs_export(conf: PostgresLogsRsyslogConfig) -> Result
return Ok(());
}
// Nothing to configure
// When new config is empty we can simply remove the configuration file.
if new_config.is_empty() {
// When the configuration is removed, PostgreSQL will stop sending data
// to the files watched by rsyslog, so restarting rsyslog is more effort
// than just ignoring this change.
info!("removing rsyslog config file: {}", POSTGRES_LOGS_CONF_PATH);
match std::fs::remove_file(POSTGRES_LOGS_CONF_PATH) {
Ok(_) => {}
Err(err) if err.kind() == ErrorKind::NotFound => {}
Err(err) => return Err(err.into()),
}
restart_rsyslog()?;
return Ok(());
}

View File

@@ -23,12 +23,12 @@ fn do_control_plane_request(
) -> Result<ControlPlaneConfigResponse, (bool, String, String)> {
let resp = reqwest::blocking::Client::new()
.get(uri)
.header("Authorization", format!("Bearer {jwt}"))
.header("Authorization", format!("Bearer {}", jwt))
.send()
.map_err(|e| {
(
true,
format!("could not perform request to control plane: {e:?}"),
format!("could not perform request to control plane: {:?}", e),
UNKNOWN_HTTP_STATUS.to_string(),
)
})?;
@@ -39,7 +39,7 @@ fn do_control_plane_request(
Ok(spec_resp) => Ok(spec_resp),
Err(e) => Err((
true,
format!("could not deserialize control plane response: {e:?}"),
format!("could not deserialize control plane response: {:?}", e),
status.to_string(),
)),
},
@@ -62,7 +62,7 @@ fn do_control_plane_request(
// or some internal failure happened. Doesn't make much sense to retry in this case.
_ => Err((
false,
format!("unexpected control plane response status code: {status}"),
format!("unexpected control plane response status code: {}", status),
status.to_string(),
)),
}

View File

@@ -933,53 +933,56 @@ async fn get_operations<'a>(
PerDatabasePhase::DeleteDBRoleReferences => {
let ctx = ctx.read().await;
let operations = spec
.delta_operations
.iter()
.flatten()
.filter(|op| op.action == "delete_role")
.filter_map(move |op| {
if db.is_owned_by(&op.name) {
return None;
}
if !ctx.roles.contains_key(&op.name) {
return None;
}
let quoted = op.name.pg_quote();
let new_owner = match &db {
DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(),
DB::UserDB(db) => db.owner.pg_quote(),
};
let (escaped_role, outer_tag) = op.name.pg_quote_dollar();
let operations =
spec.delta_operations
.iter()
.flatten()
.filter(|op| op.action == "delete_role")
.filter_map(move |op| {
if db.is_owned_by(&op.name) {
return None;
}
if !ctx.roles.contains_key(&op.name) {
return None;
}
let quoted = op.name.pg_quote();
let new_owner = match &db {
DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(),
DB::UserDB(db) => db.owner.pg_quote(),
};
let (escaped_role, outer_tag) = op.name.pg_quote_dollar();
Some(vec![
// This will reassign all dependent objects to the db owner
Operation {
query: format!("REASSIGN OWNED BY {quoted} TO {new_owner}",),
comment: None,
},
// Revoke some potentially blocking privileges (Neon-specific currently)
Operation {
query: format!(
include_str!("sql/pre_drop_role_revoke_privileges.sql"),
// N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
role_name = escaped_role,
outer_tag = outer_tag,
),
comment: None,
},
// This now will only drop privileges of the role
// TODO: this is obviously not 100% true because of the above case,
// there could be still some privileges that are not revoked. Maybe this
// only drops privileges that were granted *by this* role, not *to this* role,
// but this has to be checked.
Operation {
query: format!("DROP OWNED BY {quoted}"),
comment: None,
},
])
})
.flatten();
Some(vec![
// This will reassign all dependent objects to the db owner
Operation {
query: format!(
"REASSIGN OWNED BY {} TO {}",
quoted, new_owner,
),
comment: None,
},
// Revoke some potentially blocking privileges (Neon-specific currently)
Operation {
query: format!(
include_str!("sql/pre_drop_role_revoke_privileges.sql"),
// N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
role_name = escaped_role,
outer_tag = outer_tag,
),
comment: None,
},
// This now will only drop privileges of the role
// TODO: this is obviously not 100% true because of the above case,
// there could be still some privileges that are not revoked. Maybe this
// only drops privileges that were granted *by this* role, not *to this* role,
// but this has to be checked.
Operation {
query: format!("DROP OWNED BY {}", quoted),
comment: None,
},
])
})
.flatten();
Ok(Box::new(operations))
}

View File

@@ -27,7 +27,7 @@ pub async fn ping_safekeeper(
let (client, conn) = config.connect(tokio_postgres::NoTls).await?;
tokio::spawn(async move {
if let Err(e) = conn.await {
eprintln!("connection error: {e}");
eprintln!("connection error: {}", e);
}
});

View File

@@ -1,6 +0,0 @@
### Test files
The file `cluster_spec.json` has been copied over from libs/compute_api
tests, with some edits:
- the neon.safekeepers setting contains a duplicate value

View File

@@ -1,246 +0,0 @@
{
"format_version": 1.0,
"timestamp": "2021-05-23T18:25:43.511Z",
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
"suspend_timeout_seconds": 3600,
"cluster": {
"cluster_id": "test-cluster-42",
"name": "Zenith Test",
"state": "restarted",
"roles": [
{
"name": "postgres",
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "alexk",
"encrypted_password": null,
"options": null
},
{
"name": "zenith \"new\"",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "zen",
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "\"name\";\\n select 1;",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "MyRole",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
}
],
"databases": [
{
"name": "DB2",
"owner": "alexk",
"options": [
{
"name": "LC_COLLATE",
"value": "C",
"vartype": "string"
},
{
"name": "LC_CTYPE",
"value": "C",
"vartype": "string"
},
{
"name": "TEMPLATE",
"value": "template0",
"vartype": "enum"
}
]
},
{
"name": "zenith",
"owner": "MyRole"
},
{
"name": "zen",
"owner": "zen"
}
],
"settings": [
{
"name": "fsync",
"value": "off",
"vartype": "bool"
},
{
"name": "wal_level",
"value": "logical",
"vartype": "enum"
},
{
"name": "hot_standby",
"value": "on",
"vartype": "bool"
},
{
"name": "prewarm_lfc_on_startup",
"value": "off",
"vartype": "bool"
},
{
"name": "neon.safekeepers",
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501,127.0.0.1:6502",
"vartype": "string"
},
{
"name": "wal_log_hints",
"value": "on",
"vartype": "bool"
},
{
"name": "log_connections",
"value": "on",
"vartype": "bool"
},
{
"name": "shared_buffers",
"value": "32768",
"vartype": "integer"
},
{
"name": "port",
"value": "55432",
"vartype": "integer"
},
{
"name": "max_connections",
"value": "100",
"vartype": "integer"
},
{
"name": "max_wal_senders",
"value": "10",
"vartype": "integer"
},
{
"name": "listen_addresses",
"value": "0.0.0.0",
"vartype": "string"
},
{
"name": "wal_sender_timeout",
"value": "0",
"vartype": "integer"
},
{
"name": "password_encryption",
"value": "md5",
"vartype": "enum"
},
{
"name": "maintenance_work_mem",
"value": "65536",
"vartype": "integer"
},
{
"name": "max_parallel_workers",
"value": "8",
"vartype": "integer"
},
{
"name": "max_worker_processes",
"value": "8",
"vartype": "integer"
},
{
"name": "neon.tenant_id",
"value": "b0554b632bd4d547a63b86c3630317e8",
"vartype": "string"
},
{
"name": "max_replication_slots",
"value": "10",
"vartype": "integer"
},
{
"name": "neon.timeline_id",
"value": "2414a61ffc94e428f14b5758fe308e13",
"vartype": "string"
},
{
"name": "shared_preload_libraries",
"value": "neon",
"vartype": "string"
},
{
"name": "synchronous_standby_names",
"value": "walproposer",
"vartype": "string"
},
{
"name": "neon.pageserver_connstring",
"value": "host=127.0.0.1 port=6400",
"vartype": "string"
},
{
"name": "test.escaping",
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
"vartype": "string"
}
]
},
"delta_operations": [
{
"action": "delete_db",
"name": "zenith_test"
},
{
"action": "rename_db",
"name": "DB",
"new_name": "DB2"
},
{
"action": "delete_role",
"name": "zenith2"
},
{
"action": "rename_role",
"name": "zenith new",
"new_name": "zenith \"new\""
}
],
"remote_extensions": {
"library_index": {
"postgis-3": "postgis",
"libpgrouting-3.4": "postgis",
"postgis_raster-3": "postgis",
"postgis_sfcgal-3": "postgis",
"postgis_topology-3": "postgis",
"address_standardizer-3": "postgis"
},
"extension_data": {
"postgis": {
"archive_path": "5834329303/v15/extensions/postgis.tar.zst",
"control_data": {
"postgis.control": "# postgis extension\ncomment = ''PostGIS geometry and geography spatial types and functions''\ndefault_version = ''3.3.2''\nmodule_pathname = ''$libdir/postgis-3''\nrelocatable = false\ntrusted = true\n",
"pgrouting.control": "# pgRouting Extension\ncomment = ''pgRouting Extension''\ndefault_version = ''3.4.2''\nmodule_pathname = ''$libdir/libpgrouting-3.4''\nrelocatable = true\nrequires = ''plpgsql''\nrequires = ''postgis''\ntrusted = true\n",
"postgis_raster.control": "# postgis_raster extension\ncomment = ''PostGIS raster types and functions''\ndefault_version = ''3.3.2''\nmodule_pathname = ''$libdir/postgis_raster-3''\nrelocatable = false\nrequires = postgis\ntrusted = true\n",
"postgis_sfcgal.control": "# postgis topology extension\ncomment = ''PostGIS SFCGAL functions''\ndefault_version = ''3.3.2''\nrelocatable = true\nrequires = postgis\ntrusted = true\n",
"postgis_topology.control": "# postgis topology extension\ncomment = ''PostGIS topology spatial types and functions''\ndefault_version = ''3.3.2''\nrelocatable = false\nschema = topology\nrequires = postgis\ntrusted = true\n",
"address_standardizer.control": "# address_standardizer extension\ncomment = ''Used to parse an address into constituent elements. Generally used to support geocoding address normalization step.''\ndefault_version = ''3.3.2''\nrelocatable = true\ntrusted = true\n",
"postgis_tiger_geocoder.control": "# postgis tiger geocoder extension\ncomment = ''PostGIS tiger geocoder and reverse geocoder''\ndefault_version = ''3.3.2''\nrelocatable = false\nschema = tiger\nrequires = ''postgis,fuzzystrmatch''\nsuperuser= false\ntrusted = true\n",
"address_standardizer_data_us.control": "# address standardizer us dataset\ncomment = ''Address Standardizer US dataset example''\ndefault_version = ''3.3.2''\nrelocatable = true\ntrusted = true\n"
}
}
},
"custom_extensions": [],
"public_extensions": ["postgis"]
},
"pgbouncer_settings": {
"default_pool_size": "42",
"pool_mode": "session"
}
}

View File

@@ -30,7 +30,7 @@ mod pg_helpers_tests {
r#"fsync = off
wal_level = logical
hot_standby = on
autoprewarm = off
prewarm_lfc_on_startup = off
neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
wal_log_hints = on
log_connections = on
@@ -71,14 +71,6 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
("name$$$", ("$x$name$$$$x$", "xx")),
("name$$$$", ("$x$name$$$$$x$", "xx")),
("name$x$", ("$xx$name$x$$xx$", "xxx")),
("x", ("$xx$x$xx$", "xxx")),
("xx", ("$xxx$xx$xxx$", "xxxx")),
("$x", ("$xx$$x$xx$", "xxx")),
("x$", ("$xx$x$$xx$", "xxx")),
("$x$", ("$xx$$x$$xx$", "xxx")),
("xx$", ("$xxx$xx$$xxx$", "xxxx")),
("$xx", ("$xxx$$xx$xxx$", "xxxx")),
("$xx$", ("$xxx$$xx$$xxx$", "xxxx")),
];
for (input, expected) in test_cases {

View File

@@ -36,7 +36,6 @@ pageserver_api.workspace = true
pageserver_client.workspace = true
postgres_backend.workspace = true
safekeeper_api.workspace = true
safekeeper_client.workspace = true
postgres_connection.workspace = true
storage_broker.workspace = true
http-utils.workspace = true

View File

@@ -2,10 +2,8 @@
[pageserver]
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
listen_grpc_addr = '127.0.0.1:51051'
pg_auth_type = 'Trust'
http_auth_type = 'Trust'
grpc_auth_type = 'Trust'
[[safekeepers]]
id = 1

View File

@@ -4,10 +4,8 @@
id=1
listen_pg_addr = '127.0.0.1:64000'
listen_http_addr = '127.0.0.1:9898'
listen_grpc_addr = '127.0.0.1:51051'
pg_auth_type = 'Trust'
http_auth_type = 'Trust'
grpc_auth_type = 'Trust'
[[safekeepers]]
id = 1

View File

@@ -14,7 +14,7 @@
use std::ffi::OsStr;
use std::io::Write;
use std::os::fd::AsFd;
use std::os::unix::prelude::AsRawFd;
use std::os::unix::process::CommandExt;
use std::path::Path;
use std::process::Command;
@@ -356,7 +356,7 @@ where
let file = pid_file::claim_for_current_process(&path).expect("claim pid file");
// Remove the FD_CLOEXEC flag on the pidfile descriptor so that the pidfile
// remains locked after exec.
nix::fcntl::fcntl(file.as_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
nix::fcntl::fcntl(file.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
.expect("remove FD_CLOEXEC");
// Don't run drop(file), it would close the file before we actually exec.
std::mem::forget(file);

View File

@@ -8,6 +8,7 @@
use std::borrow::Cow;
use std::collections::{BTreeSet, HashMap};
use std::fs::File;
use std::os::fd::AsRawFd;
use std::path::PathBuf;
use std::process::exit;
use std::str::FromStr;
@@ -16,9 +17,9 @@ use std::time::Duration;
use anyhow::{Context, Result, anyhow, bail};
use clap::Parser;
use compute_api::requests::ComputeClaimsScope;
use compute_api::spec::{ComputeMode, PageserverProtocol};
use compute_api::spec::ComputeMode;
use control_plane::broker::StorageBroker;
use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode};
use control_plane::endpoint::ComputeControlPlane;
use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
use control_plane::local_env;
use control_plane::local_env::{
@@ -30,9 +31,8 @@ use control_plane::safekeeper::SafekeeperNode;
use control_plane::storage_controller::{
NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
};
use nix::fcntl::{Flock, FlockArg};
use nix::fcntl::{FlockArg, flock};
use pageserver_api::config::{
DEFAULT_GRPC_LISTEN_PORT as DEFAULT_PAGESERVER_GRPC_PORT,
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
};
@@ -45,10 +45,10 @@ use pageserver_api::models::{
use pageserver_api::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize, TenantShardId};
use postgres_backend::AuthType;
use postgres_connection::parse_host_port;
use safekeeper_api::membership::{SafekeeperGeneration, SafekeeperId};
use safekeeper_api::membership::SafekeeperGeneration;
use safekeeper_api::{
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT, PgMajorVersion, PgVersionId,
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
};
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
use tokio::task::JoinSet;
@@ -64,7 +64,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
const DEFAULT_BRANCH_NAME: &str = "main";
project_git_version!(GIT_VERSION);
const DEFAULT_PG_VERSION: PgMajorVersion = PgMajorVersion::PG17;
const DEFAULT_PG_VERSION: u32 = 17;
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
@@ -169,7 +169,7 @@ struct TenantCreateCmdArgs {
#[arg(default_value_t = DEFAULT_PG_VERSION)]
#[clap(long, help = "Postgres version to use for the initial timeline")]
pg_version: PgMajorVersion,
pg_version: u32,
#[clap(
long,
@@ -292,7 +292,7 @@ struct TimelineCreateCmdArgs {
#[arg(default_value_t = DEFAULT_PG_VERSION)]
#[clap(long, help = "Postgres version")]
pg_version: PgMajorVersion,
pg_version: u32,
}
#[derive(clap::Args)]
@@ -324,7 +324,7 @@ struct TimelineImportCmdArgs {
#[arg(default_value_t = DEFAULT_PG_VERSION)]
#[clap(long, help = "Postgres version of the backup being imported")]
pg_version: PgMajorVersion,
pg_version: u32,
}
#[derive(clap::Subcommand)]
@@ -603,15 +603,7 @@ struct EndpointCreateCmdArgs {
#[arg(default_value_t = DEFAULT_PG_VERSION)]
#[clap(long, help = "Postgres version")]
pg_version: PgMajorVersion,
/// Use gRPC to communicate with Pageservers, by generating grpc:// connstrings.
///
/// Specified on creation such that it's retained across reconfiguration and restarts.
///
/// NB: not yet supported by computes.
#[clap(long)]
grpc: bool,
pg_version: u32,
#[clap(
long,
@@ -672,13 +664,6 @@ struct EndpointStartCmdArgs {
#[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
#[arg(default_value = "90s")]
start_timeout: Duration,
#[clap(
long,
help = "Run in development mode, skipping VM-specific operations like process termination",
action = clap::ArgAction::SetTrue
)]
dev: bool,
}
#[derive(clap::Args)]
@@ -711,9 +696,10 @@ struct EndpointStopCmdArgs {
)]
destroy: bool,
#[clap(long, help = "Postgres shutdown mode")]
#[clap(default_value = "fast")]
mode: EndpointTerminateMode,
#[clap(long, help = "Postgres shutdown mode, passed to \"pg_ctl -m <mode>\"")]
#[arg(value_parser(["smart", "fast", "immediate"]))]
#[arg(default_value = "fast")]
mode: String,
}
#[derive(clap::Args)]
@@ -763,16 +749,16 @@ struct TimelineTreeEl {
/// A flock-based guard over the neon_local repository directory
struct RepoLock {
_file: Flock<File>,
_file: File,
}
impl RepoLock {
fn new() -> Result<Self> {
let repo_dir = File::open(local_env::base_path())?;
match Flock::lock(repo_dir, FlockArg::LockExclusive) {
Ok(f) => Ok(Self { _file: f }),
Err((_, e)) => Err(e).context("flock error"),
}
let repo_dir_fd = repo_dir.as_raw_fd();
flock(repo_dir_fd, FlockArg::LockExclusive)?;
Ok(Self { _file: repo_dir })
}
}
@@ -919,7 +905,7 @@ fn print_timeline(
br_sym = "┗━";
}
print!("{br_sym} @{ancestor_lsn}: ");
print!("{} @{}: ", br_sym, ancestor_lsn);
}
// Finally print a timeline id and name with new line
@@ -1022,16 +1008,13 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
let grpc_port = DEFAULT_PAGESERVER_GRPC_PORT + i;
NeonLocalInitPageserverConf {
id: pageserver_id,
listen_pg_addr: format!("127.0.0.1:{pg_port}"),
listen_http_addr: format!("127.0.0.1:{http_port}"),
listen_https_addr: None,
listen_grpc_addr: Some(format!("127.0.0.1:{grpc_port}")),
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
grpc_auth_type: AuthType::Trust,
other: Default::default(),
// Typical developer machines use disks with slow fsync, and we don't care
// about data integrity: disable disk syncs.
@@ -1269,45 +1252,6 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
pageserver
.timeline_import(tenant_id, timeline_id, base, pg_wal, args.pg_version)
.await?;
if env.storage_controller.timelines_onto_safekeepers {
println!("Creating timeline on safekeeper ...");
let timeline_info = pageserver
.timeline_info(
TenantShardId::unsharded(tenant_id),
timeline_id,
pageserver_client::mgmt_api::ForceAwaitLogicalSize::No,
)
.await?;
let default_sk = SafekeeperNode::from_env(env, env.safekeepers.first().unwrap());
let default_host = default_sk
.conf
.listen_addr
.clone()
.unwrap_or_else(|| "localhost".to_string());
let mconf = safekeeper_api::membership::Configuration {
generation: SafekeeperGeneration::new(1),
members: safekeeper_api::membership::MemberSet {
m: vec![SafekeeperId {
host: default_host,
id: default_sk.conf.id,
pg_port: default_sk.conf.pg_port,
}],
},
new_members: None,
};
let pg_version = PgVersionId::from(args.pg_version);
let req = safekeeper_api::models::TimelineCreateRequest {
tenant_id,
timeline_id,
mconf,
pg_version,
system_id: None,
wal_seg_size: None,
start_lsn: timeline_info.last_record_lsn,
commit_lsn: None,
};
default_sk.create_timeline(&req).await?;
}
env.register_branch_mapping(branch_name.to_string(), tenant_id, timeline_id)?;
println!("Done");
}
@@ -1332,7 +1276,6 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
mode: pageserver_api::models::TimelineCreateRequestMode::Branch {
ancestor_timeline_id,
ancestor_start_lsn: start_lsn,
read_only: false,
pg_version: None,
},
};
@@ -1465,7 +1408,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
args.internal_http_port,
args.pg_version,
mode,
args.grpc,
!args.update_catalog,
false,
)?;
@@ -1506,20 +1448,13 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
let conf = env.get_pageserver_conf(pageserver_id).unwrap();
// Use gRPC if requested.
let pageserver = if endpoint.grpc {
let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
let (host, port) = parse_host_port(grpc_addr)?;
let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
(PageserverProtocol::Grpc, host, port)
} else {
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
let port = port.unwrap_or(5432);
(PageserverProtocol::Libpq, host, port)
};
// If caller is telling us what pageserver to use, this is not a tenant which is
// fully managed by storage controller, therefore not sharded.
(vec![pageserver], DEFAULT_STRIPE_SIZE)
let parsed = parse_host_port(&conf.listen_pg_addr).expect("Bad config");
(
vec![(parsed.0, parsed.1.unwrap_or(5432))],
// If caller is telling us what pageserver to use, this is not a tenant which is
// full managed by storage controller, therefore not sharded.
DEFAULT_STRIPE_SIZE,
)
} else {
// Look up the currently attached location of the tenant, and its striping metadata,
// to pass these on to postgres.
@@ -1538,20 +1473,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
.await?;
}
let pageserver = if endpoint.grpc {
(
PageserverProtocol::Grpc,
Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))?,
shard.listen_grpc_port.expect("no gRPC port"),
)
} else {
(
PageserverProtocol::Libpq,
Host::parse(&shard.listen_pg_addr)?,
shard.listen_pg_port,
)
};
anyhow::Ok(pageserver)
anyhow::Ok((
Host::parse(&shard.listen_pg_addr)
.expect("Storage controller reported bad hostname"),
shard.listen_pg_port,
))
}),
)
.await?;
@@ -1596,7 +1522,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
stripe_size.0 as usize,
args.create_test_user,
args.start_timeout,
args.dev,
)
.await?;
}
@@ -1607,19 +1532,11 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
.get(endpoint_id.as_str())
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
let conf = env.get_pageserver_conf(ps_id)?;
// Use gRPC if requested.
let pageserver = if endpoint.grpc {
let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
let (host, port) = parse_host_port(grpc_addr)?;
let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
(PageserverProtocol::Grpc, host, port)
} else {
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
let port = port.unwrap_or(5432);
(PageserverProtocol::Libpq, host, port)
};
vec![pageserver]
let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
vec![(
pageserver.pg_connection_config.host().clone(),
pageserver.pg_connection_config.port(),
)]
} else {
let storage_controller = StorageController::from_env(env);
storage_controller
@@ -1628,30 +1545,18 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
.shards
.into_iter()
.map(|shard| {
// Use gRPC if requested.
if endpoint.grpc {
(
PageserverProtocol::Grpc,
Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))
.expect("bad hostname"),
shard.listen_grpc_port.expect("no gRPC port"),
)
} else {
(
PageserverProtocol::Libpq,
Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
shard.listen_pg_port,
)
}
(
Host::parse(&shard.listen_pg_addr)
.expect("Storage controller reported malformed host"),
shard.listen_pg_port,
)
})
.collect::<Vec<_>>()
};
// If --safekeepers argument is given, use only the listed
// safekeeper nodes; otherwise all from the env.
let safekeepers = parse_safekeepers(&args.safekeepers)?;
endpoint
.reconfigure(Some(pageservers), None, safekeepers, None)
.await?;
endpoint.reconfigure(pageservers, None, safekeepers).await?;
}
EndpointCmd::Stop(args) => {
let endpoint_id = &args.endpoint_id;
@@ -1659,10 +1564,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
.endpoints
.get(endpoint_id)
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
match endpoint.stop(args.mode, args.destroy).await?.lsn {
Some(lsn) => println!("{lsn}"),
None => println!("null"),
}
endpoint.stop(&args.mode, args.destroy)?;
}
EndpointCmd::GenerateJwt(args) => {
let endpoint = {
@@ -1744,7 +1646,7 @@ async fn handle_pageserver(subcmd: &PageserverCmd, env: &local_env::LocalEnv) ->
StopMode::Immediate => true,
};
if let Err(e) = get_pageserver(env, args.pageserver_id)?.stop(immediate) {
eprintln!("pageserver stop failed: {e}");
eprintln!("pageserver stop failed: {}", e);
exit(1);
}
}
@@ -1753,7 +1655,7 @@ async fn handle_pageserver(subcmd: &PageserverCmd, env: &local_env::LocalEnv) ->
let pageserver = get_pageserver(env, args.pageserver_id)?;
//TODO what shutdown strategy should we use here?
if let Err(e) = pageserver.stop(false) {
eprintln!("pageserver stop failed: {e}");
eprintln!("pageserver stop failed: {}", e);
exit(1);
}
@@ -1770,7 +1672,7 @@ async fn handle_pageserver(subcmd: &PageserverCmd, env: &local_env::LocalEnv) ->
{
Ok(_) => println!("Page server is up and running"),
Err(err) => {
eprintln!("Page server is not available: {err}");
eprintln!("Page server is not available: {}", err);
exit(1);
}
}
@@ -1807,7 +1709,7 @@ async fn handle_storage_controller(
},
};
if let Err(e) = svc.stop(stop_args).await {
eprintln!("stop failed: {e}");
eprintln!("stop failed: {}", e);
exit(1);
}
}
@@ -1829,7 +1731,7 @@ async fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) ->
let safekeeper = get_safekeeper(env, args.id)?;
if let Err(e) = safekeeper.start(&args.extra_opt, &args.start_timeout).await {
eprintln!("safekeeper start failed: {e}");
eprintln!("safekeeper start failed: {}", e);
exit(1);
}
}
@@ -1841,7 +1743,7 @@ async fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) ->
StopMode::Immediate => true,
};
if let Err(e) = safekeeper.stop(immediate) {
eprintln!("safekeeper stop failed: {e}");
eprintln!("safekeeper stop failed: {}", e);
exit(1);
}
}
@@ -1854,12 +1756,12 @@ async fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) ->
};
if let Err(e) = safekeeper.stop(immediate) {
eprintln!("safekeeper stop failed: {e}");
eprintln!("safekeeper stop failed: {}", e);
exit(1);
}
if let Err(e) = safekeeper.start(&args.extra_opt, &args.start_timeout).await {
eprintln!("safekeeper start failed: {e}");
eprintln!("safekeeper start failed: {}", e);
exit(1);
}
}
@@ -2094,16 +1996,11 @@ async fn handle_stop_all(args: &StopCmdArgs, env: &local_env::LocalEnv) -> Resul
}
async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
let mode = if immediate {
EndpointTerminateMode::Immediate
} else {
EndpointTerminateMode::Fast
};
// Stop all endpoints
match ComputeControlPlane::load(env.clone()) {
Ok(cplane) => {
for (_k, node) in cplane.endpoints {
if let Err(e) = node.stop(mode, false).await {
if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) {
eprintln!("postgres stop failed: {e:#}");
}
}
@@ -2115,7 +2012,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
let storage = EndpointStorage::from_env(env);
if let Err(e) = storage.stop(immediate) {
eprintln!("endpoint_storage stop failed: {e:#}");
eprintln!("endpoint_storage stop failed: {:#}", e);
}
for ps_conf in &env.pageservers {

View File

@@ -37,7 +37,6 @@
//! ```
//!
use std::collections::BTreeMap;
use std::fmt::Display;
use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
use std::path::PathBuf;
use std::process::Command;
@@ -46,18 +45,15 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use anyhow::{Context, Result, anyhow, bail};
use base64::Engine;
use base64::prelude::BASE64_URL_SAFE_NO_PAD;
use compute_api::requests::{
COMPUTE_AUDIENCE, ComputeClaims, ComputeClaimsScope, ConfigurationRequest,
};
use compute_api::responses::{
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TerminateResponse,
TlsConfig,
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TlsConfig,
};
use compute_api::spec::{
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,
PgIdent, RemoteExtSpec, Role,
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
RemoteExtSpec, Role,
};
use jsonwebtoken::jwk::{
AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
@@ -67,7 +63,6 @@ use nix::sys::signal::{Signal, kill};
use pageserver_api::shard::ShardStripeSize;
use pem::Pem;
use reqwest::header::CONTENT_TYPE;
use safekeeper_api::PgMajorVersion;
use safekeeper_api::membership::SafekeeperGeneration;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
@@ -79,6 +74,7 @@ use utils::id::{NodeId, TenantId, TimelineId};
use crate::local_env::LocalEnv;
use crate::postgresql_conf::PostgresConf;
use crate::storage_controller::StorageController;
// contents of a endpoint.json file
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
@@ -90,8 +86,7 @@ pub struct EndpointConf {
pg_port: u16,
external_http_port: u16,
internal_http_port: u16,
pg_version: PgMajorVersion,
grpc: bool,
pg_version: u32,
skip_pg_catalog_updates: bool,
reconfigure_concurrency: usize,
drop_subscriptions_before_start: bool,
@@ -169,7 +164,7 @@ impl ComputeControlPlane {
public_key_use: Some(PublicKeyUse::Signature),
key_operations: Some(vec![KeyOperations::Verify]),
key_algorithm: Some(KeyAlgorithm::EdDSA),
key_id: Some(BASE64_URL_SAFE_NO_PAD.encode(key_hash)),
key_id: Some(base64::encode_config(key_hash, base64::URL_SAFE_NO_PAD)),
x509_url: None::<String>,
x509_chain: None::<Vec<String>>,
x509_sha1_fingerprint: None::<String>,
@@ -178,7 +173,7 @@ impl ComputeControlPlane {
algorithm: AlgorithmParameters::OctetKeyPair(OctetKeyPairParameters {
key_type: OctetKeyPairType::OctetKeyPair,
curve: EllipticCurve::Ed25519,
x: BASE64_URL_SAFE_NO_PAD.encode(public_key),
x: base64::encode_config(public_key, base64::URL_SAFE_NO_PAD),
}),
}],
})
@@ -193,9 +188,8 @@ impl ComputeControlPlane {
pg_port: Option<u16>,
external_http_port: Option<u16>,
internal_http_port: Option<u16>,
pg_version: PgMajorVersion,
pg_version: u32,
mode: ComputeMode,
grpc: bool,
skip_pg_catalog_updates: bool,
drop_subscriptions_before_start: bool,
) -> Result<Arc<Endpoint>> {
@@ -230,7 +224,6 @@ impl ComputeControlPlane {
// we also skip catalog updates in the cloud.
skip_pg_catalog_updates,
drop_subscriptions_before_start,
grpc,
reconfigure_concurrency: 1,
features: vec![],
cluster: None,
@@ -249,7 +242,6 @@ impl ComputeControlPlane {
internal_http_port,
pg_port,
pg_version,
grpc,
skip_pg_catalog_updates,
drop_subscriptions_before_start,
reconfigure_concurrency: 1,
@@ -304,8 +296,6 @@ pub struct Endpoint {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub mode: ComputeMode,
/// If true, the endpoint should use gRPC to communicate with Pageservers.
pub grpc: bool,
// port and address of the Postgres server and `compute_ctl`'s HTTP APIs
pub pg_address: SocketAddr,
@@ -313,7 +303,7 @@ pub struct Endpoint {
pub internal_http_address: SocketAddr,
// postgres major version in the format: 14, 15, etc.
pg_version: PgMajorVersion,
pg_version: u32,
// These are not part of the endpoint as such, but the environment
// the endpoint runs in.
@@ -341,35 +331,15 @@ pub enum EndpointStatus {
RunningNoPidfile,
}
impl Display for EndpointStatus {
impl std::fmt::Display for EndpointStatus {
fn fmt(&self, writer: &mut std::fmt::Formatter) -> std::fmt::Result {
writer.write_str(match self {
let s = match self {
Self::Running => "running",
Self::Stopped => "stopped",
Self::Crashed => "crashed",
Self::RunningNoPidfile => "running, no pidfile",
})
}
}
#[derive(Default, Clone, Copy, clap::ValueEnum)]
pub enum EndpointTerminateMode {
#[default]
/// Use pg_ctl stop -m fast
Fast,
/// Use pg_ctl stop -m immediate
Immediate,
/// Use /terminate?mode=immediate
ImmediateTerminate,
}
impl std::fmt::Display for EndpointTerminateMode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match &self {
EndpointTerminateMode::Fast => "fast",
EndpointTerminateMode::Immediate => "immediate",
EndpointTerminateMode::ImmediateTerminate => "immediate-terminate",
})
};
write!(writer, "{}", s)
}
}
@@ -408,7 +378,6 @@ impl Endpoint {
mode: conf.mode,
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
grpc: conf.grpc,
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
reconfigure_concurrency: conf.reconfigure_concurrency,
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
@@ -535,7 +504,7 @@ impl Endpoint {
conf.append("hot_standby", "on");
// prefetching of blocks referenced in WAL doesn't make sense for us
// Neon hot standby ignores pages that are not in the shared_buffers
if self.pg_version >= PgMajorVersion::PG15 {
if self.pg_version >= 15 {
conf.append("recovery_prefetch", "off");
}
}
@@ -637,10 +606,10 @@ impl Endpoint {
}
}
fn build_pageserver_connstr(pageservers: &[(PageserverProtocol, Host, u16)]) -> String {
fn build_pageserver_connstr(pageservers: &[(Host, u16)]) -> String {
pageservers
.iter()
.map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
.map(|(host, port)| format!("postgresql://no_user@{host}:{port}"))
.collect::<Vec<_>>()
.join(",")
}
@@ -685,12 +654,11 @@ impl Endpoint {
endpoint_storage_addr: String,
safekeepers_generation: Option<SafekeeperGeneration>,
safekeepers: Vec<NodeId>,
pageservers: Vec<(PageserverProtocol, Host, u16)>,
pageservers: Vec<(Host, u16)>,
remote_ext_base_url: Option<&String>,
shard_stripe_size: usize,
create_test_user: bool,
start_timeout: Duration,
dev: bool,
) -> Result<()> {
if self.status() == EndpointStatus::Running {
anyhow::bail!("The endpoint is already running");
@@ -779,8 +747,7 @@ impl Endpoint {
logs_export_host: None::<String>,
endpoint_storage_addr: Some(endpoint_storage_addr),
endpoint_storage_token: Some(endpoint_storage_token),
autoprewarm: false,
suspend_timeout_seconds: -1, // Only used in neon_local.
prewarm_lfc_on_startup: false,
};
// this strange code is needed to support respec() in tests
@@ -825,10 +792,10 @@ impl Endpoint {
// Launch compute_ctl
let conn_str = self.connstr("cloud_admin", "postgres");
println!("Starting postgres node at '{conn_str}'");
println!("Starting postgres node at '{}'", conn_str);
if create_test_user {
let conn_str = self.connstr("test", "neondb");
println!("Also at '{conn_str}'");
println!("Also at '{}'", conn_str);
}
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
cmd.args([
@@ -862,10 +829,6 @@ impl Endpoint {
cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
}
if dev {
cmd.arg("--dev");
}
let child = cmd.spawn()?;
// set up a scopeguard to kill & wait for the child in case we panic or bail below
let child = scopeguard::guard(child, |mut child| {
@@ -918,7 +881,7 @@ impl Endpoint {
ComputeStatus::Empty
| ComputeStatus::ConfigurationPending
| ComputeStatus::Configuration
| ComputeStatus::TerminationPending { .. }
| ComputeStatus::TerminationPending
| ComputeStatus::Terminated => {
bail!("unexpected compute status: {:?}", state.status)
}
@@ -927,7 +890,8 @@ impl Endpoint {
Err(e) => {
if Instant::now().duration_since(start_at) > start_timeout {
return Err(e).context(format!(
"timed out {start_timeout:?} waiting to connect to compute_ctl HTTP",
"timed out {:?} waiting to connect to compute_ctl HTTP",
start_timeout,
));
}
}
@@ -966,7 +930,7 @@ impl Endpoint {
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
let url = response.url().to_owned();
let msg = match response.text().await {
Ok(err_body) => format!("Error: {err_body}"),
Ok(err_body) => format!("Error: {}", err_body),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
};
Err(anyhow::anyhow!(msg))
@@ -975,10 +939,9 @@ impl Endpoint {
pub async fn reconfigure(
&self,
pageservers: Option<Vec<(PageserverProtocol, Host, u16)>>,
mut pageservers: Vec<(Host, u16)>,
stripe_size: Option<ShardStripeSize>,
safekeepers: Option<Vec<NodeId>>,
safekeeper_generation: Option<SafekeeperGeneration>,
) -> Result<()> {
let (mut spec, compute_ctl_config) = {
let config_path = self.endpoint_path().join("config.json");
@@ -991,24 +954,34 @@ impl Endpoint {
let postgresql_conf = self.read_postgresql_conf()?;
spec.cluster.postgresql_conf = Some(postgresql_conf);
// If pageservers are not specified, don't change them.
if let Some(pageservers) = pageservers {
anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
// If we weren't given explicit pageservers, query the storage controller
if pageservers.is_empty() {
let storage_controller = StorageController::from_env(&self.env);
let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
pageservers = locate_result
.shards
.into_iter()
.map(|shard| {
(
Host::parse(&shard.listen_pg_addr)
.expect("Storage controller reported bad hostname"),
shard.listen_pg_port,
)
})
.collect::<Vec<_>>();
}
let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
spec.pageserver_connstring = Some(pageserver_connstr);
if stripe_size.is_some() {
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
}
let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
assert!(!pageserver_connstr.is_empty());
spec.pageserver_connstring = Some(pageserver_connstr);
if stripe_size.is_some() {
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
}
// If safekeepers are not specified, don't change them.
if let Some(safekeepers) = safekeepers {
let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
spec.safekeeper_connstrings = safekeeper_connstrings;
if let Some(g) = safekeeper_generation {
spec.safekeepers_generation = Some(g.into_inner());
}
}
let client = reqwest::Client::builder()
@@ -1039,52 +1012,15 @@ impl Endpoint {
} else {
let url = response.url().to_owned();
let msg = match response.text().await {
Ok(err_body) => format!("Error: {err_body}"),
Ok(err_body) => format!("Error: {}", err_body),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
};
Err(anyhow::anyhow!(msg))
}
}
pub async fn reconfigure_pageservers(
&self,
pageservers: Vec<(PageserverProtocol, Host, u16)>,
stripe_size: Option<ShardStripeSize>,
) -> Result<()> {
self.reconfigure(Some(pageservers), stripe_size, None, None)
.await
}
pub async fn reconfigure_safekeepers(
&self,
safekeepers: Vec<NodeId>,
generation: SafekeeperGeneration,
) -> Result<()> {
self.reconfigure(None, None, Some(safekeepers), Some(generation))
.await
}
pub async fn stop(
&self,
mode: EndpointTerminateMode,
destroy: bool,
) -> Result<TerminateResponse> {
// pg_ctl stop is fast but doesn't allow us to collect LSN. /terminate is
// slow, and test runs time out. Solution: special mode "immediate-terminate"
// which uses /terminate
let response = if let EndpointTerminateMode::ImmediateTerminate = mode {
let ip = self.external_http_address.ip();
let port = self.external_http_address.port();
let url = format!("http://{ip}:{port}/terminate?mode=immediate");
let token = self.generate_jwt(Some(ComputeClaimsScope::Admin))?;
let request = reqwest::Client::new().post(url).bearer_auth(token);
let response = request.send().await.context("/terminate")?;
let text = response.text().await.context("/terminate result")?;
serde_json::from_str(&text).with_context(|| format!("deserializing {text}"))?
} else {
self.pg_ctl(&["-m", &mode.to_string(), "stop"], &None)?;
TerminateResponse { lsn: None }
};
pub fn stop(&self, mode: &str, destroy: bool) -> Result<()> {
self.pg_ctl(&["-m", mode, "stop"], &None)?;
// Also wait for the compute_ctl process to die. It might have some
// cleanup work to do after postgres stops, like syncing safekeepers,
@@ -1094,7 +1030,7 @@ impl Endpoint {
// waiting. Sometimes we do *not* want this cleanup: tests intentionally
// do stop when majority of safekeepers is down, so sync-safekeepers
// would hang otherwise. This could be a separate flag though.
let send_sigterm = destroy || !matches!(mode, EndpointTerminateMode::Fast);
let send_sigterm = destroy || mode == "immediate";
self.wait_for_compute_ctl_to_exit(send_sigterm)?;
if destroy {
println!(
@@ -1103,7 +1039,7 @@ impl Endpoint {
);
std::fs::remove_dir_all(self.endpoint_path())?;
}
Ok(response)
Ok(())
}
pub fn connstr(&self, user: &str, db_name: &str) -> String {

View File

@@ -12,11 +12,9 @@ use std::{env, fs};
use anyhow::{Context, bail};
use clap::ValueEnum;
use pageserver_api::config::PostHogConfig;
use pem::Pem;
use postgres_backend::AuthType;
use reqwest::{Certificate, Url};
use safekeeper_api::PgMajorVersion;
use serde::{Deserialize, Serialize};
use utils::auth::encode_from_key_file;
use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
@@ -211,12 +209,6 @@ pub struct NeonStorageControllerConf {
pub use_https_safekeeper_api: bool,
pub use_local_compute_notifications: bool,
pub timeline_safekeeper_count: Option<usize>,
pub posthog_config: Option<PostHogConfig>,
pub kick_secondary_downloads: Option<bool>,
}
impl NeonStorageControllerConf {
@@ -244,12 +236,9 @@ impl Default for NeonStorageControllerConf {
heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
long_reconcile_threshold: None,
use_https_pageserver_api: false,
timelines_onto_safekeepers: true,
timelines_onto_safekeepers: false,
use_https_safekeeper_api: false,
use_local_compute_notifications: true,
timeline_safekeeper_count: None,
posthog_config: None,
kick_secondary_downloads: None,
}
}
}
@@ -265,7 +254,7 @@ impl Default for EndpointStorageConf {
impl NeonBroker {
pub fn client_url(&self) -> Url {
let url = if let Some(addr) = self.listen_https_addr {
format!("https://{addr}")
format!("https://{}", addr)
} else {
format!(
"http://{}",
@@ -289,10 +278,8 @@ pub struct PageServerConf {
pub listen_pg_addr: String,
pub listen_http_addr: String,
pub listen_https_addr: Option<String>,
pub listen_grpc_addr: Option<String>,
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
pub grpc_auth_type: AuthType,
pub no_sync: bool,
}
@@ -303,10 +290,8 @@ impl Default for PageServerConf {
listen_pg_addr: String::new(),
listen_http_addr: String::new(),
listen_https_addr: None,
listen_grpc_addr: None,
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
grpc_auth_type: AuthType::Trust,
no_sync: false,
}
}
@@ -321,10 +306,8 @@ pub struct NeonLocalInitPageserverConf {
pub listen_pg_addr: String,
pub listen_http_addr: String,
pub listen_https_addr: Option<String>,
pub listen_grpc_addr: Option<String>,
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
pub grpc_auth_type: AuthType,
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
pub no_sync: bool,
#[serde(flatten)]
@@ -338,10 +321,8 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
listen_pg_addr,
listen_http_addr,
listen_https_addr,
listen_grpc_addr,
pg_auth_type,
http_auth_type,
grpc_auth_type,
no_sync,
other: _,
} = conf;
@@ -350,9 +331,7 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf {
listen_pg_addr: listen_pg_addr.clone(),
listen_http_addr: listen_http_addr.clone(),
listen_https_addr: listen_https_addr.clone(),
listen_grpc_addr: listen_grpc_addr.clone(),
pg_auth_type: *pg_auth_type,
grpc_auth_type: *grpc_auth_type,
http_auth_type: *http_auth_type,
no_sync: *no_sync,
}
@@ -429,21 +408,25 @@ impl LocalEnv {
self.pg_distrib_dir.clone()
}
pub fn pg_distrib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
let path = self.pg_distrib_dir.clone();
Ok(path.join(pg_version.v_str()))
#[allow(clippy::manual_range_patterns)]
match pg_version {
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version),
}
}
pub fn pg_dir(&self, pg_version: PgMajorVersion, dir_name: &str) -> anyhow::Result<PathBuf> {
pub fn pg_dir(&self, pg_version: u32, dir_name: &str) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir(pg_version)?.join(dir_name))
}
pub fn pg_bin_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
self.pg_dir(pg_version, "bin")
}
pub fn pg_lib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
self.pg_dir(pg_version, "lib")
}
@@ -724,17 +707,15 @@ impl LocalEnv {
listen_pg_addr: String,
listen_http_addr: String,
listen_https_addr: Option<String>,
listen_grpc_addr: Option<String>,
pg_auth_type: AuthType,
http_auth_type: AuthType,
grpc_auth_type: AuthType,
#[serde(default)]
no_sync: bool,
}
let config_toml_path = dentry.path().join("pageserver.toml");
let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
&std::fs::read_to_string(&config_toml_path)
.with_context(|| format!("read {config_toml_path:?}"))?,
.with_context(|| format!("read {:?}", config_toml_path))?,
)
.context("parse pageserver.toml")?;
let identity_toml_path = dentry.path().join("identity.toml");
@@ -744,17 +725,15 @@ impl LocalEnv {
}
let identity_toml: IdentityTomlSubset = toml_edit::de::from_str(
&std::fs::read_to_string(&identity_toml_path)
.with_context(|| format!("read {identity_toml_path:?}"))?,
.with_context(|| format!("read {:?}", identity_toml_path))?,
)
.context("parse identity.toml")?;
let PageserverConfigTomlSubset {
listen_pg_addr,
listen_http_addr,
listen_https_addr,
listen_grpc_addr,
pg_auth_type,
http_auth_type,
grpc_auth_type,
no_sync,
} = config_toml;
let IdentityTomlSubset {
@@ -771,10 +750,8 @@ impl LocalEnv {
listen_pg_addr,
listen_http_addr,
listen_https_addr,
listen_grpc_addr,
pg_auth_type,
http_auth_type,
grpc_auth_type,
no_sync,
};
pageservers.push(conf);

View File

@@ -16,13 +16,11 @@ use std::time::Duration;
use anyhow::{Context, bail};
use camino::Utf8PathBuf;
use pageserver_api::config::{DEFAULT_GRPC_LISTEN_PORT, DEFAULT_HTTP_LISTEN_PORT};
use pageserver_api::models::{self, TenantInfo, TimelineInfo};
use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api;
use postgres_backend::AuthType;
use postgres_connection::{PgConnectionConfig, parse_host_port};
use safekeeper_api::PgMajorVersion;
use utils::auth::{Claims, Scope};
use utils::id::{NodeId, TenantId, TimelineId};
use utils::lsn::Lsn;
@@ -122,7 +120,7 @@ impl PageServerNode {
.env
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
.unwrap();
overrides.push(format!("control_plane_api_token='{jwt_token}'"));
overrides.push(format!("control_plane_api_token='{}'", jwt_token));
}
if !conf.other.contains_key("remote_storage") {
@@ -131,9 +129,7 @@ impl PageServerNode {
));
}
if [conf.http_auth_type, conf.pg_auth_type, conf.grpc_auth_type]
.contains(&AuthType::NeonJWT)
{
if conf.http_auth_type != AuthType::Trust || conf.pg_auth_type != AuthType::Trust {
// Keys are generated in the toplevel repo dir, pageservers' workdirs
// are one level below that, so refer to keys with ../
overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
@@ -254,10 +250,9 @@ impl PageServerNode {
// the storage controller
let metadata_path = datadir.join("metadata.json");
let http_host = "localhost".to_string();
let (_, http_port) =
let (_http_host, http_port) =
parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr");
let http_port = http_port.unwrap_or(DEFAULT_HTTP_LISTEN_PORT);
let http_port = http_port.unwrap_or(9898);
let https_port = match self.conf.listen_https_addr.as_ref() {
Some(https_addr) => {
@@ -268,13 +263,6 @@ impl PageServerNode {
None => None,
};
let (mut grpc_host, mut grpc_port) = (None, None);
if let Some(grpc_addr) = &self.conf.listen_grpc_addr {
let (_, port) = parse_host_port(grpc_addr).expect("Unable to parse listen_grpc_addr");
grpc_host = Some("localhost".to_string());
grpc_port = Some(port.unwrap_or(DEFAULT_GRPC_LISTEN_PORT));
}
// Intentionally hand-craft JSON: this acts as an implicit format compat test
// in case the pageserver-side structure is edited, and reflects the real life
// situation: the metadata is written by some other script.
@@ -283,9 +271,7 @@ impl PageServerNode {
serde_json::to_vec(&pageserver_api::config::NodeMetadata {
postgres_host: "localhost".to_string(),
postgres_port: self.pg_connection_config.port(),
grpc_host,
grpc_port,
http_host,
http_host: "localhost".to_string(),
http_port,
https_port,
other: HashMap::from([(
@@ -525,6 +511,11 @@ impl PageServerNode {
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'timeline_offloading' as bool")?,
wal_receiver_protocol_override: settings
.remove("wal_receiver_protocol_override")
.map(serde_json::from_str)
.transpose()
.context("parse `wal_receiver_protocol_override` from json")?,
rel_size_v2_enabled: settings
.remove("rel_size_v2_enabled")
.map(|x| x.parse::<bool>())
@@ -555,16 +546,6 @@ impl PageServerNode {
.map(serde_json::from_str)
.transpose()
.context("Falied to parse 'sampling_ratio'")?,
relsize_snapshot_cache_capacity: settings
.remove("relsize snapshot cache capacity")
.map(|x| x.parse::<usize>())
.transpose()
.context("Falied to parse 'relsize_snapshot_cache_capacity' as integer")?,
basebackup_cache_enabled: settings
.remove("basebackup_cache_enabled")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'basebackup_cache_enabled' as bool")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
@@ -608,7 +589,7 @@ impl PageServerNode {
timeline_id: TimelineId,
base: (Lsn, PathBuf),
pg_wal: Option<(Lsn, PathBuf)>,
pg_version: PgMajorVersion,
pg_version: u32,
) -> anyhow::Result<()> {
// Init base reader
let (start_lsn, base_tarfile_path) = base;
@@ -647,16 +628,4 @@ impl PageServerNode {
Ok(())
}
pub async fn timeline_info(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
force_await_logical_size: mgmt_api::ForceAwaitLogicalSize,
) -> anyhow::Result<TimelineInfo> {
let timeline_info = self
.http_client
.timeline_info(tenant_shard_id, timeline_id, force_await_logical_size)
.await?;
Ok(timeline_info)
}
}

View File

@@ -6,6 +6,7 @@
//! .neon/safekeepers/<safekeeper id>
//! ```
use std::error::Error as _;
use std::future::Future;
use std::io::Write;
use std::path::PathBuf;
use std::time::Duration;
@@ -13,9 +14,9 @@ use std::{io, result};
use anyhow::Context;
use camino::Utf8PathBuf;
use http_utils::error::HttpErrorBody;
use postgres_connection::PgConnectionConfig;
use safekeeper_api::models::TimelineCreateRequest;
use safekeeper_client::mgmt_api;
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use utils::auth::{Claims, Scope};
use utils::id::NodeId;
@@ -34,14 +35,25 @@ pub enum SafekeeperHttpError {
type Result<T> = result::Result<T, SafekeeperHttpError>;
fn err_from_client_err(err: mgmt_api::Error) -> SafekeeperHttpError {
use mgmt_api::Error::*;
match err {
ApiError(_, str) => SafekeeperHttpError::Response(str),
Cancelled => SafekeeperHttpError::Response("Cancelled".to_owned()),
ReceiveBody(err) => SafekeeperHttpError::Transport(err),
ReceiveErrorBody(err) => SafekeeperHttpError::Response(err),
Timeout(str) => SafekeeperHttpError::Response(format!("timeout: {str}")),
pub(crate) trait ResponseErrorMessageExt: Sized {
fn error_from_body(self) -> impl Future<Output = Result<Self>> + Send;
}
impl ResponseErrorMessageExt for reqwest::Response {
async fn error_from_body(self) -> Result<Self> {
let status = self.status();
if !(status.is_client_error() || status.is_server_error()) {
return Ok(self);
}
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
let url = self.url().to_owned();
Err(SafekeeperHttpError::Response(
match self.json::<HttpErrorBody>().await {
Ok(err_body) => format!("Error: {}", err_body.msg),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
},
))
}
}
@@ -58,8 +70,9 @@ pub struct SafekeeperNode {
pub pg_connection_config: PgConnectionConfig,
pub env: LocalEnv,
pub http_client: mgmt_api::Client,
pub http_client: reqwest::Client,
pub listen_addr: String,
pub http_base_url: String,
}
impl SafekeeperNode {
@@ -69,14 +82,13 @@ impl SafekeeperNode {
} else {
"127.0.0.1".to_string()
};
let jwt = None;
let http_base_url = format!("http://{}:{}", listen_addr, conf.http_port);
SafekeeperNode {
id: conf.id,
conf: conf.clone(),
pg_connection_config: Self::safekeeper_connection_config(&listen_addr, conf.pg_port),
env: env.clone(),
http_client: mgmt_api::Client::new(env.create_http_client(), http_base_url, jwt),
http_client: env.create_http_client(),
http_base_url: format!("http://{}:{}/v1", listen_addr, conf.http_port),
listen_addr,
}
}
@@ -143,7 +155,7 @@ impl SafekeeperNode {
let id_string = id.to_string();
// TODO: add availability_zone to the config.
// Right now we just specify any value here and use it to check metrics in tests.
let availability_zone = format!("sk-{id_string}");
let availability_zone = format!("sk-{}", id_string);
let mut args = vec![
"-D".to_owned(),
@@ -266,19 +278,20 @@ impl SafekeeperNode {
)
}
pub async fn check_status(&self) -> Result<()> {
self.http_client
.status()
.await
.map_err(err_from_client_err)?;
Ok(())
fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> reqwest::RequestBuilder {
// TODO: authentication
//if self.env.auth_type == AuthType::NeonJWT {
// builder = builder.bearer_auth(&self.env.safekeeper_auth_token)
//}
self.http_client.request(method, url)
}
pub async fn create_timeline(&self, req: &TimelineCreateRequest) -> Result<()> {
self.http_client
.create_timeline(req)
.await
.map_err(err_from_client_err)?;
pub async fn check_status(&self) -> Result<()> {
self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
.send()
.await?
.error_from_body()
.await?;
Ok(())
}
}

View File

@@ -6,8 +6,6 @@ use std::str::FromStr;
use std::sync::OnceLock;
use std::time::{Duration, Instant};
use crate::background_process;
use crate::local_env::{LocalEnv, NeonStorageControllerConf};
use camino::{Utf8Path, Utf8PathBuf};
use hyper0::Uri;
use nix::unistd::Pid;
@@ -24,7 +22,6 @@ use pageserver_client::mgmt_api::ResponseErrorMessageExt;
use pem::Pem;
use postgres_backend::AuthType;
use reqwest::{Method, Response};
use safekeeper_api::PgMajorVersion;
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use tokio::process::Command;
@@ -34,6 +31,9 @@ use utils::auth::{Claims, Scope, encode_from_key_file};
use utils::id::{NodeId, TenantId};
use whoami::username;
use crate::background_process;
use crate::local_env::{LocalEnv, NeonStorageControllerConf};
pub struct StorageController {
env: LocalEnv,
private_key: Option<Pem>,
@@ -48,7 +48,7 @@ pub struct StorageController {
const COMMAND: &str = "storage_controller";
const STORAGE_CONTROLLER_POSTGRES_VERSION: PgMajorVersion = PgMajorVersion::PG16;
const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
const DB_NAME: &str = "storage_controller";
@@ -167,7 +167,7 @@ impl StorageController {
fn storage_controller_instance_dir(&self, instance_id: u8) -> PathBuf {
self.env
.base_data_dir
.join(format!("storage_controller_{instance_id}"))
.join(format!("storage_controller_{}", instance_id))
}
fn pid_file(&self, instance_id: u8) -> Utf8PathBuf {
@@ -184,15 +184,9 @@ impl StorageController {
/// to other versions if that one isn't found. Some automated tests create circumstances
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
const PREFER_VERSIONS: [PgMajorVersion; 5] = [
STORAGE_CONTROLLER_POSTGRES_VERSION,
PgMajorVersion::PG16,
PgMajorVersion::PG15,
PgMajorVersion::PG14,
PgMajorVersion::PG17,
];
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 16, 15, 14];
for v in PREFER_VERSIONS {
for v in prefer_versions {
let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
if tokio::fs::try_exists(&path).await? {
return Ok(path);
@@ -226,7 +220,7 @@ impl StorageController {
"-d",
DB_NAME,
"-p",
&format!("{postgres_port}"),
&format!("{}", postgres_port),
];
let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();
let envs = [
@@ -269,7 +263,7 @@ impl StorageController {
"-h",
"localhost",
"-p",
&format!("{postgres_port}"),
&format!("{}", postgres_port),
"-U",
&username(),
"-O",
@@ -431,7 +425,7 @@ impl StorageController {
// from `LocalEnv`'s config file (`.neon/config`).
tokio::fs::write(
&pg_data_path.join("postgresql.conf"),
format!("port = {postgres_port}\nfsync=off\n"),
format!("port = {}\nfsync=off\n", postgres_port),
)
.await?;
@@ -483,7 +477,7 @@ impl StorageController {
self.setup_database(postgres_port).await?;
}
let database_url = format!("postgresql://localhost:{postgres_port}/{DB_NAME}");
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", postgres_port);
// We support running a startup SQL script to fiddle with the database before we launch storcon.
// This is used by the test suite.
@@ -514,7 +508,7 @@ impl StorageController {
drop(client);
conn.await??;
let addr = format!("{host}:{listen_port}");
let addr = format!("{}:{}", host, listen_port);
let address_for_peers = Uri::builder()
.scheme(scheme)
.authority(addr.clone())
@@ -563,10 +557,6 @@ impl StorageController {
args.push("--use-local-compute-notifications".to_string());
}
if let Some(value) = self.config.kick_secondary_downloads {
args.push(format!("--kick-secondary-downloads={value}"));
}
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
}
@@ -638,28 +628,6 @@ impl StorageController {
args.push("--timelines-onto-safekeepers".to_string());
}
// neon_local is used in test environments where we often have less than 3 safekeepers.
if self.config.timeline_safekeeper_count.is_some() || self.env.safekeepers.len() < 3 {
let sk_cnt = self
.config
.timeline_safekeeper_count
.unwrap_or(self.env.safekeepers.len());
args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
}
let mut envs = vec![
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
];
if let Some(posthog_config) = &self.config.posthog_config {
envs.push((
"POSTHOG_CONFIG".to_string(),
serde_json::to_string(posthog_config)?,
));
}
println!("Starting storage controller");
background_process::start_process(
@@ -667,7 +635,10 @@ impl StorageController {
&instance_dir,
&self.env.storage_controller_bin(),
args,
envs,
vec![
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
],
background_process::InitialPidFile::Create(self.pid_file(start_args.instance_id)),
&start_args.start_timeout,
|| async {
@@ -831,9 +802,9 @@ impl StorageController {
builder = builder.json(&body)
}
if let Some(private_key) = &self.private_key {
println!("Getting claims for path {path}");
println!("Getting claims for path {}", path);
if let Some(required_claims) = Self::get_claims_for_path(&path)? {
println!("Got claims {required_claims:?} for path {path}");
println!("Got claims {:?} for path {}", required_claims, path);
let jwt_token = encode_from_key_file(&required_claims, private_key)?;
builder = builder.header(
reqwest::header::AUTHORIZATION,

View File

@@ -36,10 +36,6 @@ enum Command {
listen_pg_addr: String,
#[arg(long)]
listen_pg_port: u16,
#[arg(long)]
listen_grpc_addr: Option<String>,
#[arg(long)]
listen_grpc_port: Option<u16>,
#[arg(long)]
listen_http_addr: String,
@@ -65,16 +61,10 @@ enum Command {
#[arg(long)]
scheduling: Option<NodeSchedulingPolicy>,
},
// Set a node status as deleted.
NodeDelete {
#[arg(long)]
node_id: NodeId,
},
/// Delete a tombstone of node from the storage controller.
NodeDeleteTombstone {
#[arg(long)]
node_id: NodeId,
},
/// Modify a tenant's policies in the storage controller
TenantPolicy {
#[arg(long)]
@@ -92,8 +82,6 @@ enum Command {
},
/// List nodes known to the storage controller
Nodes {},
/// List soft deleted nodes known to the storage controller
NodeTombstones {},
/// List tenants known to the storage controller
Tenants {
/// If this field is set, it will list the tenants on a specific node
@@ -422,8 +410,6 @@ async fn main() -> anyhow::Result<()> {
node_id,
listen_pg_addr,
listen_pg_port,
listen_grpc_addr,
listen_grpc_port,
listen_http_addr,
listen_http_port,
listen_https_port,
@@ -437,8 +423,6 @@ async fn main() -> anyhow::Result<()> {
node_id,
listen_pg_addr,
listen_pg_port,
listen_grpc_addr,
listen_grpc_port,
listen_http_addr,
listen_http_port,
listen_https_port,
@@ -649,7 +633,7 @@ async fn main() -> anyhow::Result<()> {
response
.new_shards
.iter()
.map(|s| format!("{s:?}"))
.map(|s| format!("{:?}", s))
.collect::<Vec<_>>()
.join(",")
);
@@ -771,8 +755,8 @@ async fn main() -> anyhow::Result<()> {
println!("Tenant {tenant_id}");
let mut table = comfy_table::Table::new();
table.add_row(["Policy", &format!("{policy:?}")]);
table.add_row(["Stripe size", &format!("{stripe_size:?}")]);
table.add_row(["Policy", &format!("{:?}", policy)]);
table.add_row(["Stripe size", &format!("{:?}", stripe_size)]);
table.add_row(["Config", &serde_json::to_string_pretty(&config).unwrap()]);
println!("{table}");
println!("Shards:");
@@ -789,7 +773,7 @@ async fn main() -> anyhow::Result<()> {
let secondary = shard
.node_secondary
.iter()
.map(|n| format!("{n}"))
.map(|n| format!("{}", n))
.collect::<Vec<_>>()
.join(",");
@@ -863,7 +847,7 @@ async fn main() -> anyhow::Result<()> {
}
} else {
// Make it obvious to the user that since they've omitted an AZ, we're clearing it
eprintln!("Clearing preferred AZ for tenant {tenant_id}");
eprintln!("Clearing preferred AZ for tenant {}", tenant_id);
}
// Construct a request that modifies all the tenant's shards
@@ -916,39 +900,6 @@ async fn main() -> anyhow::Result<()> {
.dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
.await?;
}
Command::NodeDeleteTombstone { node_id } => {
storcon_client
.dispatch::<(), ()>(
Method::DELETE,
format!("debug/v1/tombstone/{node_id}"),
None,
)
.await?;
}
Command::NodeTombstones {} => {
let mut resp = storcon_client
.dispatch::<(), Vec<NodeDescribeResponse>>(
Method::GET,
"debug/v1/tombstone".to_string(),
None,
)
.await?;
resp.sort_by(|a, b| a.listen_http_addr.cmp(&b.listen_http_addr));
let mut table = comfy_table::Table::new();
table.set_header(["Id", "Hostname", "AZ", "Scheduling", "Availability"]);
for node in resp {
table.add_row([
format!("{}", node.id),
node.listen_http_addr,
node.availability_zone_id,
format!("{:?}", node.scheduling),
format!("{:?}", node.availability),
]);
}
println!("{table}");
}
Command::TenantSetTimeBasedEviction {
tenant_id,
period,
@@ -1134,7 +1085,8 @@ async fn main() -> anyhow::Result<()> {
Err((tenant_shard_id, from, to, error)) => {
failure += 1;
println!(
"Failed to migrate {tenant_shard_id} from node {from} to node {to}: {error}"
"Failed to migrate {} from node {} to node {}: {}",
tenant_shard_id, from, to, error
);
}
}
@@ -1276,7 +1228,8 @@ async fn main() -> anyhow::Result<()> {
concurrency,
} => {
let mut path = format!(
"/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
"/v1/tenant/{}/timeline/{}/download_heatmap_layers",
tenant_shard_id, timeline_id,
);
if let Some(c) = concurrency {
@@ -1301,7 +1254,8 @@ async fn watch_tenant_shard(
) -> anyhow::Result<()> {
if let Some(until_migrated_to) = until_migrated_to {
println!(
"Waiting for tenant shard {tenant_shard_id} to be migrated to node {until_migrated_to}"
"Waiting for tenant shard {} to be migrated to node {}",
tenant_shard_id, until_migrated_to
);
}
@@ -1324,7 +1278,7 @@ async fn watch_tenant_shard(
"attached: {} secondary: {} {}",
shard
.node_attached
.map(|n| format!("{n}"))
.map(|n| format!("{}", n))
.unwrap_or("none".to_string()),
shard
.node_secondary
@@ -1338,12 +1292,15 @@ async fn watch_tenant_shard(
"(reconciler idle)"
}
);
println!("{summary}");
println!("{}", summary);
// Maybe drop out if we finished migration
if let Some(until_migrated_to) = until_migrated_to {
if shard.node_attached == Some(until_migrated_to) && !shard.is_reconciling {
println!("Tenant shard {tenant_shard_id} is now on node {until_migrated_to}");
println!(
"Tenant shard {} is now on node {}",
tenant_shard_id, until_migrated_to
);
break;
}
}

View File

@@ -13,6 +13,6 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
jq \
netcat-openbsd
#This is required for the pg_hintplan test
RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src/ && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw /ext-src/postgis-src
RUN mkdir -p /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw && chown postgres /ext-src/pg_hint_plan-src /postgres/contrib/file_fdw
USER postgres

View File

@@ -1,18 +1,18 @@
#!/usr/bin/env bash
#!/bin/bash
set -eux
# Generate a random tenant or timeline ID
#
# Takes a variable name as argument. The result is stored in that variable.
generate_id() {
local -n resvar=${1}
printf -v resvar '%08x%08x%08x%08x' ${SRANDOM} ${SRANDOM} ${SRANDOM} ${SRANDOM}
local -n resvar=$1
printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
}
PG_VERSION=${PG_VERSION:-14}
readonly CONFIG_FILE_ORG=/var/db/postgres/configs/config.json
readonly CONFIG_FILE=/tmp/config.json
CONFIG_FILE_ORG=/var/db/postgres/configs/config.json
CONFIG_FILE=/tmp/config.json
# Test that the first library path that the dynamic loader looks in is the path
# that we use for custom compiled software
@@ -20,17 +20,17 @@ first_path="$(ldconfig --verbose 2>/dev/null \
| grep --invert-match ^$'\t' \
| cut --delimiter=: --fields=1 \
| head --lines=1)"
test "${first_path}" = '/usr/local/lib'
test "$first_path" == '/usr/local/lib' || true # Remove the || true in a follow-up PR. Needed for backwards compat.
echo "Waiting pageserver become ready."
while ! nc -z pageserver 6400; do
sleep 1
sleep 1;
done
echo "Page server is ready."
cp "${CONFIG_FILE_ORG}" "${CONFIG_FILE}"
cp ${CONFIG_FILE_ORG} ${CONFIG_FILE}
if [[ -n "${TENANT_ID:-}" && -n "${TIMELINE_ID:-}" ]]; then
if [ -n "${TENANT_ID:-}" ] && [ -n "${TIMELINE_ID:-}" ]; then
tenant_id=${TENANT_ID}
timeline_id=${TIMELINE_ID}
else
@@ -41,7 +41,7 @@ else
"http://pageserver:9898/v1/tenant"
)
tenant_id=$(curl "${PARAMS[@]}" | jq -r .[0].id)
if [[ -z "${tenant_id}" || "${tenant_id}" = null ]]; then
if [ -z "${tenant_id}" ] || [ "${tenant_id}" = null ]; then
echo "Create a tenant"
generate_id tenant_id
PARAMS=(
@@ -51,7 +51,7 @@ else
"http://pageserver:9898/v1/tenant/${tenant_id}/location_config"
)
result=$(curl "${PARAMS[@]}")
printf '%s\n' "${result}" | jq .
echo $result | jq .
fi
echo "Check if a timeline present"
@@ -61,7 +61,7 @@ else
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
)
timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
if [[ -z "${timeline_id}" || "${timeline_id}" = null ]]; then
if [ -z "${timeline_id}" ] || [ "${timeline_id}" = null ]; then
generate_id timeline_id
PARAMS=(
-sbf
@@ -71,7 +71,7 @@ else
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
)
result=$(curl "${PARAMS[@]}")
printf '%s\n' "${result}" | jq .
echo $result | jq .
fi
fi
@@ -82,10 +82,10 @@ else
fi
echo "Adding pgx_ulid"
shared_libraries=$(jq -r '.spec.cluster.settings[] | select(.name=="shared_preload_libraries").value' ${CONFIG_FILE})
sed -i "s|${shared_libraries}|${shared_libraries},${ulid_extension}|" ${CONFIG_FILE}
sed -i "s/${shared_libraries}/${shared_libraries},${ulid_extension}/" ${CONFIG_FILE}
echo "Overwrite tenant id and timeline id in spec file"
sed -i "s|TENANT_ID|${tenant_id}|" ${CONFIG_FILE}
sed -i "s|TIMELINE_ID|${timeline_id}|" ${CONFIG_FILE}
sed -i "s/TENANT_ID/${tenant_id}/" ${CONFIG_FILE}
sed -i "s/TIMELINE_ID/${timeline_id}/" ${CONFIG_FILE}
cat ${CONFIG_FILE}
@@ -93,6 +93,5 @@ echo "Start compute node"
/usr/local/bin/compute_ctl --pgdata /var/db/postgres/compute \
-C "postgresql://cloud_admin@localhost:55433/postgres" \
-b /usr/local/bin/postgres \
--compute-id "compute-${RANDOM}" \
--config "${CONFIG_FILE}"
--dev
--compute-id "compute-$RANDOM" \
--config "$CONFIG_FILE"

View File

@@ -4,7 +4,6 @@
"timestamp": "2022-10-12T18:00:00.000Z",
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8c",
"suspend_timeout_seconds": -1,
"cluster": {
"cluster_id": "docker_compose",

View File

@@ -186,14 +186,13 @@ services:
neon-test-extensions:
profiles: ["test-extensions"]
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-${PG_VERSION:-16}}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
image: ${REPOSITORY:-ghcr.io/neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
environment:
- PGUSER=${PGUSER:-cloud_admin}
- PGPASSWORD=${PGPASSWORD:-cloud_admin}
- PGPASSWORD=cloud_admin
entrypoint:
- "/bin/bash"
- "-c"
command:
- sleep 3600
- sleep 1800
depends_on:
- compute

View File

@@ -54,15 +54,6 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
# It cannot be moved to Dockerfile now because the database directory is created after the start of the container
echo Adding dummy config
docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
# Prepare for the PostGIS test
docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
TMPDIR=$(mktemp -d)
docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
rm -rf "${TMPDIR}"
# The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
TMPDIR=$(mktemp -d)
docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
@@ -77,7 +68,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
# We are running tests now
rm -f testout.txt testout_contrib.txt
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0

View File

@@ -1,8 +0,0 @@
#!/bin/bash
# We need these settings to get the expected output results.
# We cannot use the environment variables e.g. PGTZ due to
# https://github.com/neondatabase/neon/issues/1287
export DATABASE=${1:-contrib_regression}
psql -c "ALTER DATABASE ${DATABASE} SET neon.allow_unstable_extensions='on'" \
-c "ALTER DATABASE ${DATABASE} SET DateStyle='Postgres,MDY'" \
-c "ALTER DATABASE ${DATABASE} SET TimeZone='America/Los_Angeles'" \

View File

@@ -1,16 +0,0 @@
#!/usr/bin/env bash
set -ex
cd "$(dirname "${0}")"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
dropdb --if-exists contrib_regression
createdb contrib_regression
cd h3_postgis/test
psql -d contrib_regression -c "CREATE EXTENSION postgis" -c "CREATE EXTENSION postgis_raster" -c "CREATE EXTENSION h3" -c "CREATE EXTENSION h3_postgis"
TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}
cd ../../h3/test
TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
dropdb --if-exists contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION h3"
${PG_REGRESS} --use-existing --dbname contrib_regression ${TESTS}

View File

@@ -1,7 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
cd h3/test
TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ${TESTS}

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname "${0}")"
if [ -f Makefile ]; then
make installcheck
fi

View File

@@ -1,9 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
[ -f Makefile ] || exit 0
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
TESTS=$(echo sql/* | sed 's|sql/||g; s|\.sql||g')
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ${TESTS}

View File

@@ -18,7 +18,6 @@ TESTS=${TESTS/row_level_security/}
TESTS=${TESTS/sqli_connection/}
dropdb --if-exist contrib_regression
createdb contrib_regression
. ../alter_db.sh
psql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression
${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}

View File

@@ -3,7 +3,6 @@ set -ex
cd "$(dirname "${0}")"
dropdb --if-exist contrib_regression
createdb contrib_regression
. ../alter_db.sh
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --load-extension=vector --load-extension=rag --dbname=contrib_regression basic_functions text_processing api_keys chunking_functions document_processing embedding_api_functions voyageai_functions

View File

@@ -20,6 +20,5 @@ installcheck: regression-test
regression-test:
dropdb --if-exists contrib_regression
createdb contrib_regression
../alter_db.sh
psql -d contrib_regression -c "CREATE EXTENSION $(EXTNAME)"
$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -3,7 +3,6 @@ set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
. ../alter_db.sh
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
REGRESS="$(make -n installcheck | awk '{print substr($0,index($0,"init-extension"));}')"
REGRESS="${REGRESS/startup_perms/}"

View File

@@ -1,70 +0,0 @@
# PostGIS Testing in Neon
This directory contains configuration files and patches for running PostGIS tests in the Neon database environment.
## Overview
PostGIS is a spatial database extension for PostgreSQL that adds support for geographic objects. Testing PostGIS compatibility ensures that Neon's modifications to PostgreSQL don't break compatibility with this critical extension.
## PostGIS Versions
- PostgreSQL v17: PostGIS 3.5.0
- PostgreSQL v14/v15/v16: PostGIS 3.3.3
## Test Configuration
The test setup includes:
- `postgis-no-upgrade-test.patch`: Disables upgrade tests by removing the upgrade test section from regress/runtest.mk
- `postgis-regular-v16.patch`: Version-specific patch for PostgreSQL v16
- `postgis-regular-v17.patch`: Version-specific patch for PostgreSQL v17
- `regular-test.sh`: Script to run PostGIS tests as a regular user
- `neon-test.sh`: Script to handle version-specific test configurations
- `raster_outdb_template.sql`: Template for raster tests with explicit file paths
## Excluded Tests
**Important Note:** The test exclusions listed below are specifically for regular-user tests against staging instances. These exclusions are necessary because staging instances run with limited privileges and cannot perform operations requiring superuser access. Docker-compose based tests are not affected by these exclusions.
### Tests Requiring Superuser Permissions
These tests cannot be run as a regular user:
- `estimatedextent`
- `regress/core/legacy`
- `regress/core/typmod`
- `regress/loader/TestSkipANALYZE`
- `regress/loader/TestANALYZE`
### Tests Requiring Filesystem Access
These tests need direct filesystem access that is only possible for superusers:
- `loader/load_outdb`
### Tests with Flaky Results
These tests have assumptions that don't always hold true:
- `regress/core/computed_columns` - Assumes computed columns always outperform alternatives, which is not consistently true
### Tests Requiring Tunable Parameter Modifications
These tests attempt to modify the `postgis.gdal_enabled_drivers` parameter, which is only accessible to superusers:
- `raster/test/regress/rt_wkb`
- `raster/test/regress/rt_addband`
- `raster/test/regress/rt_setbandpath`
- `raster/test/regress/rt_fromgdalraster`
- `raster/test/regress/rt_asgdalraster`
- `raster/test/regress/rt_astiff`
- `raster/test/regress/rt_asjpeg`
- `raster/test/regress/rt_aspng`
- `raster/test/regress/permitted_gdal_drivers`
- Loader tests: `BasicOutDB`, `Tiled10x10`, `Tiled10x10Copy`, `Tiled8x8`, `TiledAuto`, `TiledAutoSkipNoData`, `TiledAutoCopyn`
### Topology Tests (v17 only)
- `populate_topology_layer`
- `renametopogeometrycolumn`
## Other Modifications
- Binary.sql tests are modified to use explicit file paths
- Server-side SQL COPY commands (which require superuser privileges) are converted to client-side `\copy` commands
- Upgrade tests are disabled

View File

@@ -1,6 +0,0 @@
#!/bin/sh
set -ex
cd "$(dirname "$0")"
patch -p1 <"postgis-common-${PG_VERSION}.patch"
trap 'echo Cleaning up; patch -R -p1 <postgis-common-${PG_VERSION}.patch' EXIT
make installcheck-base

View File

@@ -1,37 +0,0 @@
diff --git a/regress/core/tests.mk b/regress/core/tests.mk
index 3abd7bc..64a9254 100644
--- a/regress/core/tests.mk
+++ b/regress/core/tests.mk
@@ -144,11 +144,6 @@ TESTS_SLOW = \
$(top_srcdir)/regress/core/concave_hull_hard \
$(top_srcdir)/regress/core/knn_recheck
-ifeq ($(shell expr "$(POSTGIS_PGSQL_VERSION)" ">=" 120),1)
- TESTS += \
- $(top_srcdir)/regress/core/computed_columns
-endif
-
ifeq ($(shell expr "$(POSTGIS_GEOS_VERSION)" ">=" 30700),1)
# GEOS-3.7 adds:
# ST_FrechetDistance
diff --git a/regress/runtest.mk b/regress/runtest.mk
index c051f03..010e493 100644
--- a/regress/runtest.mk
+++ b/regress/runtest.mk
@@ -24,16 +24,6 @@ check-regress:
POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(RUNTESTFLAGS_INTERNAL) $(TESTS)
- @if echo "$(RUNTESTFLAGS)" | grep -vq -- --upgrade; then \
- echo "Running upgrade test as RUNTESTFLAGS did not contain that"; \
- POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl \
- --upgrade \
- $(RUNTESTFLAGS) \
- $(RUNTESTFLAGS_INTERNAL) \
- $(TESTS); \
- else \
- echo "Skipping upgrade test as RUNTESTFLAGS already requested upgrades"; \
- fi
check-long:
$(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(TESTS) $(TESTS_SLOW)

View File

@@ -1,35 +0,0 @@
diff --git a/regress/core/tests.mk b/regress/core/tests.mk
index 9e05244..90987df 100644
--- a/regress/core/tests.mk
+++ b/regress/core/tests.mk
@@ -143,8 +143,7 @@ TESTS += \
$(top_srcdir)/regress/core/oriented_envelope \
$(top_srcdir)/regress/core/point_coordinates \
$(top_srcdir)/regress/core/out_geojson \
- $(top_srcdir)/regress/core/wrapx \
- $(top_srcdir)/regress/core/computed_columns
+ $(top_srcdir)/regress/core/wrapx
# Slow slow tests
TESTS_SLOW = \
diff --git a/regress/runtest.mk b/regress/runtest.mk
index 4b95b7e..449d5a2 100644
--- a/regress/runtest.mk
+++ b/regress/runtest.mk
@@ -24,16 +24,6 @@ check-regress:
@POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(RUNTESTFLAGS_INTERNAL) $(TESTS)
- @if echo "$(RUNTESTFLAGS)" | grep -vq -- --upgrade; then \
- echo "Running upgrade test as RUNTESTFLAGS did not contain that"; \
- POSTGIS_TOP_BUILD_DIR=$(abs_top_builddir) $(PERL) $(top_srcdir)/regress/run_test.pl \
- --upgrade \
- $(RUNTESTFLAGS) \
- $(RUNTESTFLAGS_INTERNAL) \
- $(TESTS); \
- else \
- echo "Skipping upgrade test as RUNTESTFLAGS already requested upgrades"; \
- fi
check-long:
$(PERL) $(top_srcdir)/regress/run_test.pl $(RUNTESTFLAGS) $(TESTS) $(TESTS_SLOW)

View File

@@ -1,186 +0,0 @@
diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk
index 00918e1..7e2b6cd 100644
--- a/raster/test/regress/tests.mk
+++ b/raster/test/regress/tests.mk
@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \
$(RUNTESTFLAGS_INTERNAL) \
--after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql
-RASTER_TEST_FIRST = \
- $(top_srcdir)/raster/test/regress/check_gdal \
- $(top_srcdir)/raster/test/regress/loader/load_outdb
+RASTER_TEST_FIRST =
RASTER_TEST_LAST = \
$(top_srcdir)/raster/test/regress/clean
@@ -33,9 +31,7 @@ RASTER_TEST_IO = \
RASTER_TEST_BASIC_FUNC = \
$(top_srcdir)/raster/test/regress/rt_bytea \
- $(top_srcdir)/raster/test/regress/rt_wkb \
$(top_srcdir)/raster/test/regress/box3d \
- $(top_srcdir)/raster/test/regress/rt_addband \
$(top_srcdir)/raster/test/regress/rt_band \
$(top_srcdir)/raster/test/regress/rt_tile
@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \
$(top_srcdir)/raster/test/regress/rt_neighborhood \
$(top_srcdir)/raster/test/regress/rt_nearestvalue \
$(top_srcdir)/raster/test/regress/rt_pixelofvalue \
- $(top_srcdir)/raster/test/regress/rt_polygon \
- $(top_srcdir)/raster/test/regress/rt_setbandpath
+ $(top_srcdir)/raster/test/regress/rt_polygon
RASTER_TEST_UTILITY = \
$(top_srcdir)/raster/test/regress/rt_utility \
- $(top_srcdir)/raster/test/regress/rt_fromgdalraster \
- $(top_srcdir)/raster/test/regress/rt_asgdalraster \
- $(top_srcdir)/raster/test/regress/rt_astiff \
- $(top_srcdir)/raster/test/regress/rt_asjpeg \
- $(top_srcdir)/raster/test/regress/rt_aspng \
$(top_srcdir)/raster/test/regress/rt_reclass \
$(top_srcdir)/raster/test/regress/rt_gdalwarp \
$(top_srcdir)/raster/test/regress/rt_gdalcontour \
@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \
RASTER_TEST_BUGS = \
$(top_srcdir)/raster/test/regress/bug_test_car5 \
- $(top_srcdir)/raster/test/regress/permitted_gdal_drivers \
$(top_srcdir)/raster/test/regress/tickets
RASTER_TEST_LOADER = \
$(top_srcdir)/raster/test/regress/loader/Basic \
$(top_srcdir)/raster/test/regress/loader/Projected \
$(top_srcdir)/raster/test/regress/loader/BasicCopy \
- $(top_srcdir)/raster/test/regress/loader/BasicFilename \
- $(top_srcdir)/raster/test/regress/loader/BasicOutDB \
- $(top_srcdir)/raster/test/regress/loader/Tiled10x10 \
- $(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \
- $(top_srcdir)/raster/test/regress/loader/Tiled8x8 \
- $(top_srcdir)/raster/test/regress/loader/TiledAuto \
- $(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \
- $(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn
+ $(top_srcdir)/raster/test/regress/loader/BasicFilename
RASTER_TESTS := $(RASTER_TEST_FIRST) \
$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \
diff --git a/regress/core/binary.sql b/regress/core/binary.sql
index 7a36b65..ad78fc7 100644
--- a/regress/core/binary.sql
+++ b/regress/core/binary.sql
@@ -1,4 +1,5 @@
SET client_min_messages TO warning;
+
CREATE SCHEMA tm;
CREATE TABLE tm.geoms (id serial, g geometry);
@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;
INSERT INTO tm.geoms(g)
SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;
-COPY tm.geoms TO :tmpfile WITH BINARY;
+-- define temp file path
+\set tmpfile '/tmp/postgis_binary_test.dat'
+
+-- export
+\set command '\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+-- import
CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;
-COPY tm.geoms_in FROM :tmpfile WITH BINARY;
-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id
- AND ST_OrderingEquals(i.g, o.g);
+\set command '\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o
+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);
CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms
WHERE geometrytype(g) NOT LIKE '%CURVE%'
AND geometrytype(g) NOT LIKE '%CIRCULAR%'
AND geometrytype(g) NOT LIKE '%SURFACE%'
AND geometrytype(g) NOT LIKE 'TRIANGLE%'
- AND geometrytype(g) NOT LIKE 'TIN%'
-;
+ AND geometrytype(g) NOT LIKE 'TIN%';
-COPY tm.geogs TO :tmpfile WITH BINARY;
+-- export
+\set command '\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+-- import
CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;
-COPY tm.geogs_in FROM :tmpfile WITH BINARY;
-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id
- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
+\set command '\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o
+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
DROP SCHEMA tm CASCADE;
+
diff --git a/regress/core/tests.mk b/regress/core/tests.mk
index 64a9254..94903c3 100644
--- a/regress/core/tests.mk
+++ b/regress/core/tests.mk
@@ -23,7 +23,6 @@ current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
RUNTESTFLAGS_INTERNAL += \
--before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \
--after-upgrade-script $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \
- --after-create-script $(top_srcdir)/regress/hooks/hook-after-create.sql \
--before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql
TESTS += \
@@ -40,7 +39,6 @@ TESTS += \
$(top_srcdir)/regress/core/dumppoints \
$(top_srcdir)/regress/core/dumpsegments \
$(top_srcdir)/regress/core/empty \
- $(top_srcdir)/regress/core/estimatedextent \
$(top_srcdir)/regress/core/forcecurve \
$(top_srcdir)/regress/core/flatgeobuf \
$(top_srcdir)/regress/core/geography \
@@ -55,7 +53,6 @@ TESTS += \
$(top_srcdir)/regress/core/out_marc21 \
$(top_srcdir)/regress/core/in_encodedpolyline \
$(top_srcdir)/regress/core/iscollection \
- $(top_srcdir)/regress/core/legacy \
$(top_srcdir)/regress/core/letters \
$(top_srcdir)/regress/core/long_xact \
$(top_srcdir)/regress/core/lwgeom_regress \
@@ -112,7 +109,6 @@ TESTS += \
$(top_srcdir)/regress/core/temporal_knn \
$(top_srcdir)/regress/core/tickets \
$(top_srcdir)/regress/core/twkb \
- $(top_srcdir)/regress/core/typmod \
$(top_srcdir)/regress/core/wkb \
$(top_srcdir)/regress/core/wkt \
$(top_srcdir)/regress/core/wmsservers \
diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
index 1fc77ac..c3cb9de 100644
--- a/regress/loader/tests.mk
+++ b/regress/loader/tests.mk
@@ -38,7 +38,5 @@ TESTS += \
$(top_srcdir)/regress/loader/Latin1 \
$(top_srcdir)/regress/loader/Latin1-implicit \
$(top_srcdir)/regress/loader/mfile \
- $(top_srcdir)/regress/loader/TestSkipANALYZE \
- $(top_srcdir)/regress/loader/TestANALYZE \
$(top_srcdir)/regress/loader/CharNoWidth
diff --git a/regress/run_test.pl b/regress/run_test.pl
index 0ec5b2d..1c331f4 100755
--- a/regress/run_test.pl
+++ b/regress/run_test.pl
@@ -147,7 +147,6 @@ $ENV{"LANG"} = "C";
# Add locale info to the psql options
# Add pg12 precision suppression
my $PGOPTIONS = $ENV{"PGOPTIONS"};
-$PGOPTIONS .= " -c lc_messages=C";
$PGOPTIONS .= " -c client_min_messages=NOTICE";
$PGOPTIONS .= " -c extra_float_digits=0";
$ENV{"PGOPTIONS"} = $PGOPTIONS;

View File

@@ -1,208 +0,0 @@
diff --git a/raster/test/regress/tests.mk b/raster/test/regress/tests.mk
index 00918e1..7e2b6cd 100644
--- a/raster/test/regress/tests.mk
+++ b/raster/test/regress/tests.mk
@@ -17,9 +17,7 @@ override RUNTESTFLAGS_INTERNAL := \
$(RUNTESTFLAGS_INTERNAL) \
--after-upgrade-script $(top_srcdir)/raster/test/regress/hooks/hook-after-upgrade-raster.sql
-RASTER_TEST_FIRST = \
- $(top_srcdir)/raster/test/regress/check_gdal \
- $(top_srcdir)/raster/test/regress/loader/load_outdb
+RASTER_TEST_FIRST =
RASTER_TEST_LAST = \
$(top_srcdir)/raster/test/regress/clean
@@ -33,9 +31,7 @@ RASTER_TEST_IO = \
RASTER_TEST_BASIC_FUNC = \
$(top_srcdir)/raster/test/regress/rt_bytea \
- $(top_srcdir)/raster/test/regress/rt_wkb \
$(top_srcdir)/raster/test/regress/box3d \
- $(top_srcdir)/raster/test/regress/rt_addband \
$(top_srcdir)/raster/test/regress/rt_band \
$(top_srcdir)/raster/test/regress/rt_tile
@@ -73,16 +69,10 @@ RASTER_TEST_BANDPROPS = \
$(top_srcdir)/raster/test/regress/rt_neighborhood \
$(top_srcdir)/raster/test/regress/rt_nearestvalue \
$(top_srcdir)/raster/test/regress/rt_pixelofvalue \
- $(top_srcdir)/raster/test/regress/rt_polygon \
- $(top_srcdir)/raster/test/regress/rt_setbandpath
+ $(top_srcdir)/raster/test/regress/rt_polygon
RASTER_TEST_UTILITY = \
$(top_srcdir)/raster/test/regress/rt_utility \
- $(top_srcdir)/raster/test/regress/rt_fromgdalraster \
- $(top_srcdir)/raster/test/regress/rt_asgdalraster \
- $(top_srcdir)/raster/test/regress/rt_astiff \
- $(top_srcdir)/raster/test/regress/rt_asjpeg \
- $(top_srcdir)/raster/test/regress/rt_aspng \
$(top_srcdir)/raster/test/regress/rt_reclass \
$(top_srcdir)/raster/test/regress/rt_gdalwarp \
$(top_srcdir)/raster/test/regress/rt_gdalcontour \
@@ -120,21 +110,13 @@ RASTER_TEST_SREL = \
RASTER_TEST_BUGS = \
$(top_srcdir)/raster/test/regress/bug_test_car5 \
- $(top_srcdir)/raster/test/regress/permitted_gdal_drivers \
$(top_srcdir)/raster/test/regress/tickets
RASTER_TEST_LOADER = \
$(top_srcdir)/raster/test/regress/loader/Basic \
$(top_srcdir)/raster/test/regress/loader/Projected \
$(top_srcdir)/raster/test/regress/loader/BasicCopy \
- $(top_srcdir)/raster/test/regress/loader/BasicFilename \
- $(top_srcdir)/raster/test/regress/loader/BasicOutDB \
- $(top_srcdir)/raster/test/regress/loader/Tiled10x10 \
- $(top_srcdir)/raster/test/regress/loader/Tiled10x10Copy \
- $(top_srcdir)/raster/test/regress/loader/Tiled8x8 \
- $(top_srcdir)/raster/test/regress/loader/TiledAuto \
- $(top_srcdir)/raster/test/regress/loader/TiledAutoSkipNoData \
- $(top_srcdir)/raster/test/regress/loader/TiledAutoCopyn
+ $(top_srcdir)/raster/test/regress/loader/BasicFilename
RASTER_TESTS := $(RASTER_TEST_FIRST) \
$(RASTER_TEST_METADATA) $(RASTER_TEST_IO) $(RASTER_TEST_BASIC_FUNC) \
diff --git a/regress/core/binary.sql b/regress/core/binary.sql
index 7a36b65..ad78fc7 100644
--- a/regress/core/binary.sql
+++ b/regress/core/binary.sql
@@ -1,4 +1,5 @@
SET client_min_messages TO warning;
+
CREATE SCHEMA tm;
CREATE TABLE tm.geoms (id serial, g geometry);
@@ -31,24 +32,39 @@ SELECT st_force4d(g) FROM tm.geoms WHERE id < 15 ORDER BY id;
INSERT INTO tm.geoms(g)
SELECT st_setsrid(g,4326) FROM tm.geoms ORDER BY id;
-COPY tm.geoms TO :tmpfile WITH BINARY;
+-- define temp file path
+\set tmpfile '/tmp/postgis_binary_test.dat'
+
+-- export
+\set command '\\copy tm.geoms TO ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+-- import
CREATE TABLE tm.geoms_in AS SELECT * FROM tm.geoms LIMIT 0;
-COPY tm.geoms_in FROM :tmpfile WITH BINARY;
-SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o WHERE i.id = o.id
- AND ST_OrderingEquals(i.g, o.g);
+\set command '\\copy tm.geoms_in FROM ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+SELECT 'geometry', count(*) FROM tm.geoms_in i, tm.geoms o
+WHERE i.id = o.id AND ST_OrderingEquals(i.g, o.g);
CREATE TABLE tm.geogs AS SELECT id,g::geography FROM tm.geoms
WHERE geometrytype(g) NOT LIKE '%CURVE%'
AND geometrytype(g) NOT LIKE '%CIRCULAR%'
AND geometrytype(g) NOT LIKE '%SURFACE%'
AND geometrytype(g) NOT LIKE 'TRIANGLE%'
- AND geometrytype(g) NOT LIKE 'TIN%'
-;
+ AND geometrytype(g) NOT LIKE 'TIN%';
-COPY tm.geogs TO :tmpfile WITH BINARY;
+-- export
+\set command '\\copy tm.geogs TO ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+-- import
CREATE TABLE tm.geogs_in AS SELECT * FROM tm.geogs LIMIT 0;
-COPY tm.geogs_in FROM :tmpfile WITH BINARY;
-SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o WHERE i.id = o.id
- AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
+\set command '\\copy tm.geogs_in FROM ':tmpfile' WITH (FORMAT BINARY)'
+:command
+
+SELECT 'geometry', count(*) FROM tm.geogs_in i, tm.geogs o
+WHERE i.id = o.id AND ST_OrderingEquals(i.g::geometry, o.g::geometry);
DROP SCHEMA tm CASCADE;
+
diff --git a/regress/core/tests.mk b/regress/core/tests.mk
index 90987df..74fe3f1 100644
--- a/regress/core/tests.mk
+++ b/regress/core/tests.mk
@@ -16,14 +16,13 @@ POSTGIS_PGSQL_VERSION=170
POSTGIS_GEOS_VERSION=31101
HAVE_JSON=yes
HAVE_SPGIST=yes
-INTERRUPTTESTS=yes
+INTERRUPTTESTS=no
current_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
RUNTESTFLAGS_INTERNAL += \
--before-upgrade-script $(top_srcdir)/regress/hooks/hook-before-upgrade.sql \
--after-upgrade-script $(top_srcdir)/regress/hooks/hook-after-upgrade.sql \
- --after-create-script $(top_srcdir)/regress/hooks/hook-after-create.sql \
--before-uninstall-script $(top_srcdir)/regress/hooks/hook-before-uninstall.sql
TESTS += \
@@ -40,7 +39,6 @@ TESTS += \
$(top_srcdir)/regress/core/dumppoints \
$(top_srcdir)/regress/core/dumpsegments \
$(top_srcdir)/regress/core/empty \
- $(top_srcdir)/regress/core/estimatedextent \
$(top_srcdir)/regress/core/forcecurve \
$(top_srcdir)/regress/core/flatgeobuf \
$(top_srcdir)/regress/core/frechet \
@@ -60,7 +58,6 @@ TESTS += \
$(top_srcdir)/regress/core/out_marc21 \
$(top_srcdir)/regress/core/in_encodedpolyline \
$(top_srcdir)/regress/core/iscollection \
- $(top_srcdir)/regress/core/legacy \
$(top_srcdir)/regress/core/letters \
$(top_srcdir)/regress/core/lwgeom_regress \
$(top_srcdir)/regress/core/measures \
@@ -119,7 +116,6 @@ TESTS += \
$(top_srcdir)/regress/core/temporal_knn \
$(top_srcdir)/regress/core/tickets \
$(top_srcdir)/regress/core/twkb \
- $(top_srcdir)/regress/core/typmod \
$(top_srcdir)/regress/core/wkb \
$(top_srcdir)/regress/core/wkt \
$(top_srcdir)/regress/core/wmsservers \
diff --git a/regress/loader/tests.mk b/regress/loader/tests.mk
index ac4f8ad..4bad4fc 100644
--- a/regress/loader/tests.mk
+++ b/regress/loader/tests.mk
@@ -38,7 +38,5 @@ TESTS += \
$(top_srcdir)/regress/loader/Latin1 \
$(top_srcdir)/regress/loader/Latin1-implicit \
$(top_srcdir)/regress/loader/mfile \
- $(top_srcdir)/regress/loader/TestSkipANALYZE \
- $(top_srcdir)/regress/loader/TestANALYZE \
$(top_srcdir)/regress/loader/CharNoWidth \
diff --git a/regress/run_test.pl b/regress/run_test.pl
index cac4b2e..4c7c82b 100755
--- a/regress/run_test.pl
+++ b/regress/run_test.pl
@@ -238,7 +238,6 @@ $ENV{"LANG"} = "C";
# Add locale info to the psql options
# Add pg12 precision suppression
my $PGOPTIONS = $ENV{"PGOPTIONS"};
-$PGOPTIONS .= " -c lc_messages=C";
$PGOPTIONS .= " -c client_min_messages=NOTICE";
$PGOPTIONS .= " -c extra_float_digits=0";
$ENV{"PGOPTIONS"} = $PGOPTIONS;
diff --git a/topology/test/tests.mk b/topology/test/tests.mk
index cbe2633..2c7c18f 100644
--- a/topology/test/tests.mk
+++ b/topology/test/tests.mk
@@ -46,9 +46,7 @@ TESTS += \
$(top_srcdir)/topology/test/regress/legacy_query.sql \
$(top_srcdir)/topology/test/regress/legacy_validate.sql \
$(top_srcdir)/topology/test/regress/polygonize.sql \
- $(top_srcdir)/topology/test/regress/populate_topology_layer.sql \
$(top_srcdir)/topology/test/regress/removeunusedprimitives.sql \
- $(top_srcdir)/topology/test/regress/renametopogeometrycolumn.sql \
$(top_srcdir)/topology/test/regress/renametopology.sql \
$(top_srcdir)/topology/test/regress/share_sequences.sql \
$(top_srcdir)/topology/test/regress/sqlmm.sql \

File diff suppressed because one or more lines are too long

View File

@@ -1,17 +0,0 @@
#!/bin/bash
set -ex
cd "$(dirname "${0}")"
dropdb --if-exist contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "ALTER DATABASE contrib_regression SET TimeZone='UTC'" \
-c "ALTER DATABASE contrib_regression SET DateStyle='ISO, MDY'" \
-c "CREATE EXTENSION postgis SCHEMA public" \
-c "CREATE EXTENSION postgis_topology" \
-c "CREATE EXTENSION postgis_tiger_geocoder CASCADE" \
-c "CREATE EXTENSION postgis_raster SCHEMA public" \
-c "CREATE EXTENSION postgis_sfcgal SCHEMA public"
patch -p1 <"postgis-common-${PG_VERSION}.patch"
patch -p1 <"postgis-regular-${PG_VERSION}.patch"
psql -d contrib_regression -f raster_outdb_template.sql
trap 'patch -R -p1 <postgis-regular-${PG_VERSION}.patch && patch -R -p1 <"postgis-common-${PG_VERSION}.patch"' EXIT
POSTGIS_REGRESS_DB=contrib_regression RUNTESTFLAGS=--nocreate make installcheck-base

View File

@@ -11,6 +11,5 @@ PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
installcheck:
dropdb --if-exists contrib_regression
createdb contrib_regression
../alter_db.sh
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_bge_small_en_v15"
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -11,6 +11,5 @@ PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
installcheck:
dropdb --if-exists contrib_regression
createdb contrib_regression
../alter_db.sh
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_jina_reranker_v1_tiny_en"
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -3,6 +3,5 @@ set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
. ../alter_db.sh
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array

View File

@@ -63,9 +63,5 @@ done
for d in ${FAILED}; do
cat "$(find $d -name regression.diffs)"
done
for postgis_diff in /tmp/pgis_reg/*_diff; do
echo "${postgis_diff}:"
cat "${postgis_diff}"
done
echo "${FAILED}"
exit 1

View File

@@ -82,8 +82,7 @@ EXTENSIONS='[
{"extname": "pg_ivm", "extdir": "pg_ivm-src"},
{"extname": "pgjwt", "extdir": "pgjwt-src"},
{"extname": "pgtap", "extdir": "pgtap-src"},
{"extname": "pg_repack", "extdir": "pg_repack-src"},
{"extname": "h3", "extdir": "h3-pg-src"}
{"extname": "pg_repack", "extdir": "pg_repack-src"}
]'
EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d

View File

@@ -1,396 +0,0 @@
# Memo: Endpoint Persistent Unlogged Files Storage
Created on 2024-11-05
Implemented on N/A
## Summary
A design for a storage system that allows storage of files required to make
Neon's Endpoints have a better experience at or after a reboot.
## Motivation
Several systems inside PostgreSQL (and Neon) need some persistent storage for
optimal workings across reboots and restarts, but still work without.
Examples are the query-level statistics files of `pg_stat_statements` in
`pg_stat/pg_stat_statements.stat`, and `pg_prewarm`'s `autoprewarm.blocks`.
We need a storage system that can store and manage these files for each
Endpoint, without necessarily granting users access to an unlimited storage
device.
## Goals
- Store known files for Endpoints with reasonable persistence.
_Data loss in this service, while annoying and bad for UX, won't lose any
customer's data._
## Non Goals (if relevant)
- This storage system does not need branching, file versioning, or other such
features. The files are as ephemeral to the timeline of the data as the
Endpoints that host the data.
- This storage system does not need to store _all_ user files, only 'known'
user files.
- This storage system does not need to be hosted fully inside Computes.
_Instead, this will be a separate component similar to Pageserver,
SafeKeeper, the S3 proxy used for dynamically loaded extensions, etc._
## Impacted components
- Compute needs new code to load and store these files in its lifetime.
- Control Plane needs to consider this new storage system when signalling
the deletion of an Endpoint, Timeline, or Tenant.
- Control Plane needs to consider this new storage system when it resets
or re-assigns an endpoint's timeline/branch state.
A new service is created: the Endpoint Persistent Unlogged Files Storage
service. This could be integrated in e.g. Pageserver or Control Plane, or a
separately hosted service.
## Proposed implementation
Endpoint-related data files are managed by a newly designed service (which
optionally is integrated in an existing service like Pageserver or Control
Plane), which stores data directly into S3 or any blob storage of choice.
Upon deletion of the Endpoint, or reassignment of the endpoint to a different
branch, this ephemeral data is dropped: the data stored may not match the
state of the branch's data after reassignment, and on endpoint deletion the
data won't have any use to the user.
Compute gets credentials (JWT token with Tenant, Timeline & Endpoint claims)
which it can use to authenticate to this new service and retrieve and store
data associated with this endpoint. This limited scope reduces leaks of data
across endpoints and timeline resets, and limits the ability of endpoints to
mess with other endpoints' data.
The path of this endpoint data in S3 is initially as follows:
s3://<regional-epufs-bucket>/
tenants/
<hex-tenant-id>/
tenants/
<hex-timeline-id>/
endpoints/
<endpoint-id>/
pgdata/
<file_path_in_pgdatadir>
For other blob storages an equivalent or similar path can be constructed.
### Reliability, failure modes and corner cases (if relevant)
Reliability is important, but not critical to the workings of Neon. The data
stored in this service will, when lost, reduce performance, but won't be a
cause of permanent data loss - only operational metadata is stored.
Most, if not all, blob storage services have sufficiently high persistence
guarantees to cater our need for persistence and uptime. The only concern with
blob storages is that the access latency is generally higher than local disk,
but for the object types stored (cache state, ...) I don't think this will be
much of an issue.
### Interaction/Sequence diagram (if relevant)
In these diagrams you can replace S3 with any persistent storage device of
choice, but S3 is chosen as representative name: The well-known and short name
of AWS' blob storage. Azure Blob Storage should work too, but it has a much
longer name making it less practical for the diagrams.
Write data:
```http
POST /tenants/<tenant-id>/timelines/<tl-id>/endpoints/<endpoint-id>/pgdata/<the-pgdata-path>
Host: epufs.svc.neon.local
<<<
200 OK
{
"version": "<opaque>", # opaque file version token, changes when the file contents change
"size": <bytes>,
}
```
```mermaid
sequenceDiagram
autonumber
participant co as Compute
participant ep as EPUFS
participant s3 as Blob Storage
co-->ep: Connect with credentials
co->>+ep: Store Unlogged Persistent File
opt is authenticated
ep->>s3: Write UPF to S3
end
ep->>-co: OK / Failure / Auth Failure
co-->ep: Cancel connection
```
Read data: (optional with cache-relevant request parameters, e.g. If-Modified-Since)
```http
GET /tenants/<tenant-id>/timelines/<tl-id>/endpoints/<endpoint-id>/pgdata/<the-pgdata-path>
Host: epufs.svc.neon.local
<<<
200 OK
<file data>
```
```mermaid
sequenceDiagram
autonumber
participant co as Compute
participant ep as EPUFS
participant s3 as Blob Storage
co->>+ep: Read Unlogged Persistent File
opt is authenticated
ep->>+s3: Request UPF from storage
s3->>-ep: Receive UPF from storage
end
ep->>-co: OK(response) / Failure(storage, auth, ...)
```
Compute Startup:
```mermaid
sequenceDiagram
autonumber
participant co as Compute
participant ps as Pageserver
participant ep as EPUFS
participant es as Extension server
note over co: Bind endpoint ep-xxx
par Get basebackup
co->>+ps: Request basebackup @ LSN
ps-)ps: Construct basebackup
ps->>-co: Receive basebackup TAR @ LSN
and Get startup-critical Unlogged Persistent Files
co->>+ep: Get all UPFs of endpoint ep-xxx
ep-)ep: Retrieve and gather all UPFs
ep->>-co: TAR of UPFs
and Get startup-critical extensions
loop For every startup-critical extension
co->>es: Get critical extension
es->>co: Receive critical extension
end
end
note over co: Start compute
```
CPlane ops:
```http
DELETE /tenants/<tenant-id>/timelines/<timeline-id>/endpoints/<endpoint-id>
Host: epufs.svc.neon.local
<<<
200 OK
{
"tenant": "<tenant-id>",
"timeline": "<timeline-id>",
"endpoint": "<endpoint-id>",
"deleted": {
"files": <count>,
"bytes": <count>,
},
}
```
```http
DELETE /tenants/<tenant-id>/timelines/<timeline-id>
Host: epufs.svc.neon.local
<<<
200 OK
{
"tenant": "<tenant-id>",
"timeline": "<timeline-id>",
"deleted": {
"files": <count>,
"bytes": <count>,
},
}
```
```http
DELETE /tenants/<tenant-id>
Host: epufs.svc.neon.local
<<<
200 OK
{
"tenant": "<tenant-id>",
"deleted": {
"files": <count>,
"bytes": <count>,
},
}
```
```mermaid
sequenceDiagram
autonumber
participant cp as Control Plane
participant ep as EPUFS
participant s3 as Blob Storage
alt Tenant deleted
cp-)ep: Tenant deleted
loop For every object associated with removed tenant
ep->>s3: Remove data of deleted tenant from Storage
end
opt
ep-)cp: Tenant cleanup complete
end
alt Timeline deleted
cp-)ep: Timeline deleted
loop For every object associated with removed timeline
ep->>s3: Remove data of deleted timeline from Storage
end
opt
ep-)cp: Timeline cleanup complete
end
else Endpoint reassigned or removed
cp->>+ep: Endpoint reassigned
loop For every object associated with reassigned/removed endpoint
ep->>s3: Remove data from Storage
end
ep->>-cp: Cleanup complete
end
```
### Scalability (if relevant)
Provisionally: As this service is going to be part of compute startup, this
service should be able to quickly respond to all requests. Therefore this
service is deployed to every AZ we host Computes in, and Computes communicate
(generally) only to the EPUFS endpoint of the AZ they're hosted in.
Local caching of frequently restarted endpoints' data or metadata may be
needed for best performance. However, due to the regional nature of stored
data but zonal nature of the service deployment, we should be careful when we
implement any local caching, as it is possible that computes in AZ 1 will
update data originally written and thus cached by AZ 2. Cache version tests
and invalidation is therefore required if we want to roll out caching to this
service, which is too broad a scope for an MVC. This is why caching is left
out of scope for this RFC, and should be considered separately after this RFC
is implemented.
### Security implications (if relevant)
This service must be able to authenticate users at least by Tenant ID,
Timeline ID and Endpoint ID. This will use the existing JWT infrastructure of
Compute, which will be upgraded to the extent needed to support Timeline- and
Endpoint-based claims.
The service requires unlimited access to (a prefix of) a blob storage bucket,
and thus must be hosted outside the Compute VM sandbox.
A service that generates pre-signed request URLs for Compute to download the
data from that URL is likely problematic, too: Compute would be able to write
unlimited data to the bucket, or exfiltrate this signed URL to get read/write
access to specific objects in this bucket, which would still effectively give
users access to the S3 bucket (but with improved access logging).
There may be a use case for transferring data associated with one endpoint to
another endpoint (e.g. to make one endpoint warm its caches with the state of
another endpoint), but that's not currently in scope, and specific needs may
be solved through out-of-line communication of data or pre-signed URLs.
### Unresolved questions (if relevant)
Caching of files is not in the implementation scope of the document, but
should at some future point be considered to maximize performance.
## Alternative implementation (if relevant)
Several ideas have come up to solve this issue:
### Use AUXfile
One prevalent idea was to WAL-log the files using our AUXfile mechanism.
Benefits:
+ We already have this storage mechanism
Demerits:
- It isn't available on read replicas
- Additional WAL will be consumed during shutdown and after the shutdown
checkpoint, which needs PG modifications to work without panics.
- It increases the data we need to manage in our versioned storage, thus
causing higher storage costs with higher retention due to duplication at
the storage layer.
### Sign URLs for read/write operations, instead of proxying them
Benefits:
+ The service can be implemented with a much reduced IO budget
Demerits:
- Users could get access to these signed credentials
- Not all blob storage services may implement URL signing
### Give endpoints each their own directly accessed block volume
Benefits:
+ Easier to integrate for PostgreSQL
Demerits:
- Little control on data size and contents
- Potentially problematic as we'd need to store data all across the pgdata
directory.
- EBS is not a good candidate
- Attaches in 10s of seconds, if not more; i.e. too cold to start
- Shared EBS volumes are a no-go, as you'd have to schedule the endpoint
with users of the same EBS volumes, which can't work with VM migration
- EBS storage costs are very high (>80$/kilotenant when using a
volume/tenant)
- EBS volumes can't be mounted across AZ boundaries
- Bucket per endpoint is unfeasible
- S3 buckets are priced at $20/month per 1k, which we could better spend
on developers.
- Allocating service accounts takes time (100s of ms), and service accounts
are a limited resource, too; so they're not a good candidate to allocate
on a per-endpoint basis.
- Giving credentials limited to prefix has similar issues as the pre-signed
URL approach.
- Bucket DNS lookup will fill DNS caches and put pressure on DNS lookup
much more than our current systems would.
- Volumes bound by hypervisor are unlikely
- This requires significant investment and increased software on the
hypervisor.
- It is unclear if we can attach volumes after boot, i.e. for pooled
instances.
### Put the files into a table
Benefits:
+ Mostly already available in PostgreSQL
Demerits:
- Uses WAL
- Can't be used after shutdown checkpoint
- Needs a RW endpoint, and table & catalog access to write to this data
- Gets hit with DB size limitations
- Depending on user acces:
- Inaccessible:
The user doesn't have control over database size caused by
these systems.
- Accessible:
The user can corrupt these files and cause the system to crash while
user-corrupted files are present, thus increasing on-call overhead.
## Definition of Done (if relevant)
This project is done if we have:
- One S3 bucket equivalent per region, which stores this per-endpoint data.
- A new service endpoint in at least every AZ, which indirectly grants
endpoints access to the data stored for these endpoints in these buckets.
- Compute writes & reads temp-data at shutdown and startup, respectively, for
at least the pg_prewarm or lfc_prewarm state files.
- Cleanup of endpoint data is triggered when the endpoint is deleted or is
detached from its current timeline.

View File

@@ -1,194 +0,0 @@
# Bottommost Garbage-Collection Compaction
## Summary
The goal of this doc is to propose a way to reliably collect garbages below the GC horizon. This process is called bottom-most garbage-collect-compaction, and is part of the broader legacy-enhanced compaction that we plan to implement in the future.
## Motivation
The current GC algorithm will wait until the covering via image layers before collecting the garbages of a key region. Relying on image layer generation to generate covering images is not reliable. There are prior arts to generate feedbacks from the GC algorithm to the image generation process to accelerate garbage collection, but it slows down the system and creates write amplification.
# Basic Idea
![](images/036-bottom-most-gc-compaction/01-basic-idea.svg)
The idea of bottom-most compaction is simple: we rewrite all layers that are below or intersect with the GC horizon to produce a flat level of image layers at the GC horizon and deltas above the GC horizon. In this process,
- All images and deltas ≤ GC horizon LSN will be dropped. This process collects garbages.
- We produce images for all keys involved in the compaction process at the GC horizon.
Therefore, it can precisely collect all garbages below the horizon, and reduce the space amplification, i.e., in the staircase pattern (test_gc_feedback).
![The staircase pattern in test_gc_feedback in the original compaction algorithm. The goal is to collect garbage below the red horizontal line.](images/036-bottom-most-gc-compaction/12-staircase-test-gc-feedback.png)
The staircase pattern in test_gc_feedback in the original compaction algorithm. The goal is to collect garbage below the red horizontal line.
# Branches
With branches, the bottom-most compaction should retain a snapshot of the keyspace at the `retain_lsn` so that the child branch can access data at the branch point. This requires some modifications to the basic bottom-most compaction algorithm that we sketched above.
![](images/036-bottom-most-gc-compaction/03-retain-lsn.svg)
## Single Timeline w/ Snapshots: handle `retain_lsn`
First lets look into the case where we create branches over the main branch but dont write any data to them (aka “snapshots”).
The bottom-most compaction algorithm collects all deltas and images of a key and can make decisions on what data to retain. Given that we have a single keys history as below:
```
LSN 0x10 -> A
LSN 0x20 -> append B
retain_lsn: 0x20
LSN 0x30 -> append C
LSN 0x40 -> append D
retain_lsn: 0x40
LSN 0x50 -> append E
GC horizon: 0x50
LSN 0x60 -> append F
```
The algorithm will produce:
```
LSN 0x20 -> AB
(drop all history below the earliest retain_lsn)
LSN 0x40 -> ABCD
(assume the cost of replaying 2 deltas is higher than storing the full image, we generate an image here)
LSN 0x50 -> append E
(replay one delta is cheap)
LSN 0x60 -> append F
(keep everything as-is above the GC horizon)
```
![](images/036-bottom-most-gc-compaction/05-btmgc-parent.svg)
What happens is that we balance the space taken by each retain_lsn and the cost of replaying deltas during the bottom-most compaction process. This is controlled by a threshold. If `count(deltas) < $threshold`, the deltas will be retained. Otherwise, an image will be generated and the deltas will be dropped.
In the example above, the `$threshold` is 2.
## Child Branches with data: pull + partial images
In the previous section we have shown how bottom-most compaction respects `retain_lsn` so that all data that was readable at branch creation remains readable. But branches can have data on their own, and that data can fall out of the branchs PITR window. So, this section explains how we deal with that.
We will run the same bottom-most compaction for these branches, to ensure the space amplification on the child branch is reasonable.
```
branch_lsn: 0x20
LSN 0x30 -> append P
LSN 0x40 -> append Q
LSN 0x50 -> append R
GC horizon: 0x50
LSN 0x60 -> append S
```
Note that bottom-most compaction happens on a per-timeline basis. When it processes this key, it only reads the history from LSN 0x30 without a base image. Therefore, on child branches, the bottom-most compaction process will make image creation decisions based on the same `count(deltas) < $threshold` criteria, and if it decides to create an image, the base image will be retrieved from the ancestor branch.
```
branch_lsn: 0x20
LSN 0x50 -> ABPQR
(we pull the image at LSN 0x20 from the ancestor branch to get AB, and then apply append PQ to the page; we replace the record at 0x40 with an image and drop the delta)
GC horizon: 0x50
LSN 0x60 -> append S
```
![](images/036-bottom-most-gc-compaction/06-btmgc-child.svg)
Note that for child branches, we do not create image layers for the images when bottom-most compaction runs. Instead, we drop the 0x30/0x40/0x50 delta records and directly place the image ABPQR@0x50 into the delta layer, which serves as a sparse image layer. For child branches, if we create image layers, we will need to put all keys in the range into the image layer. This causes space bloat and slow compactions. In this proposal, the compaction process will only compact and process keys modified inside the child branch.
# Result
Bottom-most compaction ensures all garbage under the GC horizon gets collected right away (compared with “eventually” in the current algorithm). Meanwhile, it generates images at each of the retain_lsn to ensure branch reads are fast. As we make per-key decisions on whether to generate an image or not, the theoretical lower bound of the storage space we need to retain for a branch is lower than before.
Before: min(sum(logs for each key), sum(image for each key)), for each partition — we always generate image layers on a key range
After: sum(min(logs for each key, image for each key))
# Compaction Trigger
The bottom-most compaction can be automatically triggered. The goal of the trigger is that it should ensure a constant factor for write amplification. Say that the user write 1GB of WAL into the system, we should write 1GB x C data to S3. The legacy compaction algorithm does not have such a constant factor C. The data we write to S3 is quadratic to the logical size of the database (see [A Theoretical View of Neon Storage](https://www.notion.so/A-Theoretical-View-of-Neon-Storage-8d7ad7555b0c41b2a3597fa780911194?pvs=21)).
We propose the following compaction trigger that generates a constant write amplification factor. Write amplification >= total writes to S3 / total user writes. We only analyze the write amplification caused by the bottom-most GC-compaction process, ignoring the legacy create image layers amplification.
Given that we have ***X*** bytes of the delta layers above the GC horizon, ***A*** bytes of the delta layers intersecting with the GC horizon, ***B*** bytes of the delta layers below the GC horizon, and ***C*** bytes of the image layers below the GC horizon.
The legacy GC + compaction loop will always keep ***A*** unchanged, reduce ***B and C*** when there are image layers covering the key range. This yields 0 write amplification (only file deletions) and extra ***B*** bytes of space.
![](images/036-bottom-most-gc-compaction/09-btmgc-analysis-2.svg)
The bottom-most compaction proposed here will split ***A*** into deltas above the GC horizon and below the GC horizon. Everything below the GC horizon will be image layers after the compaction (not considering branches). Therefore, this yields ***A+C*** extra write traffic each iteration, plus 0 extra space.
![](images/036-bottom-most-gc-compaction/07-btmgc-analysis-1.svg)
Also considering read amplification (below the GC horizon). When a read request reaches the GC horizon, the read amplification will be (A+B+C)/C=1+(A+B)/C. Reducing ***A*** and ***B*** can help reduce the read amplification below the GC horizon.
The metrics-based trigger will wait until a point that space amplification is not that large and write amplification is not that large before the compaction gets triggered. The trigger is defined as **(A+B)/C ≥ 1 (or some other ratio)**.
To reason about this trigger, consider the two cases:
**Data Ingestion**
User keeps ingesting data into the database, which indicates that WAL size roughly equals to the database logical size. The compaction gets triggered only when the newly-written WAL roughly equals to the current bottom-most image size (=X). Therefore, its triggered when the database size gets doubled. This is a reasonable amount of work. Write amplification is 2X/X=1 for the X amount of data written.
![](images/036-bottom-most-gc-compaction/10-btmgc-analysis-3.svg)
**Updates/Deletion**
In this case, WAL size will be larger than the database logical size ***D***. The compaction gets triggered for every ***D*** bytes of WAL written. Therefore, for every ***D*** bytes of WAL, we rewrite the bottom-most layer, which produces an extra ***D*** bytes of write amplification. This incurs exactly 2x write amplification (by the write of D), 1.5x write amplification (if we count from the start of the process) and no space amplification.
![](images/036-bottom-most-gc-compaction/11-btmgc-analysis-4.svg)
Note that here I try to reason that write amplification is a constant (i.e., the data we write to S3 is proportional to the data the user write). The main problem with the current legacy compaction algorithm is that write amplification is proportional to the database size.
The next step is to optimize the write amplification above the GC horizon (i.e., change the image creation criteria, top-most compaction, or introduce tiered compaction), to ensure the write amplification of the whole system is a constant factor.
20GB layers → +20GB layers → delete 20GB, need 40GB temporary space
# Sub-Compactions
The gc-compaction algorithm may take a long time and we need to split the job into multiple sub-compaction jobs.
![](images/036-bottom-most-gc-compaction/13-job-split.svg)
As in the figure, the auto-trigger schedules a compaction job covering the full keyspace below a specific LSN. In such case that we cannot finish compacting it in one run in a reasonable amount of time, the algorithm will vertically split it into multiple jobs (in this case, 5).
Each gc-compaction job will create one level of delta layers and one flat level of image layers for each LSN. Those layers will be automatically split based on size, which means that if the sub-compaction job produces 1GB of deltas, it will produce 4 * 256MB delta layers. For those layers that is not fully contained within the sub-compaction job rectangles, it will be rewritten to only contain the keys outside of the key range.
# Implementation
The main implementation of gc-compaction is in `compaction.rs`.
* `compact_with_gc`: The main loop of gc-compaction. It takes a rectangle range of the layer map and compact that specific range. It selects layers intersecting with the rectangle, downloads the layers, creates the k-merge iterator to read those layers in the key-lsn order, and decide which keys to keep or insert a reconstructed page. The process is the basic unit of a gc-compaction and is not interruptable. If the process gets preempted by L0 compaction, it has to be restarted from scratch. For layers overlaps with the rectangle but not fully inside, the main loop will also rewrite them so that the new layer (or two layers if both left and right ends are outside of the rectangle) has the same LSN range as the original one but only contain the keys outside of the compaction range.
* `gc_compaction_split_jobs`: Splits a big gc-compaction job into sub-compactions based on heuristics in the layer map. The function looks at the layer map and splits the compaction job based on the size of the layers so that each compaction job only pulls ~4GB of layer files.
* `generate_key_retention` and `KeyHistoryRetention`: Implements the algorithm described in the "basic idea" and "branch" chapter of this RFC. It takes a vector of history of a key (key-lsn-value) and decides which LSNs of the key to retain. If there are too many deltas between two retain_lsns, it will reconstruct the page and insert an image into the compaction result. Also, we implement `KeyHistoryRetention::verify` to ensure the generated result is not corrupted -- all retain_lsns and all LSNs above the gc-horizon should be accessible.
* `GcCompactionQueue`: the automatic trigger implementation for gc-compaction. `GcCompactionQueue::iteration` is called at the end of the tenant compaction loop. It will then call `trigger_auto_compaction` to decide whether to trigger a gc-compaction job for this tenant. If yes, the compaction-job will be added to the compaction queue, and the queue will be slowly drained once there are no other compaction jobs running. gc-compaction has the lowest priority. If a sub-compaction job is not successful or gets preempted by L0 compaction (see limitations for reasons why a compaction job would fail), it will _not_ be retried.
* Changes to `index_part.json`: we added a `last_completed_lsn` field to the index part for the auto-trigger to decide when to trigger a compaction.
* Changes to the read path: when gc-compaction updates the layer map, all reads need to wait. See `gc_compaction_layer_update_lock` and comments in the code path for more information.
Gc-compaction can also be scheduled over the HTTP API. Example:
```
curl 'localhost:9898/v1/tenant/:tenant_id/timeline/:timeline_id/compact?enhanced_gc_bottom_most_compaction=true&dry_run=true' -X PUT -H "Content-Type: application/json" -d '{"scheduled": true, "compact_key_range": { "start": "000000067F0000A0000002A1CF0100000000", "end": "000000067F0000A0000002A1D70100000000" } }'
```
The `dry_run` mode can be specified in the query string so that the compaction will go through all layers to estimate how much space can be saved without writing the compaction result into the layer map.
The auto-trigger is controlled by tenant-level flag `gc_compaction_enabled`. If this is set to false, no gc-compaction will be automatically scheduled on this tenant (but manual trigger still works).
# Next Steps
There are still some limitations of gc-compaction itself that needs to be resolved and tested,
- gc-compaction is currently only automatically triggered on root branches. We have not tested gc-compaction on child branches in staging.
- gc-compaction will skip aux key regions because of the possible conflict with the assumption of aux file tombstones.
- gc-compaction does not consider keyspaces at retain_lsns and only look at keys in the layers. This also causes us giving up some sub-compaction jobs because a key might have part of its history available due to traditional GC removing part of the history.
- We limit gc-compaction to run over shards <= 150GB to avoid gc-compaction taking too much time blocking other compaction jobs. The sub-compaction split algorithm needs to be improved to be able to split vertically and horizontally. Also, we need to move the download layer process out of the compaction loop so that we don't block other compaction jobs for too long.
- The compaction trigger always schedules gc-compaction from the lowest LSN to the gc-horizon. Currently we do not schedule compaction jobs that only selects layers in the middle. Allowing this could potentially reduce the number of layers read/write throughout the process.
- gc-compaction will give up if there are too many layers to rewrite or if there are not enough disk space for the compaction.
- gc-compaction sometimes fails with "no key produced during compaction", which means that all existing keys within the compaction range can be collected; but we don't have a way to write this information back to the layer map -- we cannot generate an empty image layer.
- We limit the maximum size of deltas for a single key to 512MB. If above this size, gc-compaction will give up. This can be resolved by changing `generate_key_retention` to be a stream instead of requiring to collect all the key history.
In the future,
- Top-most compaction: ensure we always have an image coverage for the latest data (or near the latest data), so that reads will be fast at the latest LSN.
- Tiered compaction on deltas: ensure read from any LSN is fast.
- Per-timeline compaction → tenant-wide compaction?

Some files were not shown because too many files have changed in this diff Show More