Mirror of https://github.com/neondatabase/neon.git
synced 2026-01-21 04:12:55 +00:00
Compare commits
131 Commits
diko/baseb ... skyzh/uplo
Commits (SHA1):
293687de5f, 6c81cf3892, 8b4fbefc29, a9a51c038b, 44121cc175, 0429a0db16, d6beb3ffbb, efd7e52812,
0f879a2e8f, 8e7ce42229, 5ec8881c0b, b254dce8a1, 3815e3b2b5, bbcd70eab3, 0934ce9bce, 4932963bac,
6d73cfa608, d2d9946bab, daa402f35a, 5f3532970e, 2e681e0ef8, 8e216a3a59, d0a4ae3e8f, a384d7d501,
66f53d9d34, 2af9380962, 620d50432c, 1d43f3bee8, c746678bbc, 9bb4688c54, 47553dbaf9, e50b914a8e,
e33e109403, 0ee15002fc, 4c7956fa56, 5a82182c48, 37e181af8a, 6f4198c78a, cc1664ef93, ebb6e26a64,
ebc12a388c, abc1efd5a6, 6fa1562b57, 10afac87e7, 72b3c9cd11, 232f2447d4, a2d2108e6a, 33c0d5e2f4,
605fb04f89, fd1e8ec257, be23eae3b6, 6f70885e11, f755979102, 1d49eefbbb, 6c77638ea1, 517a3d0d86,
27ca1e21be, 1dc01c9bed, 7c4c36f5ac, a2d623696c, aa75722010, 6c6de6382a, 158d84ea30, 4dd9ca7b04,
552249607d, a29772bf6e, 0efff1db26, 5eecde461d, 85164422d0, 6c3aba7c44, 68a175d545, 5e2c444525,
8d711229c1, 0e490f3be7, 7e41ef1bec, 7916aa26e0, 52ab8f3e65, 3d822dbbde, af46b5286f, 47f7efee06,
868c38f522, c8b2ac93cf, b2954d16ff, 79485e7c3a, eaf1ab21c4, 6508f4e5c1, a298d2c29b, 8b197de7ff,
15d079cd41, dc1625cd8e, a6d4de25cd, ec1452a559, 1950ccfe33, 2ca6665f4a, fa954671b2, 6f4ffdb48b,
3f676df3d5, 20f4febce1, 762905cf8d, 830ef35ed3, d8d62fb7cb, e6a404c66d, 7e711ede44, e95f2f9a67,
5a045e7d52, 67fbc0582e, 3af6b3a2bf, 04013929cb, 83069f6ca1, 7d4f662fbf, a5cac52e26, dfa055f4be,
a4c76740c0, f2e96b2323, dee73f0cb4, edf51688bc, 4a8f3508f9, 48052477b4, d81353b2d1, 143500dc4f,
1a5f7ce6ad, 01ccb34118, f669e18477, 632cde7f13, 118e13438d, fc136eec8f, 818e5130f1, c243521ae5,
5303c71589, d146897415, d63815fa40
@@ -4,6 +4,7 @@
!Cargo.lock
!Cargo.toml
!Makefile
!postgres.mk
!rust-toolchain.toml
!scripts/ninstall.sh
!docker-compose/run-tests.sh
.github/workflows/_build-and-test-locally.yml (vendored, 78 changed lines)

@@ -38,6 +38,11 @@ on:
required: false
default: 1
type: number
rerun-failed:
description: 'rerun failed tests to ignore flaky tests'
required: false
default: true
type: boolean

defaults:
run:
@@ -99,11 +104,10 @@ jobs:

# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
# It also includes --features, if any
# CARGO_FLAGS is extra options to pass to all "cargo" subcommands.
#
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
# because "cargo metadata" doesn't accept --release or --debug options
# CARGO_PROFILE is passed to "cargo build", "cargo test" etc, but not to
# "cargo metadata", because it doesn't accept --release or --debug options.
#
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
# corresponding Cargo.toml files for their descriptions.
@@ -112,16 +116,16 @@ jobs:
ARCH: ${{ inputs.arch }}
SANITIZERS: ${{ inputs.sanitizers }}
run: |
CARGO_FEATURES="--features testing"
CARGO_FLAGS="--locked --features testing"
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_FLAGS="--locked"
CARGO_PROFILE=""
elif [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix=""
CARGO_FLAGS="--locked"
CARGO_PROFILE=""
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_FLAGS="--locked --release"
CARGO_PROFILE="--release"
fi
if [[ $SANITIZERS == 'enabled' ]]; then
make_vars="WITH_SANITIZERS=yes"
@@ -131,8 +135,8 @@ jobs:
{
echo "cov_prefix=${cov_prefix}"
echo "make_vars=${make_vars}"
echo "CARGO_FEATURES=${CARGO_FEATURES}"
echo "CARGO_FLAGS=${CARGO_FLAGS}"
echo "CARGO_PROFILE=${CARGO_PROFILE}"
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
} >> $GITHUB_ENV

@@ -184,34 +188,18 @@ jobs:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}

- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make ${make_vars} postgres-v14 -j$(nproc)

- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make ${make_vars} postgres-v15 -j$(nproc)

- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make ${make_vars} postgres-v16 -j$(nproc)

- name: Build postgres v17
if: steps.cache_pg_17.outputs.cache-hit != 'true'
run: mold -run make ${make_vars} postgres-v17 -j$(nproc)

- name: Build neon extensions
run: mold -run make ${make_vars} neon-pg-ext -j$(nproc)
- name: Build all
# Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables
run: mold -run make ${make_vars} all -j$(nproc) CARGO_BUILD_FLAGS="$CARGO_FLAGS"

- name: Build walproposer-lib
run: mold -run make ${make_vars} walproposer-lib -j$(nproc)

- name: Run cargo build
env:
WITH_TESTS: ${{ inputs.sanitizers != 'enabled' && '--tests' || '' }}
- name: Build unit tests
if: inputs.sanitizers != 'enabled'
run: |
export ASAN_OPTIONS=detect_leaks=0
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins ${WITH_TESTS}
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_PROFILE --tests

# Do install *before* running rust tests because they might recompile the
# binaries with different features/flags.
@@ -223,7 +211,7 @@ jobs:
# Install target binaries
mkdir -p /tmp/neon/bin/
binaries=$(
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
${cov_prefix} cargo metadata $CARGO_FLAGS --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
for bin in $binaries; do
@@ -240,7 +228,7 @@ jobs:
mkdir -p /tmp/neon/test_bin/

test_exe_paths=$(
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_PROFILE --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
for bin in $test_exe_paths; do
@@ -274,10 +262,10 @@ jobs:
export LD_LIBRARY_PATH

#nextest does not yet support running doctests
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_PROFILE

# run all non-pageserver tests
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E '!package(pageserver)'

# run pageserver tests
# (When developing new pageserver features gated by config fields, we commonly make the rust
@@ -286,13 +274,13 @@ jobs:
# pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring \
${cov_prefix} \
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(pageserver)'

# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_s3)'

# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
@@ -301,17 +289,17 @@ jobs:
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_azure)'

- name: Install postgres binaries
run: |
# Use tar to copy files matching the pattern, preserving the paths in the destionation
tar c \
pg_install/v* \
pg_install/build/*/src/test/regress/*.so \
pg_install/build/*/src/test/regress/pg_regress \
pg_install/build/*/src/test/isolation/isolationtester \
pg_install/build/*/src/test/isolation/pg_isolation_regress \
build/*/src/test/regress/*.so \
build/*/src/test/regress/pg_regress \
build/*/src/test/isolation/isolationtester \
build/*/src/test/isolation/pg_isolation_regress \
| tar x -C /tmp/neon

- name: Upload Neon artifact
@@ -379,7 +367,7 @@ jobs:
- name: Pytest regression tests
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
uses: ./.github/actions/run-python-test-set
timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 75 || 180 }}
timeout-minutes: ${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 75 || 180 }}
with:
build_type: ${{ inputs.build-type }}
test_selection: regress
@@ -387,14 +375,14 @@ jobs:
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
rerun_failed: ${{ inputs.test-run-count == 1 }}
rerun_failed: ${{ inputs.rerun-failed }}
pg_version: ${{ matrix.pg_version }}
sanitizers: ${{ inputs.sanitizers }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
# Attempt to stop tests gracefully to generate test reports
# until they are forcibly stopped by the stricter `timeout-minutes` limit.
extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
extra_params: --session-timeout=${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
${{ inputs.test-selection != '' && format('-k "{0}"', inputs.test-selection) || '' }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
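The hunks above drop the old CARGO_FEATURES variable in favour of a single CARGO_FLAGS passed to every cargo subcommand, plus a separate CARGO_PROFILE for the commands that accept a profile. A minimal sketch of that split, outside the workflow (the variable values are illustrative, not copied verbatim from the job):

```bash
#!/usr/bin/env bash
# Sketch only: mirrors the BUILD_TYPE -> cargo option mapping described in the
# comments above, with illustrative values.
set -euo pipefail
BUILD_TYPE=${BUILD_TYPE:-debug}

CARGO_FLAGS="--locked --features testing"   # shared by every cargo subcommand
CARGO_PROFILE=""                            # only for build/test-style commands
if [[ "$BUILD_TYPE" == "release" ]]; then
  CARGO_PROFILE="--release"
fi

# Profile-aware commands receive both variables...
cargo build $CARGO_FLAGS $CARGO_PROFILE --tests
# ...while `cargo metadata` only receives CARGO_FLAGS, because it does not
# accept --release/--debug options (the reason for the split noted above).
cargo metadata $CARGO_FLAGS --format-version=1 --no-deps > /dev/null
```

For the `--session-timeout` comment, the arithmetic is (timeout-minutes - 10) * 60; for example, the 180-minute sanitizer runs get 10200 seconds = (180 - 10) * 60.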
.github/workflows/build-macos.yml (vendored, 29 changed lines)

@@ -94,11 +94,6 @@ jobs:
run: |
make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)

- name: Get postgres headers ${{ matrix.postgres-version }}
if: steps.cache_pg.outputs.cache-hit != 'true'
run: |
make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)

- name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
@@ -110,7 +105,7 @@ jobs:

build-walproposer-lib:
if: |
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
contains(inputs.pg_versions, 'v17') || inputs.rebuild_everything ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
@@ -140,11 +135,17 @@ jobs:
name: pg_install--v17
path: pg_install/v17

# `actions/download-artifact` doesn't preserve permissions:
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
- name: Make pg_install/v*/bin/* executable
run: |
chmod +x pg_install/v*/bin/*

- name: Cache walproposer-lib
id: cache_walproposer_lib
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: pg_install/build/walproposer-lib
path: build/walproposer-lib
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

- name: Checkout submodule vendor/postgres-v17
@@ -167,13 +168,13 @@ jobs:
- name: Build walproposer-lib (only for v17)
if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
run:
make walproposer-lib -j$(sysctl -n hw.ncpu)
make walproposer-lib -j$(sysctl -n hw.ncpu) PG_INSTALL_CACHED=1

- name: Upload "pg_install/build/walproposer-lib" artifact
- name: Upload "build/walproposer-lib" artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: pg_install--build--walproposer-lib
path: pg_install/build/walproposer-lib
name: build--walproposer-lib
path: build/walproposer-lib
# The artifact is supposed to be used by the next job in the same workflow,
# so there’s no need to store it for too long.
retention-days: 1
@@ -226,11 +227,11 @@ jobs:
name: pg_install--v17
path: pg_install/v17

- name: Download "pg_install/build/walproposer-lib" artifact
- name: Download "build/walproposer-lib" artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: pg_install--build--walproposer-lib
path: pg_install/build/walproposer-lib
name: build--walproposer-lib
path: build/walproposer-lib

# `actions/download-artifact` doesn't preserve permissions:
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
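Two details in the macOS job above are easy to miss: downloaded artifacts lose their executable bits, and the cached Postgres install is consumed through a Makefile switch. A local equivalent of those two steps could look like the sketch below (PG_INSTALL_CACHED is the variable referenced in the run line above):

```bash
# Restore the executable bits that actions/download-artifact drops, then build
# walproposer-lib against the already-installed Postgres instead of rebuilding it.
chmod +x pg_install/v*/bin/*
make walproposer-lib -j"$(sysctl -n hw.ncpu)" PG_INSTALL_CACHED=1
```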
@@ -58,6 +58,7 @@ jobs:
test-cfg: ${{ inputs.pg-versions }}
test-selection: ${{ inputs.test-selection }}
test-run-count: ${{ fromJson(inputs.run-count) }}
rerun-failed: false
secrets: inherit

create-test-report:
.github/workflows/build_and_test.yml (vendored, 28 changed lines)

@@ -69,7 +69,7 @@ jobs:
submodules: true

- name: Check for file changes
uses: step-security/paths-filter@v3
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
id: files-changed
with:
token: ${{ secrets.GITHUB_TOKEN }}
@@ -199,6 +199,28 @@ jobs:
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
secrets: inherit

validate-compute-manifest:
runs-on: ubuntu-22.04
needs: [ meta, check-permissions ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit

- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
with:
node-version: '24'

- name: Validate manifest against schema
run: |
make -C compute manifest-schema-validation

build-and-test-locally:
needs: [ meta, build-build-tools-image ]
# We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -648,7 +670,7 @@ jobs:
ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64

compute-node-image-arch:
needs: [ check-permissions, build-build-tools-image, meta ]
needs: [ check-permissions, meta ]
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
permissions:
id-token: write # aws-actions/configure-aws-credentials
@@ -721,7 +743,6 @@ jobs:
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
PG_VERSION=${{ matrix.version.pg }}
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
DEBIAN_VERSION=${{ matrix.version.debian }}
provenance: false
push: true
@@ -741,7 +762,6 @@ jobs:
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
PG_VERSION=${{ matrix.version.pg }}
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
DEBIAN_VERSION=${{ matrix.version.debian }}
provenance: false
push: true
.github/workflows/build_and_test_fully.yml (vendored, new file, 151 lines)
@@ -0,0 +1,151 @@
|
||||
name: Build and Test Fully
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 3 * * *' # run once a day, timezone is utc
|
||||
workflow_dispatch:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow per any non-`main` branch.
|
||||
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
|
||||
jobs:
|
||||
tag:
|
||||
runs-on: [ self-hosted, small ]
|
||||
container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
|
||||
steps:
|
||||
# Need `fetch-depth: 0` to count the number of commits in the branch
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get build tag
|
||||
run: |
|
||||
echo run:$GITHUB_RUN_ID
|
||||
echo ref:$GITHUB_REF_NAME
|
||||
echo rev:$(git rev-list --count HEAD)
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
|
||||
echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
|
||||
echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'"
|
||||
echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
shell: bash
|
||||
id: build-tag
|
||||
|
||||
build-build-tools-image:
|
||||
uses: ./.github/workflows/build-build-tools-image.yml
|
||||
secrets: inherit
|
||||
|
||||
build-and-test-locally:
|
||||
needs: [ tag, build-build-tools-image ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
build-type: [ debug, release ]
|
||||
uses: ./.github/workflows/_build-and-test-locally.yml
|
||||
with:
|
||||
arch: ${{ matrix.arch }}
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
build-tag: ${{ needs.tag.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
rerun-failed: false
|
||||
test-cfg: '[{"pg_version":"v14", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v15", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v16", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v14", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v15", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v16", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "withouts-lfc"}]'
|
||||
secrets: inherit
|
||||
|
||||
|
||||
create-test-report:
|
||||
needs: [ build-and-test-locally, build-build-tools-image ]
|
||||
if: ${{ !cancelled() }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
outputs:
|
||||
report-url: ${{ steps.create-allure-report.outputs.report-url }}
|
||||
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
id: create-allure-report
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||
retries: 5
|
||||
script: |
|
||||
const report = {
|
||||
reportUrl: "${{ steps.create-allure-report.outputs.report-url }}",
|
||||
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
|
||||
}
|
||||
|
||||
const coverage = {}
|
||||
|
||||
const script = require("./scripts/comment-test-report.js")
|
||||
await script({
|
||||
github,
|
||||
context,
|
||||
fetch,
|
||||
report,
|
||||
coverage,
|
||||
})
|
||||
@@ -79,6 +79,7 @@ jobs:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
build-tag: ${{ needs.tag.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
rerun-failed: false
|
||||
test-cfg: '[{"pg_version":"v17"}]'
|
||||
sanitizers: enabled
|
||||
secrets: inherit
|
||||
|
||||
.github/workflows/large_oltp_benchmark.yml (vendored, 11 changed lines)
@@ -33,11 +33,19 @@ jobs:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
# test only read-only custom scripts in new branch without database maintenance
- target: new_branch
custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3
test_maintenance: false
# test all custom scripts in new branch with database maintenance
- target: new_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
test_maintenance: true
# test all custom scripts in reuse branch with database maintenance
- target: reuse_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
test_maintenance: true
max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK)
permissions:
contents: write
statuses: write
@@ -145,6 +153,7 @@ jobs:
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

- name: Benchmark database maintenance
if: ${{ matrix.test_maintenance }}
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
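The `script.sql@N` entries above use pgbench's weighted custom-script syntax, where the number after `@` is the relative weight with which each script is picked. Assuming the test harness forwards them as `-f` arguments (a sketch; the client count, duration, and connection string are illustrative), the read-only variant corresponds roughly to:

```bash
pgbench -c 10 -T 3600 \
  -f select_any_webhook_with_skew.sql@300 \
  -f select_recent_webhook.sql@397 \
  -f select_prefetch_webhook.sql@3 \
  "$BENCHMARK_CONNSTR"
```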
.github/workflows/large_oltp_growth.yml (vendored, new file, 175 lines)
@@ -0,0 +1,175 @@
|
||||
name: large oltp growth
|
||||
# workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)
|
||||
|
||||
on:
|
||||
# uncomment to run on push for debugging your PR
|
||||
# push:
|
||||
# branches: [ bodobolero/increase_large_oltp_workload ]
|
||||
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 6 * * *' # 06:00 UTC
|
||||
- cron: '0 8 * * *' # 08:00 UTC
|
||||
- cron: '0 10 * * *' # 10:00 UTC
|
||||
- cron: '0 12 * * *' # 12:00 UTC
|
||||
- cron: '0 14 * * *' # 14:00 UTC
|
||||
- cron: '0 16 * * *' # 16:00 UTC
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow globally because we need dedicated resources which only exist once
|
||||
group: large-oltp-growth
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
oltp:
|
||||
strategy:
|
||||
fail-fast: false # allow other variants to continue even if one fails
|
||||
matrix:
|
||||
include:
|
||||
# for now only grow the reuse branch, not the other branches.
|
||||
- target: reuse_branch
|
||||
custom_scripts:
|
||||
- grow_action_blocks.sql
|
||||
- grow_action_kwargs.sql
|
||||
- grow_device_fingerprint_event.sql
|
||||
- grow_edges.sql
|
||||
- grow_hotel_rate_mapping.sql
|
||||
- grow_ocr_pipeline_results_version.sql
|
||||
- grow_priceline_raw_response.sql
|
||||
- grow_relabled_transactions.sql
|
||||
- grow_state_values.sql
|
||||
- grow_values.sql
|
||||
- grow_vertices.sql
|
||||
- update_accounting_coding_body_tracking_category_selection.sql
|
||||
- update_action_blocks.sql
|
||||
- update_action_kwargs.sql
|
||||
- update_denormalized_approval_workflow.sql
|
||||
- update_device_fingerprint_event.sql
|
||||
- update_edges.sql
|
||||
- update_heron_transaction_enriched_log.sql
|
||||
- update_heron_transaction_enrichment_requests.sql
|
||||
- update_hotel_rate_mapping.sql
|
||||
- update_incoming_webhooks.sql
|
||||
- update_manual_transaction.sql
|
||||
- update_ml_receipt_matching_log.sql
|
||||
- update_ocr_pipeine_results_version.sql
|
||||
- update_orc_pipeline_step_results.sql
|
||||
- update_orc_pipeline_step_results_version.sql
|
||||
- update_priceline_raw_response.sql
|
||||
- update_quickbooks_transactions.sql
|
||||
- update_raw_finicity_transaction.sql
|
||||
- update_relabeled_transactions.sql
|
||||
- update_state_values.sql
|
||||
- update_stripe_authorization_event_log.sql
|
||||
- update_transaction.sql
|
||||
- update_values.sql
|
||||
- update_vertices.sql
|
||||
max-parallel: 1 # we want to run each growth workload sequentially (for now there is just one)
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "1h"
|
||||
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ join(matrix.custom_scripts, ' ') }}
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
PG_VERSION: 16 # pre-determined by pre-determined project
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
PLATFORM: ${{ matrix.target }}
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Configure AWS credentials # necessary to download artefacts
|
||||
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${{ matrix.target }}" in
|
||||
reuse_branch)
|
||||
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown target=${{ matrix.target }}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: pgbench with custom-scripts
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: true
|
||||
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
|
||||
with:
|
||||
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
|
||||
slack-message: |
|
||||
Periodic large oltp tenant growth increase: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
.github/workflows/neon_extra_builds.yml (vendored, 2 changed lines)

@@ -53,7 +53,7 @@ jobs:
submodules: true

- name: Check for Postgres changes
uses: step-security/paths-filter@v3
uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3
id: files_changed
with:
token: ${{ github.token }}
.github/workflows/pre-merge-checks.yml (vendored, 4 changed lines)

@@ -34,7 +34,7 @@ jobs:

- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
- uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
id: python-src
with:
files: |
@@ -45,7 +45,7 @@ jobs:
poetry.lock
pyproject.toml

- uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
- uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
id: rust-src
with:
files: |
.github/workflows/proxy-benchmark.yml (vendored, new file, 84 lines)
@@ -0,0 +1,84 @@
|
||||
name: Periodic proxy performance test on unit-perf hetzner runner
|
||||
|
||||
on:
|
||||
push: # TODO: remove after testing
|
||||
branches:
|
||||
- test-proxy-bench # Runs on pushes to branches starting with test-proxy-bench
|
||||
# schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
# - cron: '0 5 * * *' # Runs at 5 UTC once a day
|
||||
workflow_dispatch: # adds an ability to run this manually
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}
|
||||
cancel-in-progress: false
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
run_periodic_proxybench_test:
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
runs-on: [self-hosted, unit-perf]
|
||||
timeout-minutes: 60 # 1h timeout
|
||||
container:
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
steps:
|
||||
- name: Checkout proxy-bench Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: neondatabase/proxy-bench
|
||||
path: proxy-bench
|
||||
|
||||
- name: Set up the environment which depends on $RUNNER_TEMP on nvme drive
|
||||
id: set-env
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
PROXY_BENCH_PATH=$(realpath ./proxy-bench)
|
||||
{
|
||||
echo "PROXY_BENCH_PATH=$PROXY_BENCH_PATH"
|
||||
echo "NEON_DIR=${RUNNER_TEMP}/neon"
|
||||
echo "TEST_OUTPUT=${PROXY_BENCH_PATH}/test_output"
|
||||
echo ""
|
||||
} >> "$GITHUB_ENV"
|
||||
|
||||
- name: Run proxy-bench
|
||||
run: ${PROXY_BENCH_PATH}/run.sh
|
||||
|
||||
- name: Ingest Bench Results # neon repo script
|
||||
if: always()
|
||||
run: |
|
||||
mkdir -p $TEST_OUTPUT
|
||||
python $NEON_DIR/scripts/proxy_bench_results_ingest.py --out $TEST_OUTPUT
|
||||
|
||||
- name: Push Metrics to Proxy perf database
|
||||
if: always()
|
||||
env:
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PROXY_TEST_RESULT_CONNSTR }}"
|
||||
REPORT_FROM: $TEST_OUTPUT
|
||||
run: $NEON_DIR/scripts/generate_and_push_perf_report.sh
|
||||
|
||||
- name: Docker cleanup
|
||||
if: always()
|
||||
run: docker compose down
|
||||
|
||||
- name: Notify Failure
|
||||
if: failure()
|
||||
run: echo "Proxy bench job failed" && exit 1
|
||||
.gitignore (vendored, 2 changed lines)

@@ -1,10 +1,12 @@
/artifact_cache
/build
/pg_install
/target
/tmp_check
/tmp_check_cli
__pycache__/
test_output/
neon_previous/
.vscode
.idea
*.swp
Cargo.lock (generated, 80 changed lines)
@@ -1279,6 +1279,7 @@ dependencies = [
|
||||
"remote_storage",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"url",
|
||||
"utils",
|
||||
]
|
||||
|
||||
@@ -1304,6 +1305,7 @@ dependencies = [
|
||||
"fail",
|
||||
"flate2",
|
||||
"futures",
|
||||
"hostname-validator",
|
||||
"http 1.1.0",
|
||||
"indexmap 2.9.0",
|
||||
"itertools 0.10.5",
|
||||
@@ -1316,8 +1318,10 @@ dependencies = [
|
||||
"opentelemetry",
|
||||
"opentelemetry_sdk",
|
||||
"p256 0.13.2",
|
||||
"pageserver_page_api",
|
||||
"postgres",
|
||||
"postgres_initdb",
|
||||
"postgres_versioninfo",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
@@ -1334,6 +1338,7 @@ dependencies = [
|
||||
"tokio-postgres",
|
||||
"tokio-stream",
|
||||
"tokio-util",
|
||||
"tonic 0.13.1",
|
||||
"tower 0.5.2",
|
||||
"tower-http",
|
||||
"tower-otel",
|
||||
@@ -2767,6 +2772,12 @@ dependencies = [
|
||||
"windows",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hostname-validator"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f558a64ac9af88b5ba400d99b579451af0d39c6d360980045b91aac966d705e2"
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.9"
|
||||
@@ -4255,6 +4266,7 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tonic 0.13.1",
|
||||
"tracing",
|
||||
"url",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -4334,6 +4346,7 @@ dependencies = [
|
||||
"postgres_backend",
|
||||
"postgres_connection",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"postgres_initdb",
|
||||
"posthog_client_lite",
|
||||
"pprof",
|
||||
@@ -4403,7 +4416,9 @@ dependencies = [
|
||||
"nix 0.30.1",
|
||||
"once_cell",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"postgres_versioninfo",
|
||||
"posthog_client_lite",
|
||||
"rand 0.8.5",
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
@@ -4414,6 +4429,7 @@ dependencies = [
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror 1.0.69",
|
||||
"tracing",
|
||||
"tracing-utils",
|
||||
"utils",
|
||||
]
|
||||
@@ -4427,6 +4443,7 @@ dependencies = [
|
||||
"futures",
|
||||
"http-utils",
|
||||
"pageserver_api",
|
||||
"postgres_versioninfo",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"thiserror 1.0.69",
|
||||
@@ -4465,11 +4482,18 @@ dependencies = [
|
||||
name = "pageserver_page_api"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"futures",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"prost 0.13.5",
|
||||
"prost-types 0.13.5",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tonic 0.13.1",
|
||||
"tonic-build",
|
||||
"utils",
|
||||
@@ -4889,6 +4913,8 @@ dependencies = [
|
||||
"memoffset 0.9.0",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"postgres_ffi_types",
|
||||
"postgres_versioninfo",
|
||||
"pprof",
|
||||
"regex",
|
||||
"serde",
|
||||
@@ -4897,17 +4923,37 @@ dependencies = [
|
||||
"utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres_ffi_types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"thiserror 1.0.69",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres_initdb"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"camino",
|
||||
"postgres_versioninfo",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres_versioninfo"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"serde",
|
||||
"serde_repr",
|
||||
"thiserror 1.0.69",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "posthog_client_lite"
|
||||
version = "0.1.0"
|
||||
@@ -5120,7 +5166,7 @@ dependencies = [
|
||||
"petgraph",
|
||||
"prettyplease",
|
||||
"prost 0.13.5",
|
||||
"prost-types 0.13.3",
|
||||
"prost-types 0.13.5",
|
||||
"regex",
|
||||
"syn 2.0.100",
|
||||
"tempfile",
|
||||
@@ -5163,9 +5209,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "prost-types"
|
||||
version = "0.13.3"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670"
|
||||
checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
|
||||
dependencies = [
|
||||
"prost 0.13.5",
|
||||
]
|
||||
@@ -6099,6 +6145,7 @@ dependencies = [
|
||||
"postgres-protocol",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"postgres_versioninfo",
|
||||
"pprof",
|
||||
"pq_proto",
|
||||
"rand 0.8.5",
|
||||
@@ -6143,6 +6190,7 @@ dependencies = [
|
||||
"const_format",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"postgres_versioninfo",
|
||||
"pq_proto",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -6465,6 +6513,17 @@ dependencies = [
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_repr"
|
||||
version = "0.1.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_spanned"
|
||||
version = "0.6.6"
|
||||
@@ -6759,6 +6818,7 @@ dependencies = [
|
||||
"chrono",
|
||||
"clap",
|
||||
"clashmap",
|
||||
"compute_api",
|
||||
"control_plane",
|
||||
"cron",
|
||||
"diesel",
|
||||
@@ -6770,6 +6830,7 @@ dependencies = [
|
||||
"hex",
|
||||
"http-utils",
|
||||
"humantime",
|
||||
"humantime-serde",
|
||||
"hyper 0.14.30",
|
||||
"itertools 0.10.5",
|
||||
"json-structural-diff",
|
||||
@@ -6780,6 +6841,7 @@ dependencies = [
|
||||
"pageserver_api",
|
||||
"pageserver_client",
|
||||
"postgres_connection",
|
||||
"posthog_client_lite",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"reqwest",
|
||||
@@ -7558,6 +7620,7 @@ dependencies = [
|
||||
"axum",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"flate2",
|
||||
"h2 0.4.4",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
@@ -7577,6 +7640,7 @@ dependencies = [
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -7588,7 +7652,7 @@ dependencies = [
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"prost-build 0.13.3",
|
||||
"prost-types 0.13.3",
|
||||
"prost-types 0.13.5",
|
||||
"quote",
|
||||
"syn 2.0.100",
|
||||
]
|
||||
@@ -7600,7 +7664,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9687bd5bfeafebdded2356950f278bba8226f0b32109537c4253406e09aafe1"
|
||||
dependencies = [
|
||||
"prost 0.13.5",
|
||||
"prost-types 0.13.3",
|
||||
"prost-types 0.13.5",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic 0.13.1",
|
||||
@@ -8156,6 +8220,7 @@ dependencies = [
|
||||
"futures",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"pprof",
|
||||
"prost 0.13.5",
|
||||
"remote_storage",
|
||||
@@ -8628,7 +8693,6 @@ dependencies = [
|
||||
"num-iter",
|
||||
"num-rational",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"p256 0.13.2",
|
||||
"parquet",
|
||||
"prettyplease",
|
||||
|
||||
Cargo.toml (10 changed lines)

@@ -22,6 +22,8 @@ members = [
"libs/http-utils",
"libs/pageserver_api",
"libs/postgres_ffi",
"libs/postgres_ffi_types",
"libs/postgres_versioninfo",
"libs/safekeeper_api",
"libs/desim",
"libs/neon-shmem",
@@ -150,6 +152,7 @@ pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointe
procfs = "0.16"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
prost = "0.13.5"
prost-types = "0.13.5"
rand = "0.8"
redis = { version = "0.29.2", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
@@ -173,6 +176,7 @@ serde_json = "1"
serde_path_to_error = "0.1"
serde_with = { version = "3", features = [ "base64" ] }
serde_assert = "0.5.0"
serde_repr = "0.1.20"
sha2 = "0.10.2"
signal-hook = "0.3"
smallvec = "1.11"
@@ -196,10 +200,10 @@ tokio-postgres-rustls = "0.12.0"
tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
tokio-stream = "0.1"
tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
toml = "0.8"
toml_edit = "0.22"
tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "prost", "router", "server", "tls-ring", "tls-native-roots"] }
tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "gzip", "prost", "router", "server", "tls-ring", "tls-native-roots", "zstd"] }
tonic-reflection = { version = "0.13.1", features = ["server"] }
tower = { version = "0.5.2", default-features = false }
tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }
@@ -259,6 +263,8 @@ pageserver_page_api = { path = "./pageserver/page_api" }
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
postgres_ffi_types = { version = "0.1", path = "./libs/postgres_ffi_types/" }
postgres_versioninfo = { version = "0.1", path = "./libs/postgres_versioninfo/" }
postgres_initdb = { path = "./libs/postgres_initdb" }
posthog_client_lite = { version = "0.1", path = "./libs/posthog_client_lite" }
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
Dockerfile (10 changed lines)

@@ -5,8 +5,6 @@
ARG REPOSITORY=ghcr.io/neondatabase
ARG IMAGE=build-tools
ARG TAG=pinned
ARG DEFAULT_PG_VERSION=17
ARG STABLE_PG_VERSION=16
ARG DEBIAN_VERSION=bookworm
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim

@@ -42,12 +40,12 @@ COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
COPY --chown=nonroot pgxn pgxn
COPY --chown=nonroot Makefile Makefile
COPY --chown=nonroot postgres.mk postgres.mk
COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh

ENV BUILD_TYPE=release
RUN set -e \
&& mold -run make -j $(nproc) -s neon-pg-ext \
&& rm -rf pg_install/build \
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .

# Prepare cargo-chef recipe
@@ -63,14 +61,11 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
WORKDIR /home/nonroot
ARG GIT_VERSION=local
ARG BUILD_TAG
ARG STABLE_PG_VERSION

COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
COPY --from=plan /home/nonroot/recipe.json recipe.json

ARG ADDITIONAL_RUSTFLAGS=""
@@ -97,7 +92,6 @@ RUN set -e \
# Build final image
#
FROM $BASE_IMAGE_SHA
ARG DEFAULT_PG_VERSION
WORKDIR /data

RUN set -e \
@@ -107,8 +101,6 @@ RUN set -e \
libreadline-dev \
libseccomp-dev \
ca-certificates \
# System postgres for use with client libraries (e.g. in storage controller)
postgresql-15 \
openssl \
unzip \
curl \
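For reference, the build args visible in this Dockerfile can be driven from a local build roughly as follows (the image tag and GIT_VERSION value are illustrative, not prescribed by the repo):

```bash
# Hypothetical local invocation wiring the ARGs declared above.
docker build \
  --build-arg DEFAULT_PG_VERSION=17 \
  --build-arg DEBIAN_VERSION=bookworm \
  --build-arg GIT_VERSION="$(git rev-parse HEAD)" \
  --build-arg BUILD_TAG=local \
  -t neon-local .
```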
Makefile (244 changed lines)

@@ -1,8 +1,21 @@
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Where to install Postgres, default is ./pg_install, maybe useful for package managers
# Where to install Postgres, default is ./pg_install, maybe useful for package
# managers.
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/

# Supported PostgreSQL versions
POSTGRES_VERSIONS = v17 v16 v15 v14

# CARGO_BUILD_FLAGS: Extra flags to pass to `cargo build`. `--locked`
# and `--features testing` are popular examples.
#
# CARGO_PROFILE: Set to override the cargo profile to use. By default,
# it is derived from BUILD_TYPE.

# All intermediate build artifacts are stored here.
BUILD_DIR := build

ICU_PREFIX_DIR := /usr/local/icu

#
@@ -16,12 +29,12 @@ ifeq ($(BUILD_TYPE),release)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
PG_CFLAGS += -O2 -g3 $(CFLAGS)
PG_LDFLAGS = $(LDFLAGS)
# Unfortunately, `--profile=...` is a nightly feature
CARGO_BUILD_FLAGS += --release
CARGO_PROFILE ?= --profile=release
else ifeq ($(BUILD_TYPE),debug)
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
PG_CFLAGS += -O0 -g3 $(CFLAGS)
PG_LDFLAGS = $(LDFLAGS)
CARGO_PROFILE ?= --profile=dev
else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
@@ -85,141 +98,29 @@ CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
# Top level Makefile to build Neon and PostgreSQL
#
.PHONY: all
all: neon postgres neon-pg-ext
all: neon postgres-install neon-pg-ext

### Neon Rust bits
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon
neon: postgres-headers walproposer-lib cargo-target-dir
neon: postgres-headers-install walproposer-lib cargo-target-dir
	+@echo "Compiling Neon"
	$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
	$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE)

.PHONY: cargo-target-dir
cargo-target-dir:
	# https://github.com/rust-lang/cargo/issues/14281
	mkdir -p target
	test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG

### PostgreSQL parts
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now.
#
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
	mkdir -p $(POSTGRES_INSTALL_DIR)
	test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG

	+@echo "Configuring Postgres $* build"
	@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
	echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
	echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
	exit 1; }
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*

	VERSION=$*; \
	EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
	(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
	CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
	$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
	--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)

# nicer alias to run 'configure'
# Note: I've been unable to use templates for this part of our configuration.
# I'm not sure why it wouldn't work, but this is the only place (apart from
# the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used.
.PHONY: postgres-configure-v17
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
.PHONY: postgres-configure-v16
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
.PHONY: postgres-configure-v15
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
.PHONY: postgres-configure-v14
postgres-configure-v14: $(POSTGRES_INSTALL_DIR)/build/v14/config.status

# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
.PHONY: postgres-headers-%
postgres-headers-%: postgres-configure-%
	+@echo "Installing PostgreSQL $* headers"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/include MAKELEVEL=0 install

# Compile and install PostgreSQL
.PHONY: postgres-%
postgres-%: postgres-configure-% \
	postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
	+@echo "Compiling PostgreSQL $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 install
	+@echo "Compiling libpq $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq install
	+@echo "Compiling pg_prewarm $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install
	+@echo "Compiling pg_buffercache $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install
	+@echo "Compiling pg_visibility $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
	+@echo "Compiling pageinspect $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
	+@echo "Compiling pg_trgm $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
	+@echo "Compiling amcheck $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
	+@echo "Compiling test_decoding $*"
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install

.PHONY: postgres-clean-%
postgres-clean-%:
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 clean
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache clean
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect clean
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq clean

.PHONY: postgres-check-%
postgres-check-%: postgres-%
	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 check

.PHONY: neon-pg-ext-%
neon-pg-ext-%: postgres-%
	+@echo "Compiling neon $*"
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
neon-pg-ext-%: postgres-install-%
	+@echo "Compiling neon-specific Postgres extensions for $*"
	mkdir -p $(BUILD_DIR)/pgxn-$*
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
	+@echo "Compiling neon_walredo $*"
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
	+@echo "Compiling neon_rmgr $*"
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
	+@echo "Compiling neon_test_utils $*"
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
	+@echo "Compiling neon_utils $*"
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install

.PHONY: neon-pg-clean-ext-%
neon-pg-clean-ext-%:
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
		-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile clean
		-C $(BUILD_DIR)/pgxn-$*\
		-f $(ROOT_PROJECT_DIR)/pgxn/Makefile install

# Build walproposer as a static library. walproposer source code is located
# in the pgxn/neon directory.
@@ -233,15 +134,15 @@ neon-pg-clean-ext-%:
.PHONY: walproposer-lib
walproposer-lib: neon-pg-ext-v17
	+@echo "Compiling walproposer-lib"
	mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
	mkdir -p $(BUILD_DIR)/walproposer-lib
	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
		-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
		-C $(BUILD_DIR)/walproposer-lib \
		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(BUILD_DIR)/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(BUILD_DIR)/walproposer-lib
|
||||
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgport.a \
|
||||
pg_strong_random.o
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
||||
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
|
||||
checksum_helper.o \
|
||||
cryptohash_openssl.o \
|
||||
hmac_openssl.o \
|
||||
@@ -249,69 +150,18 @@ walproposer-lib: neon-pg-ext-v17
|
||||
parse_manifest.o \
|
||||
scram-common.o
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
||||
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
|
||||
pg_crc32c.o
|
||||
endif
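One way to sanity-check this target is to list what remains in the trimmed archives; a small sketch, assuming BUILD_DIR resolves to build/ (an assumption, the variable is defined elsewhere in the Makefile):

    make walproposer-lib
    # pg_strong_random.o was deleted from libpgport.a by the `ar d` call above.
    ar t build/walproposer-lib/libpgport.a | grep pg_strong_random || echo "pg_strong_random.o removed"
    ar t build/walproposer-lib/libpgcommon.a | head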
|
||||
|
||||
.PHONY: walproposer-lib-clean
|
||||
walproposer-lib-clean:
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
|
||||
|
||||
# Shorthand to call neon-pg-ext-% target for all Postgres versions
|
||||
.PHONY: neon-pg-ext
|
||||
neon-pg-ext: \
|
||||
neon-pg-ext-v14 \
|
||||
neon-pg-ext-v15 \
|
||||
neon-pg-ext-v16 \
|
||||
neon-pg-ext-v17
|
||||
|
||||
.PHONY: neon-pg-clean-ext
|
||||
neon-pg-clean-ext: \
|
||||
neon-pg-clean-ext-v14 \
|
||||
neon-pg-clean-ext-v15 \
|
||||
neon-pg-clean-ext-v16 \
|
||||
neon-pg-clean-ext-v17
|
||||
|
||||
# shorthand to build all Postgres versions
|
||||
.PHONY: postgres
|
||||
postgres: \
|
||||
postgres-v14 \
|
||||
postgres-v15 \
|
||||
postgres-v16 \
|
||||
postgres-v17
|
||||
|
||||
.PHONY: postgres-headers
|
||||
postgres-headers: \
|
||||
postgres-headers-v14 \
|
||||
postgres-headers-v15 \
|
||||
postgres-headers-v16 \
|
||||
postgres-headers-v17
|
||||
|
||||
.PHONY: postgres-clean
|
||||
postgres-clean: \
|
||||
postgres-clean-v14 \
|
||||
postgres-clean-v15 \
|
||||
postgres-clean-v16 \
|
||||
postgres-clean-v17
|
||||
|
||||
.PHONY: postgres-check
|
||||
postgres-check: \
|
||||
postgres-check-v14 \
|
||||
postgres-check-v15 \
|
||||
postgres-check-v16 \
|
||||
postgres-check-v17
|
||||
|
||||
# This doesn't remove the effects of 'configure'.
|
||||
.PHONY: clean
|
||||
clean: postgres-clean neon-pg-clean-ext
|
||||
$(MAKE) -C compute clean
|
||||
$(CARGO_CMD_PREFIX) cargo clean
|
||||
neon-pg-ext: $(foreach pg_version,$(POSTGRES_VERSIONS),neon-pg-ext-$(pg_version))
|
||||
|
||||
# This removes everything
|
||||
.PHONY: distclean
|
||||
distclean:
|
||||
$(RM) -r $(POSTGRES_INSTALL_DIR)
|
||||
$(RM) -r $(POSTGRES_INSTALL_DIR) $(BUILD_DIR)
|
||||
$(CARGO_CMD_PREFIX) cargo clean
|
||||
|
||||
.PHONY: fmt
|
||||
@@ -320,7 +170,7 @@ fmt:
|
||||
|
||||
postgres-%-pg-bsd-indent: postgres-%
|
||||
+@echo "Compiling pg_bsd_indent"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/src/tools/pg_bsd_indent/
|
||||
|
||||
# Create typedef list for the core. Note that generally it should be combined with
|
||||
# buildfarm one to cover platform specific stuff.
|
||||
@@ -339,7 +189,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
||||
cat $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/typedefs.list |\
|
||||
cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list
|
||||
+@echo note: you might want to run it on selected files/dirs instead.
|
||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
INDENT=$(BUILD_DIR)/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
|
||||
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
|
||||
--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
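As the note above says, this is usually run per version and often narrowed to selected files or directories; a hedged sketch of the full-tree invocation:

    make postgres-v17-pgindent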
|
||||
@@ -350,12 +200,28 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
||||
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
|
||||
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
|
||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
|
||||
-C $(BUILD_DIR)/neon-v17 \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
|
||||
|
||||
|
||||
.PHONY: setup-pre-commit-hook
|
||||
setup-pre-commit-hook:
|
||||
ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
|
||||
|
||||
# Targets for building PostgreSQL are defined in postgres.mk.
|
||||
#
|
||||
# But if the caller has indicated that PostgreSQL is already
|
||||
# installed, by setting the PG_INSTALL_CACHED variable, skip it.
|
||||
ifdef PG_INSTALL_CACHED
|
||||
postgres-install: skip-install
|
||||
$(foreach pg_version,$(POSTGRES_VERSIONS),postgres-install-$(pg_version)): skip-install
|
||||
postgres-headers-install:
|
||||
+@echo "Skipping installation of PostgreSQL headers because PG_INSTALL_CACHED is set"
|
||||
skip-install:
|
||||
+@echo "Skipping PostgreSQL installation because PG_INSTALL_CACHED is set"
|
||||
|
||||
else
|
||||
include postgres.mk
|
||||
endif
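A minimal sketch of how a caller (for example a CI job) might use this switch; the cache-restore step is an assumption, not something this Makefile provides:

    # Cache hit: pg_install/ was restored from a build cache, so skip the PostgreSQL build.
    make PG_INSTALL_CACHED=1 neon-pg-ext
    # Cache miss: leave the variable unset so postgres.mk is included and PostgreSQL is built.
    make neon-pg-ext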
|
||||
|
||||
@@ -165,6 +165,7 @@ RUN curl -fsSL \
|
||||
&& rm sql_exporter.tar.gz
|
||||
|
||||
# protobuf-compiler (protoc)
|
||||
# Keep the version the same as in compute/compute-node.Dockerfile
|
||||
ENV PROTOC_VERSION=25.1
|
||||
RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
|
||||
&& unzip -q protoc.zip -d protoc \
|
||||
@@ -179,7 +180,7 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
|
||||
&& mv s5cmd /usr/local/bin/s5cmd
|
||||
|
||||
# LLVM
|
||||
ENV LLVM_VERSION=19
|
||||
ENV LLVM_VERSION=20
|
||||
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
|
||||
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION}/ llvm-toolchain-${DEBIAN_VERSION}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
|
||||
&& apt update \
|
||||
@@ -292,7 +293,7 @@ WORKDIR /home/nonroot
|
||||
|
||||
# Rust
|
||||
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
|
||||
ENV RUSTC_VERSION=1.87.0
|
||||
ENV RUSTC_VERSION=1.88.0
|
||||
ENV RUSTUP_HOME="/home/nonroot/.rustup"
|
||||
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
|
||||
ARG RUSTFILT_VERSION=0.2.1
|
||||
|
||||
compute/.gitignore (vendored, 3 lines changed)
@@ -3,3 +3,6 @@ etc/neon_collector.yml
etc/neon_collector_autoscaling.yml
etc/sql_exporter.yml
etc/sql_exporter_autoscaling.yml

# Node.js dependencies
node_modules/
|
||||
@@ -22,7 +22,7 @@ sql_exporter.yml: $(jsonnet_files)
|
||||
--output-file etc/$@ \
|
||||
--tla-str collector_name=neon_collector \
|
||||
--tla-str collector_file=neon_collector.yml \
|
||||
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter' \
|
||||
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter&pgaudit.log=none' \
|
||||
etc/sql_exporter.jsonnet
|
||||
|
||||
sql_exporter_autoscaling.yml: $(jsonnet_files)
|
||||
@@ -30,7 +30,7 @@ sql_exporter_autoscaling.yml: $(jsonnet_files)
|
||||
--output-file etc/$@ \
|
||||
--tla-str collector_name=neon_collector_autoscaling \
|
||||
--tla-str collector_file=neon_collector_autoscaling.yml \
|
||||
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling' \
|
||||
--tla-str 'connection_string=postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling&pgaudit.log=none' \
|
||||
etc/sql_exporter.jsonnet
|
||||
|
||||
.PHONY: clean
|
||||
@@ -48,3 +48,11 @@ jsonnetfmt-test:
|
||||
.PHONY: jsonnetfmt-format
|
||||
jsonnetfmt-format:
|
||||
jsonnetfmt --in-place $(jsonnet_files)
|
||||
|
||||
.PHONY: manifest-schema-validation
|
||||
manifest-schema-validation: node_modules
|
||||
node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
|
||||
|
||||
node_modules: package.json
|
||||
npm install
|
||||
touch node_modules
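A hedged usage sketch, assuming these targets live in compute/Makefile as the surrounding paths suggest:

    cd compute
    # First run installs @sourcemeta/jsonschema into node_modules/, then validates
    # manifest.yaml against manifest.schema.json using the 2020-12 draft.
    make manifest-schema-validation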
|
||||
|
||||
@@ -77,9 +77,6 @@
|
||||
# build_and_test.yml github workflow for how that's done.
|
||||
|
||||
ARG PG_VERSION
|
||||
ARG REPOSITORY=ghcr.io/neondatabase
|
||||
ARG IMAGE=build-tools
|
||||
ARG TAG=pinned
|
||||
ARG BUILD_TAG
|
||||
ARG DEBIAN_VERSION=bookworm
|
||||
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
|
||||
@@ -118,6 +115,9 @@ ARG EXTENSIONS=all
|
||||
FROM $BASE_IMAGE_SHA AS build-deps
|
||||
ARG DEBIAN_VERSION
|
||||
|
||||
# Keep in sync with build-tools.Dockerfile
|
||||
ENV PROTOC_VERSION=25.1
|
||||
|
||||
# Use strict mode for bash to catch errors early
|
||||
SHELL ["/bin/bash", "-euo", "pipefail", "-c"]
|
||||
|
||||
@@ -149,8 +149,17 @@ RUN case $DEBIAN_VERSION in \
|
||||
ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
|
||||
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip g++ \
|
||||
libclang-dev \
|
||||
jsonnet \
|
||||
$VERSION_INSTALLS \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/* \
|
||||
&& useradd -ms /bin/bash nonroot -b /home \
|
||||
# Install protoc from binary release, since Debian's versions are too old.
|
||||
&& curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
|
||||
&& unzip -q protoc.zip -d protoc \
|
||||
&& mv protoc/bin/protoc /usr/local/bin/protoc \
|
||||
&& mv protoc/include/google /usr/local/include/google \
|
||||
&& rm -rf protoc.zip protoc
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -171,9 +180,6 @@ RUN cd postgres && \
|
||||
eval $CONFIGURE_CMD && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
|
||||
# Install headers
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
|
||||
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
|
||||
# Enable some of contrib extensions
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
|
||||
echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
|
||||
@@ -1057,17 +1063,10 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg build with nonroot user and cargo installed"
|
||||
# This layer is base and common for layers with `pgrx`
|
||||
# Layer "build-deps with Rust toolchain installed"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build AS pg-build-nonroot-with-cargo
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
FROM build-deps AS build-deps-with-cargo
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:$PATH"
|
||||
@@ -1082,13 +1081,29 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
|
||||
rm rustup-init
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-build with Rust toolchain installed"
|
||||
# This layer is base and common for layers with `pgrx`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build AS pg-build-with-cargo
|
||||
ARG PG_VERSION
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:$PATH"
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
COPY --from=build-deps-with-cargo /home/nonroot /home/nonroot
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "rust extensions"
|
||||
# This layer is used to build `pgrx` deps
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build
|
||||
FROM pg-build-with-cargo AS rust-extensions-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
@@ -1110,7 +1125,7 @@ USER root
|
||||
# and eventually get merged with `rust-extensions-build`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx12
|
||||
FROM pg-build-with-cargo AS rust-extensions-build-pgrx12
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN cargo install --locked --version 0.12.9 cargo-pgrx && \
|
||||
@@ -1127,7 +1142,7 @@ USER root
|
||||
# and eventually get merged with `rust-extensions-build`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx14
|
||||
FROM pg-build-with-cargo AS rust-extensions-build-pgrx14
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN cargo install --locked --version 0.14.1 cargo-pgrx && \
|
||||
@@ -1144,10 +1159,12 @@ USER root
|
||||
|
||||
FROM build-deps AS pgrag-src
|
||||
ARG PG_VERSION
|
||||
|
||||
WORKDIR /ext-src
|
||||
COPY compute/patches/onnxruntime.patch .
|
||||
|
||||
RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \
|
||||
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /ext-src/onnxruntime.patch && \
|
||||
echo "#nothing to test here" > neon-test.sh
|
||||
|
||||
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.2.tar.gz -O pgrag.tar.gz && \
|
||||
@@ -1162,7 +1179,7 @@ COPY --from=pgrag-src /ext-src/ /ext-src/
|
||||
# Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise
|
||||
WORKDIR /ext-src/onnxruntime-src
|
||||
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
|
||||
python3 python3-pip python3-venv protobuf-compiler && \
|
||||
python3 python3-pip python3-venv && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
python3 -m venv venv && \
|
||||
. venv/bin/activate && \
|
||||
@@ -1555,29 +1572,31 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
FROM build-deps AS pgaudit-src
|
||||
ARG PG_VERSION
|
||||
WORKDIR /ext-src
|
||||
COPY "compute/patches/pgaudit-parallel_workers-${PG_VERSION}.patch" .
|
||||
RUN case "${PG_VERSION}" in \
|
||||
"v14") \
|
||||
export PGAUDIT_VERSION=1.6.2 \
|
||||
export PGAUDIT_CHECKSUM=1f350d70a0cbf488c0f2b485e3a5c9b11f78ad9e3cbb95ef6904afa1eb3187eb \
|
||||
export PGAUDIT_VERSION=1.6.3 \
|
||||
export PGAUDIT_CHECKSUM=37a8f5a7cc8d9188e536d15cf0fdc457fcdab2547caedb54442c37f124110919 \
|
||||
;; \
|
||||
"v15") \
|
||||
export PGAUDIT_VERSION=1.7.0 \
|
||||
export PGAUDIT_CHECKSUM=8f4a73e451c88c567e516e6cba7dc1e23bc91686bb6f1f77f8f3126d428a8bd8 \
|
||||
export PGAUDIT_VERSION=1.7.1 \
|
||||
export PGAUDIT_CHECKSUM=e9c8e6e092d82b2f901d72555ce0fe7780552f35f8985573796cd7e64b09d4ec \
|
||||
;; \
|
||||
"v16") \
|
||||
export PGAUDIT_VERSION=16.0 \
|
||||
export PGAUDIT_CHECKSUM=d53ef985f2d0b15ba25c512c4ce967dce07b94fd4422c95bd04c4c1a055fe738 \
|
||||
export PGAUDIT_VERSION=16.1 \
|
||||
export PGAUDIT_CHECKSUM=3bae908ab70ba0c6f51224009dbcfff1a97bd6104c6273297a64292e1b921fee \
|
||||
;; \
|
||||
"v17") \
|
||||
export PGAUDIT_VERSION=17.0 \
|
||||
export PGAUDIT_CHECKSUM=7d0d08d030275d525f36cd48b38c6455f1023da863385badff0cec44965bfd8c \
|
||||
export PGAUDIT_VERSION=17.1 \
|
||||
export PGAUDIT_CHECKSUM=9c5f37504d393486cc75d2ced83f75f5899be64fa85f689d6babb833b4361e6c \
|
||||
;; \
|
||||
*) \
|
||||
echo "pgaudit is not supported on this PostgreSQL version" && exit 1;; \
|
||||
esac && \
|
||||
wget https://github.com/pgaudit/pgaudit/archive/refs/tags/${PGAUDIT_VERSION}.tar.gz -O pgaudit.tar.gz && \
|
||||
echo "${PGAUDIT_CHECKSUM} pgaudit.tar.gz" | sha256sum --check && \
|
||||
mkdir pgaudit-src && cd pgaudit-src && tar xzf ../pgaudit.tar.gz --strip-components=1 -C .
|
||||
mkdir pgaudit-src && cd pgaudit-src && tar xzf ../pgaudit.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < "/ext-src/pgaudit-parallel_workers-${PG_VERSION}.patch"
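When one of the pinned releases above is bumped, the matching checksum has to be regenerated by hand; a small sketch (the version shown is only an example):

    PGAUDIT_VERSION=17.1
    curl -fsSL "https://github.com/pgaudit/pgaudit/archive/refs/tags/${PGAUDIT_VERSION}.tar.gz" -o pgaudit.tar.gz
    sha256sum pgaudit.tar.gz   # paste the digest into the matching case branch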
|
||||
|
||||
FROM pg-build AS pgaudit-build
|
||||
COPY --from=pgaudit-src /ext-src/ /ext-src/
|
||||
@@ -1621,18 +1640,7 @@ FROM pg-build AS neon-ext-build
|
||||
ARG PG_VERSION
|
||||
|
||||
COPY pgxn/ pgxn/
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon \
|
||||
-s install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon_utils \
|
||||
-s install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon_test_utils \
|
||||
-s install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon_rmgr \
|
||||
-s install
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1722,7 +1730,7 @@ FROM extensions-${EXTENSIONS} AS neon-pg-ext-build
|
||||
# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries
|
||||
#
|
||||
#########################################################################################
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
||||
FROM build-deps-with-cargo AS compute-tools
|
||||
ARG BUILD_TAG
|
||||
ENV BUILD_TAG=$BUILD_TAG
|
||||
|
||||
@@ -1732,7 +1740,7 @@ COPY --chown=nonroot . .
|
||||
RUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \
|
||||
--mount=type=cache,uid=1000,target=/home/nonroot/.cargo/git \
|
||||
--mount=type=cache,uid=1000,target=/home/nonroot/target \
|
||||
mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \
|
||||
cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \
|
||||
mkdir target-bin && \
|
||||
cp target/release-line-debug-size-lto/compute_ctl \
|
||||
target/release-line-debug-size-lto/fast_import \
|
||||
@@ -1826,10 +1834,11 @@ RUN rm /usr/local/pgsql/lib/lib*.a
|
||||
# Preprocess the sql_exporter configuration files
|
||||
#
|
||||
#########################################################################################
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS sql_exporter_preprocessor
|
||||
FROM build-deps AS sql_exporter_preprocessor
|
||||
ARG PG_VERSION
|
||||
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
COPY --chown=nonroot compute compute
|
||||
|
||||
@@ -1976,7 +1985,7 @@ RUN apt update && \
|
||||
locales \
|
||||
lsof \
|
||||
procps \
|
||||
rsyslog \
|
||||
rsyslog-gnutls \
|
||||
screen \
|
||||
tcpdump \
|
||||
$VERSION_INSTALLS && \
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
|
||||
import 'sql_exporter/compute_logical_snapshots_bytes.libsonnet',
|
||||
import 'sql_exporter/compute_max_connections.libsonnet',
|
||||
import 'sql_exporter/compute_pg_oldest_frozen_xid_age.libsonnet',
|
||||
import 'sql_exporter/compute_pg_oldest_mxid_age.libsonnet',
|
||||
import 'sql_exporter/compute_receive_lsn.libsonnet',
|
||||
import 'sql_exporter/compute_subscriptions_count.libsonnet',
|
||||
import 'sql_exporter/connection_counts.libsonnet',
|
||||
|
||||
@@ -21,6 +21,8 @@ unix_socket_dir=/tmp/
|
||||
unix_socket_mode=0777
|
||||
; required for pgbouncer_exporter
|
||||
ignore_startup_parameters=extra_float_digits
|
||||
; pidfile for graceful termination
|
||||
pidfile=/tmp/pgbouncer.pid
|
||||
|
||||
;; Disable connection logging. It produces a lot of logs that no one looks at,
|
||||
;; and we can get similar log entries from the proxy too. We had incidents in
|
||||
|
||||
@@ -0,0 +1,13 @@
{
  metric_name: 'compute_pg_oldest_frozen_xid_age',
  type: 'gauge',
  help: 'Age of oldest XIDs that have not been frozen by VACUUM. An indicator of how long it has been since VACUUM last ran.',
  key_labels: [
    'database_name',
  ],
  value_label: 'metric',
  values: [
    'frozen_xid_age',
  ],
  query: importstr 'sql_exporter/compute_pg_oldest_frozen_xid_age.sql',
}
@@ -0,0 +1,4 @@
SELECT datname database_name,
       age(datfrozenxid) frozen_xid_age
FROM pg_database
ORDER BY frozen_xid_age DESC LIMIT 10;
@@ -0,0 +1,13 @@
{
  metric_name: 'compute_pg_oldest_mxid_age',
  type: 'gauge',
  help: 'Age of oldest MXIDs that have not been replaced by VACUUM. An indicator of how long it has been since VACUUM last ran.',
  key_labels: [
    'database_name',
  ],
  value_label: 'metric',
  values: [
    'min_mxid_age',
  ],
  query: importstr 'sql_exporter/compute_pg_oldest_mxid_age.sql',
}
compute/etc/sql_exporter/compute_pg_oldest_mxid_age.sql (new file, 4 lines)
@@ -0,0 +1,4 @@
SELECT datname database_name,
       mxid_age(datminmxid) min_mxid_age
FROM pg_database
ORDER BY min_mxid_age DESC LIMIT 10;
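Both collector queries can be sanity-checked against a running compute before sql_exporter picks them up; a minimal sketch (connection details are assumptions borrowed from the exporter connection string above):

    psql "postgresql://cloud_admin@127.0.0.1:5432/postgres" \
      -c "SELECT datname, age(datfrozenxid) FROM pg_database ORDER BY 2 DESC LIMIT 10;" \
      -c "SELECT datname, mxid_age(datminmxid) FROM pg_database ORDER BY 2 DESC LIMIT 10;"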
compute/manifest.schema.json (new file, 209 lines)
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Neon Compute Manifest Schema",
|
||||
"description": "Schema for Neon compute node configuration manifest",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"pg_settings": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"common": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"client_connection_check_interval": {
|
||||
"type": "string",
|
||||
"description": "Check for client disconnection interval in milliseconds"
|
||||
},
|
||||
"effective_io_concurrency": {
|
||||
"type": "string",
|
||||
"description": "Effective IO concurrency setting"
|
||||
},
|
||||
"fsync": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to force fsync to disk"
|
||||
},
|
||||
"hot_standby": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether hot standby is enabled"
|
||||
},
|
||||
"idle_in_transaction_session_timeout": {
|
||||
"type": "string",
|
||||
"description": "Timeout for idle transactions in milliseconds"
|
||||
},
|
||||
"listen_addresses": {
|
||||
"type": "string",
|
||||
"description": "Addresses to listen on"
|
||||
},
|
||||
"log_connections": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to log connections"
|
||||
},
|
||||
"log_disconnections": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to log disconnections"
|
||||
},
|
||||
"log_temp_files": {
|
||||
"type": "string",
|
||||
"description": "Size threshold for logging temporary files in KB"
|
||||
},
|
||||
"log_error_verbosity": {
|
||||
"type": "string",
|
||||
"enum": ["terse", "verbose", "default"],
|
||||
"description": "Error logging verbosity level"
|
||||
},
|
||||
"log_min_error_statement": {
|
||||
"type": "string",
|
||||
"description": "Minimum error level for statement logging"
|
||||
},
|
||||
"maintenance_io_concurrency": {
|
||||
"type": "string",
|
||||
"description": "Maintenance IO concurrency setting"
|
||||
},
|
||||
"max_connections": {
|
||||
"type": "string",
|
||||
"description": "Maximum number of connections"
|
||||
},
|
||||
"max_replication_flush_lag": {
|
||||
"type": "string",
|
||||
"description": "Maximum replication flush lag"
|
||||
},
|
||||
"max_replication_slots": {
|
||||
"type": "string",
|
||||
"description": "Maximum number of replication slots"
|
||||
},
|
||||
"max_replication_write_lag": {
|
||||
"type": "string",
|
||||
"description": "Maximum replication write lag"
|
||||
},
|
||||
"max_wal_senders": {
|
||||
"type": "string",
|
||||
"description": "Maximum number of WAL senders"
|
||||
},
|
||||
"max_wal_size": {
|
||||
"type": "string",
|
||||
"description": "Maximum WAL size"
|
||||
},
|
||||
"neon.unstable_extensions": {
|
||||
"type": "string",
|
||||
"description": "List of unstable extensions"
|
||||
},
|
||||
"neon.protocol_version": {
|
||||
"type": "string",
|
||||
"description": "Neon protocol version"
|
||||
},
|
||||
"password_encryption": {
|
||||
"type": "string",
|
||||
"description": "Password encryption method"
|
||||
},
|
||||
"restart_after_crash": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to restart after crash"
|
||||
},
|
||||
"superuser_reserved_connections": {
|
||||
"type": "string",
|
||||
"description": "Number of reserved connections for superuser"
|
||||
},
|
||||
"synchronous_standby_names": {
|
||||
"type": "string",
|
||||
"description": "Names of synchronous standby servers"
|
||||
},
|
||||
"wal_keep_size": {
|
||||
"type": "string",
|
||||
"description": "WAL keep size"
|
||||
},
|
||||
"wal_level": {
|
||||
"type": "string",
|
||||
"description": "WAL level"
|
||||
},
|
||||
"wal_log_hints": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to log hints in WAL"
|
||||
},
|
||||
"wal_sender_timeout": {
|
||||
"type": "string",
|
||||
"description": "WAL sender timeout in milliseconds"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"client_connection_check_interval",
|
||||
"effective_io_concurrency",
|
||||
"fsync",
|
||||
"hot_standby",
|
||||
"idle_in_transaction_session_timeout",
|
||||
"listen_addresses",
|
||||
"log_connections",
|
||||
"log_disconnections",
|
||||
"log_temp_files",
|
||||
"log_error_verbosity",
|
||||
"log_min_error_statement",
|
||||
"maintenance_io_concurrency",
|
||||
"max_connections",
|
||||
"max_replication_flush_lag",
|
||||
"max_replication_slots",
|
||||
"max_replication_write_lag",
|
||||
"max_wal_senders",
|
||||
"max_wal_size",
|
||||
"neon.unstable_extensions",
|
||||
"neon.protocol_version",
|
||||
"password_encryption",
|
||||
"restart_after_crash",
|
||||
"superuser_reserved_connections",
|
||||
"synchronous_standby_names",
|
||||
"wal_keep_size",
|
||||
"wal_level",
|
||||
"wal_log_hints",
|
||||
"wal_sender_timeout"
|
||||
]
|
||||
},
|
||||
"replica": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"hot_standby": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether hot standby is enabled for replicas"
|
||||
}
|
||||
},
|
||||
"required": ["hot_standby"]
|
||||
},
|
||||
"per_version": {
|
||||
"type": "object",
|
||||
"patternProperties": {
|
||||
"^1[4-7]$": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"common": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"io_combine_limit": {
|
||||
"type": "string",
|
||||
"description": "IO combine limit"
|
||||
}
|
||||
}
|
||||
},
|
||||
"replica": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"recovery_prefetch": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to enable recovery prefetch for PostgreSQL replicas"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["common", "replica", "per_version"]
|
||||
}
|
||||
},
|
||||
"required": ["pg_settings"]
|
||||
}
|
||||
@@ -105,17 +105,17 @@ pg_settings:
|
||||
# Neon hot standby ignores pages that are not in the shared_buffers
|
||||
recovery_prefetch: "off"
|
||||
16:
|
||||
common:
|
||||
common: {}
|
||||
replica:
|
||||
# prefetching of blocks referenced in WAL doesn't make sense for us
|
||||
# Neon hot standby ignores pages that are not in the shared_buffers
|
||||
recovery_prefetch: "off"
|
||||
15:
|
||||
common:
|
||||
common: {}
|
||||
replica:
|
||||
# prefetching of blocks referenced in WAL doesn't make sense for us
|
||||
# Neon hot standby ignores pages that are not in the shared_buffers
|
||||
recovery_prefetch: "off"
|
||||
14:
|
||||
common:
|
||||
replica:
|
||||
common: {}
|
||||
replica: {}
|
||||
|
||||
compute/package-lock.json (generated, new file, 37 lines)
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"name": "neon-compute",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "neon-compute",
|
||||
"dependencies": {
|
||||
"@sourcemeta/jsonschema": "9.3.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@sourcemeta/jsonschema": {
|
||||
"version": "9.3.4",
|
||||
"resolved": "https://registry.npmjs.org/@sourcemeta/jsonschema/-/jsonschema-9.3.4.tgz",
|
||||
"integrity": "sha512-hkujfkZAIGXUs4U//We9faZW8LZ4/H9LqagRYsFSulH/VLcKPNhZyCTGg7AhORuzm27zqENvKpnX4g2FzudYFw==",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"license": "AGPL-3.0",
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"bin": {
|
||||
"jsonschema": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sourcemeta"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
compute/package.json (new file, 7 lines)
@@ -0,0 +1,7 @@
{
  "name": "neon-compute",
  "private": true,
  "dependencies": {
    "@sourcemeta/jsonschema": "9.3.4"
  }
}
|
||||
@@ -1,8 +1,8 @@
|
||||
diff --git a/sql/anon.sql b/sql/anon.sql
|
||||
index 0cdc769..f6cc950 100644
|
||||
index 0cdc769..b450327 100644
|
||||
--- a/sql/anon.sql
|
||||
+++ b/sql/anon.sql
|
||||
@@ -1141,3 +1141,8 @@ $$
|
||||
@@ -1141,3 +1141,15 @@ $$
|
||||
-- TODO : https://en.wikipedia.org/wiki/L-diversity
|
||||
|
||||
-- TODO : https://en.wikipedia.org/wiki/T-closeness
|
||||
@@ -11,6 +11,13 @@ index 0cdc769..f6cc950 100644
|
||||
+
|
||||
+GRANT ALL ON SCHEMA anon to neon_superuser;
|
||||
+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
|
||||
+
|
||||
+DO $$
|
||||
+BEGIN
|
||||
+ IF current_setting('server_version_num')::int >= 150000 THEN
|
||||
+ GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO neon_superuser;
|
||||
+ END IF;
|
||||
+END $$;
|
||||
diff --git a/sql/init.sql b/sql/init.sql
|
||||
index 7da6553..9b6164b 100644
|
||||
--- a/sql/init.sql
|
||||
|
||||
compute/patches/onnxruntime.patch (new file, 15 lines)
@@ -0,0 +1,15 @@
|
||||
diff --git a/cmake/deps.txt b/cmake/deps.txt
|
||||
index d213b09034..229de2ebf0 100644
|
||||
--- a/cmake/deps.txt
|
||||
+++ b/cmake/deps.txt
|
||||
@@ -22,7 +22,9 @@ dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b3132
|
||||
# it contains changes on top of 3.4.0 which are required to fix build issues.
|
||||
# Until the 3.4.1 release this is the best option we have.
|
||||
# Issue link: https://gitlab.com/libeigen/eigen/-/issues/2744
|
||||
-eigen;https://gitlab.com/libeigen/eigen/-/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;be8be39fdbc6e60e94fa7870b280707069b5b81a
|
||||
+# Moved to github mirror to avoid gitlab issues.
|
||||
+# Issue link: https://github.com/bazelbuild/bazel-central-registry/issues/4355
|
||||
+eigen;https://github.com/eigen-mirror/eigen/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;61418a349000ba7744a3ad03cf5071f22ebf860a
|
||||
flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v23.5.26.zip;59422c3b5e573dd192fead2834d25951f1c1670c
|
||||
fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
|
||||
fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
|
||||
compute/patches/pgaudit-parallel_workers-v14.patch (new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
commit 7220bb3a3f23fa27207d77562dcc286f9a123313
|
||||
Author: Tristan Partin <tristan.partin@databricks.com>
|
||||
Date: 2025-06-23 02:09:31 +0000
|
||||
|
||||
Disable logging in parallel workers
|
||||
|
||||
When a query uses parallel workers, pgaudit will log the same query for
|
||||
every parallel worker. This is undesirable since it can result in log
|
||||
amplification for queries that use parallel workers.
|
||||
|
||||
Signed-off-by: Tristan Partin <tristan.partin@databricks.com>
|
||||
|
||||
diff --git a/expected/pgaudit.out b/expected/pgaudit.out
|
||||
index baa8011..a601375 100644
|
||||
--- a/expected/pgaudit.out
|
||||
+++ b/expected/pgaudit.out
|
||||
@@ -2563,6 +2563,37 @@ COMMIT;
|
||||
NOTICE: AUDIT: SESSION,12,4,MISC,COMMIT,,,COMMIT;,<not logged>
|
||||
DROP TABLE part_test;
|
||||
NOTICE: AUDIT: SESSION,13,1,DDL,DROP TABLE,,,DROP TABLE part_test;,<not logged>
|
||||
+--
|
||||
+-- Test logging in parallel workers
|
||||
+SET pgaudit.log = 'read';
|
||||
+SET pgaudit.log_client = on;
|
||||
+SET pgaudit.log_level = 'notice';
|
||||
+-- Force parallel execution for testing
|
||||
+SET max_parallel_workers_per_gather = 2;
|
||||
+SET parallel_tuple_cost = 0;
|
||||
+SET parallel_setup_cost = 0;
|
||||
+SET min_parallel_table_scan_size = 0;
|
||||
+SET min_parallel_index_scan_size = 0;
|
||||
+-- Create table with enough data to trigger parallel execution
|
||||
+CREATE TABLE parallel_test (id int, data text);
|
||||
+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';
|
||||
+SELECT count(*) FROM parallel_test;
|
||||
+NOTICE: AUDIT: SESSION,14,1,READ,SELECT,,,SELECT count(*) FROM parallel_test;,<not logged>
|
||||
+ count
|
||||
+-------
|
||||
+ 1000
|
||||
+(1 row)
|
||||
+
|
||||
+-- Cleanup parallel test
|
||||
+DROP TABLE parallel_test;
|
||||
+RESET max_parallel_workers_per_gather;
|
||||
+RESET parallel_tuple_cost;
|
||||
+RESET parallel_setup_cost;
|
||||
+RESET min_parallel_table_scan_size;
|
||||
+RESET min_parallel_index_scan_size;
|
||||
+RESET pgaudit.log;
|
||||
+RESET pgaudit.log_client;
|
||||
+RESET pgaudit.log_level;
|
||||
-- Cleanup
|
||||
-- Set client_min_messages up to warning to avoid noise
|
||||
SET client_min_messages = 'warning';
|
||||
diff --git a/pgaudit.c b/pgaudit.c
|
||||
index 5e6fd38..ac9ded2 100644
|
||||
--- a/pgaudit.c
|
||||
+++ b/pgaudit.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/htup_details.h"
|
||||
+#include "access/parallel.h"
|
||||
#include "access/sysattr.h"
|
||||
#include "access/xact.h"
|
||||
#include "access/relation.h"
|
||||
@@ -1303,7 +1304,7 @@ pgaudit_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)
|
||||
{
|
||||
AuditEventStackItem *stackItem = NULL;
|
||||
|
||||
- if (!internalStatement)
|
||||
+ if (!internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Push the audit even onto the stack */
|
||||
stackItem = stack_push();
|
||||
@@ -1384,7 +1385,7 @@ pgaudit_ExecutorCheckPerms_hook(List *rangeTabls, bool abort)
|
||||
|
||||
/* Log DML if the audit role is valid or session logging is enabled */
|
||||
if ((auditOid != InvalidOid || auditLogBitmap != 0) &&
|
||||
- !IsAbortedTransactionBlockState())
|
||||
+ !IsAbortedTransactionBlockState() && !IsParallelWorker())
|
||||
{
|
||||
/* If auditLogRows is on, wait for rows processed to be set */
|
||||
if (auditLogRows && auditEventStack != NULL)
|
||||
@@ -1438,7 +1439,7 @@ pgaudit_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 c
|
||||
else
|
||||
standard_ExecutorRun(queryDesc, direction, count, execute_once);
|
||||
|
||||
- if (auditLogRows && !internalStatement)
|
||||
+ if (auditLogRows && !internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Find an item from the stack by the query memory context */
|
||||
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);
|
||||
@@ -1458,7 +1459,7 @@ pgaudit_ExecutorEnd_hook(QueryDesc *queryDesc)
|
||||
AuditEventStackItem *stackItem = NULL;
|
||||
AuditEventStackItem *auditEventStackFull = NULL;
|
||||
|
||||
- if (auditLogRows && !internalStatement)
|
||||
+ if (auditLogRows && !internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Find an item from the stack by the query memory context */
|
||||
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);
|
||||
diff --git a/sql/pgaudit.sql b/sql/pgaudit.sql
|
||||
index cc1374a..1870a60 100644
|
||||
--- a/sql/pgaudit.sql
|
||||
+++ b/sql/pgaudit.sql
|
||||
@@ -1612,6 +1612,36 @@ COMMIT;
|
||||
|
||||
DROP TABLE part_test;
|
||||
|
||||
+--
|
||||
+-- Test logging in parallel workers
|
||||
+SET pgaudit.log = 'read';
|
||||
+SET pgaudit.log_client = on;
|
||||
+SET pgaudit.log_level = 'notice';
|
||||
+
|
||||
+-- Force parallel execution for testing
|
||||
+SET max_parallel_workers_per_gather = 2;
|
||||
+SET parallel_tuple_cost = 0;
|
||||
+SET parallel_setup_cost = 0;
|
||||
+SET min_parallel_table_scan_size = 0;
|
||||
+SET min_parallel_index_scan_size = 0;
|
||||
+
|
||||
+-- Create table with enough data to trigger parallel execution
|
||||
+CREATE TABLE parallel_test (id int, data text);
|
||||
+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';
|
||||
+
|
||||
+SELECT count(*) FROM parallel_test;
|
||||
+
|
||||
+-- Cleanup parallel test
|
||||
+DROP TABLE parallel_test;
|
||||
+RESET max_parallel_workers_per_gather;
|
||||
+RESET parallel_tuple_cost;
|
||||
+RESET parallel_setup_cost;
|
||||
+RESET min_parallel_table_scan_size;
|
||||
+RESET min_parallel_index_scan_size;
|
||||
+RESET pgaudit.log;
|
||||
+RESET pgaudit.log_client;
|
||||
+RESET pgaudit.log_level;
|
||||
+
|
||||
-- Cleanup
|
||||
-- Set client_min_messages up to warning to avoid noise
|
||||
SET client_min_messages = 'warning';
|
||||
compute/patches/pgaudit-parallel_workers-v15.patch (new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
commit 29dc2847f6255541992f18faf8a815dfab79631a
|
||||
Author: Tristan Partin <tristan.partin@databricks.com>
|
||||
Date: 2025-06-23 02:09:31 +0000
|
||||
|
||||
Disable logging in parallel workers
|
||||
|
||||
When a query uses parallel workers, pgaudit will log the same query for
|
||||
every parallel worker. This is undesirable since it can result in log
|
||||
amplification for queries that use parallel workers.
|
||||
|
||||
Signed-off-by: Tristan Partin <tristan.partin@databricks.com>
|
||||
|
||||
diff --git a/expected/pgaudit.out b/expected/pgaudit.out
|
||||
index b22560b..73f0327 100644
|
||||
--- a/expected/pgaudit.out
|
||||
+++ b/expected/pgaudit.out
|
||||
@@ -2563,6 +2563,37 @@ COMMIT;
|
||||
NOTICE: AUDIT: SESSION,12,4,MISC,COMMIT,,,COMMIT;,<not logged>
|
||||
DROP TABLE part_test;
|
||||
NOTICE: AUDIT: SESSION,13,1,DDL,DROP TABLE,,,DROP TABLE part_test;,<not logged>
|
||||
+--
|
||||
+-- Test logging in parallel workers
|
||||
+SET pgaudit.log = 'read';
|
||||
+SET pgaudit.log_client = on;
|
||||
+SET pgaudit.log_level = 'notice';
|
||||
+-- Force parallel execution for testing
|
||||
+SET max_parallel_workers_per_gather = 2;
|
||||
+SET parallel_tuple_cost = 0;
|
||||
+SET parallel_setup_cost = 0;
|
||||
+SET min_parallel_table_scan_size = 0;
|
||||
+SET min_parallel_index_scan_size = 0;
|
||||
+-- Create table with enough data to trigger parallel execution
|
||||
+CREATE TABLE parallel_test (id int, data text);
|
||||
+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';
|
||||
+SELECT count(*) FROM parallel_test;
|
||||
+NOTICE: AUDIT: SESSION,14,1,READ,SELECT,,,SELECT count(*) FROM parallel_test;,<not logged>
|
||||
+ count
|
||||
+-------
|
||||
+ 1000
|
||||
+(1 row)
|
||||
+
|
||||
+-- Cleanup parallel test
|
||||
+DROP TABLE parallel_test;
|
||||
+RESET max_parallel_workers_per_gather;
|
||||
+RESET parallel_tuple_cost;
|
||||
+RESET parallel_setup_cost;
|
||||
+RESET min_parallel_table_scan_size;
|
||||
+RESET min_parallel_index_scan_size;
|
||||
+RESET pgaudit.log;
|
||||
+RESET pgaudit.log_client;
|
||||
+RESET pgaudit.log_level;
|
||||
-- Cleanup
|
||||
-- Set client_min_messages up to warning to avoid noise
|
||||
SET client_min_messages = 'warning';
|
||||
diff --git a/pgaudit.c b/pgaudit.c
|
||||
index 5e6fd38..ac9ded2 100644
|
||||
--- a/pgaudit.c
|
||||
+++ b/pgaudit.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/htup_details.h"
|
||||
+#include "access/parallel.h"
|
||||
#include "access/sysattr.h"
|
||||
#include "access/xact.h"
|
||||
#include "access/relation.h"
|
||||
@@ -1303,7 +1304,7 @@ pgaudit_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)
|
||||
{
|
||||
AuditEventStackItem *stackItem = NULL;
|
||||
|
||||
- if (!internalStatement)
|
||||
+ if (!internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Push the audit even onto the stack */
|
||||
stackItem = stack_push();
|
||||
@@ -1384,7 +1385,7 @@ pgaudit_ExecutorCheckPerms_hook(List *rangeTabls, bool abort)
|
||||
|
||||
/* Log DML if the audit role is valid or session logging is enabled */
|
||||
if ((auditOid != InvalidOid || auditLogBitmap != 0) &&
|
||||
- !IsAbortedTransactionBlockState())
|
||||
+ !IsAbortedTransactionBlockState() && !IsParallelWorker())
|
||||
{
|
||||
/* If auditLogRows is on, wait for rows processed to be set */
|
||||
if (auditLogRows && auditEventStack != NULL)
|
||||
@@ -1438,7 +1439,7 @@ pgaudit_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 c
|
||||
else
|
||||
standard_ExecutorRun(queryDesc, direction, count, execute_once);
|
||||
|
||||
- if (auditLogRows && !internalStatement)
|
||||
+ if (auditLogRows && !internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Find an item from the stack by the query memory context */
|
||||
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);
|
||||
@@ -1458,7 +1459,7 @@ pgaudit_ExecutorEnd_hook(QueryDesc *queryDesc)
|
||||
AuditEventStackItem *stackItem = NULL;
|
||||
AuditEventStackItem *auditEventStackFull = NULL;
|
||||
|
||||
- if (auditLogRows && !internalStatement)
|
||||
+ if (auditLogRows && !internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Find an item from the stack by the query memory context */
|
||||
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);
|
||||
diff --git a/sql/pgaudit.sql b/sql/pgaudit.sql
|
||||
index 8052426..7f0667b 100644
|
||||
--- a/sql/pgaudit.sql
|
||||
+++ b/sql/pgaudit.sql
|
||||
@@ -1612,6 +1612,36 @@ COMMIT;
|
||||
|
||||
DROP TABLE part_test;
|
||||
|
||||
+--
|
||||
+-- Test logging in parallel workers
|
||||
+SET pgaudit.log = 'read';
|
||||
+SET pgaudit.log_client = on;
|
||||
+SET pgaudit.log_level = 'notice';
|
||||
+
|
||||
+-- Force parallel execution for testing
|
||||
+SET max_parallel_workers_per_gather = 2;
|
||||
+SET parallel_tuple_cost = 0;
|
||||
+SET parallel_setup_cost = 0;
|
||||
+SET min_parallel_table_scan_size = 0;
|
||||
+SET min_parallel_index_scan_size = 0;
|
||||
+
|
||||
+-- Create table with enough data to trigger parallel execution
|
||||
+CREATE TABLE parallel_test (id int, data text);
|
||||
+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';
|
||||
+
|
||||
+SELECT count(*) FROM parallel_test;
|
||||
+
|
||||
+-- Cleanup parallel test
|
||||
+DROP TABLE parallel_test;
|
||||
+RESET max_parallel_workers_per_gather;
|
||||
+RESET parallel_tuple_cost;
|
||||
+RESET parallel_setup_cost;
|
||||
+RESET min_parallel_table_scan_size;
|
||||
+RESET min_parallel_index_scan_size;
|
||||
+RESET pgaudit.log;
|
||||
+RESET pgaudit.log_client;
|
||||
+RESET pgaudit.log_level;
|
||||
+
|
||||
-- Cleanup
|
||||
-- Set client_min_messages up to warning to avoid noise
|
||||
SET client_min_messages = 'warning';
|
||||
compute/patches/pgaudit-parallel_workers-v16.patch (new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
commit cc708dde7ef2af2a8120d757102d2e34c0463a0f
|
||||
Author: Tristan Partin <tristan.partin@databricks.com>
|
||||
Date: 2025-06-23 02:09:31 +0000
|
||||
|
||||
Disable logging in parallel workers
|
||||
|
||||
When a query uses parallel workers, pgaudit will log the same query for
|
||||
every parallel worker. This is undesirable since it can result in log
|
||||
amplification for queries that use parallel workers.
|
||||
|
||||
Signed-off-by: Tristan Partin <tristan.partin@databricks.com>
|
||||
|
||||
diff --git a/expected/pgaudit.out b/expected/pgaudit.out
|
||||
index 8772054..9b66ac6 100644
|
||||
--- a/expected/pgaudit.out
|
||||
+++ b/expected/pgaudit.out
|
||||
@@ -2556,6 +2556,37 @@ DROP SERVER fdw_server;
|
||||
NOTICE: AUDIT: SESSION,11,1,DDL,DROP SERVER,,,DROP SERVER fdw_server;,<not logged>
|
||||
DROP EXTENSION postgres_fdw;
|
||||
NOTICE: AUDIT: SESSION,12,1,DDL,DROP EXTENSION,,,DROP EXTENSION postgres_fdw;,<not logged>
|
||||
+--
|
||||
+-- Test logging in parallel workers
|
||||
+SET pgaudit.log = 'read';
|
||||
+SET pgaudit.log_client = on;
|
||||
+SET pgaudit.log_level = 'notice';
|
||||
+-- Force parallel execution for testing
|
||||
+SET max_parallel_workers_per_gather = 2;
|
||||
+SET parallel_tuple_cost = 0;
|
||||
+SET parallel_setup_cost = 0;
|
||||
+SET min_parallel_table_scan_size = 0;
|
||||
+SET min_parallel_index_scan_size = 0;
|
||||
+-- Create table with enough data to trigger parallel execution
|
||||
+CREATE TABLE parallel_test (id int, data text);
|
||||
+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';
|
||||
+SELECT count(*) FROM parallel_test;
|
||||
+NOTICE: AUDIT: SESSION,13,1,READ,SELECT,,,SELECT count(*) FROM parallel_test;,<not logged>
|
||||
+ count
|
||||
+-------
|
||||
+ 1000
|
||||
+(1 row)
|
||||
+
|
||||
+-- Cleanup parallel test
|
||||
+DROP TABLE parallel_test;
|
||||
+RESET max_parallel_workers_per_gather;
|
||||
+RESET parallel_tuple_cost;
|
||||
+RESET parallel_setup_cost;
|
||||
+RESET min_parallel_table_scan_size;
|
||||
+RESET min_parallel_index_scan_size;
|
||||
+RESET pgaudit.log;
|
||||
+RESET pgaudit.log_client;
|
||||
+RESET pgaudit.log_level;
|
||||
-- Cleanup
|
||||
-- Set client_min_messages up to warning to avoid noise
|
||||
SET client_min_messages = 'warning';
|
||||
diff --git a/pgaudit.c b/pgaudit.c
|
||||
index 004d1f9..f061164 100644
|
||||
--- a/pgaudit.c
|
||||
+++ b/pgaudit.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/htup_details.h"
|
||||
+#include "access/parallel.h"
|
||||
#include "access/sysattr.h"
|
||||
#include "access/xact.h"
|
||||
#include "access/relation.h"
|
||||
@@ -1339,7 +1340,7 @@ pgaudit_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)
|
||||
{
|
||||
AuditEventStackItem *stackItem = NULL;
|
||||
|
||||
- if (!internalStatement)
|
||||
+ if (!internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Push the audit even onto the stack */
|
||||
stackItem = stack_push();
|
||||
@@ -1420,7 +1421,7 @@ pgaudit_ExecutorCheckPerms_hook(List *rangeTabls, List *permInfos, bool abort)
|
||||
|
||||
/* Log DML if the audit role is valid or session logging is enabled */
|
||||
if ((auditOid != InvalidOid || auditLogBitmap != 0) &&
|
||||
- !IsAbortedTransactionBlockState())
|
||||
+ !IsAbortedTransactionBlockState() && !IsParallelWorker())
|
||||
{
|
||||
/* If auditLogRows is on, wait for rows processed to be set */
|
||||
if (auditLogRows && auditEventStack != NULL)
|
||||
@@ -1475,7 +1476,7 @@ pgaudit_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 c
|
||||
else
|
||||
standard_ExecutorRun(queryDesc, direction, count, execute_once);
|
||||
|
||||
- if (auditLogRows && !internalStatement)
|
||||
+ if (auditLogRows && !internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Find an item from the stack by the query memory context */
|
||||
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);
|
||||
@@ -1495,7 +1496,7 @@ pgaudit_ExecutorEnd_hook(QueryDesc *queryDesc)
|
||||
AuditEventStackItem *stackItem = NULL;
|
||||
AuditEventStackItem *auditEventStackFull = NULL;
|
||||
|
||||
- if (auditLogRows && !internalStatement)
|
||||
+ if (auditLogRows && !internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Find an item from the stack by the query memory context */
|
||||
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);
|
||||
diff --git a/sql/pgaudit.sql b/sql/pgaudit.sql
|
||||
index 6aae88b..de6d7fd 100644
|
||||
--- a/sql/pgaudit.sql
|
||||
+++ b/sql/pgaudit.sql
|
||||
@@ -1631,6 +1631,36 @@ DROP USER MAPPING FOR regress_user1 SERVER fdw_server;
|
||||
DROP SERVER fdw_server;
|
||||
DROP EXTENSION postgres_fdw;
|
||||
|
||||
+--
|
||||
+-- Test logging in parallel workers
|
||||
+SET pgaudit.log = 'read';
|
||||
+SET pgaudit.log_client = on;
|
||||
+SET pgaudit.log_level = 'notice';
|
||||
+
|
||||
+-- Force parallel execution for testing
|
||||
+SET max_parallel_workers_per_gather = 2;
|
||||
+SET parallel_tuple_cost = 0;
|
||||
+SET parallel_setup_cost = 0;
|
||||
+SET min_parallel_table_scan_size = 0;
|
||||
+SET min_parallel_index_scan_size = 0;
|
||||
+
|
||||
+-- Create table with enough data to trigger parallel execution
|
||||
+CREATE TABLE parallel_test (id int, data text);
|
||||
+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';
|
||||
+
|
||||
+SELECT count(*) FROM parallel_test;
|
||||
+
|
||||
+-- Cleanup parallel test
|
||||
+DROP TABLE parallel_test;
|
||||
+RESET max_parallel_workers_per_gather;
|
||||
+RESET parallel_tuple_cost;
|
||||
+RESET parallel_setup_cost;
|
||||
+RESET min_parallel_table_scan_size;
|
||||
+RESET min_parallel_index_scan_size;
|
||||
+RESET pgaudit.log;
|
||||
+RESET pgaudit.log_client;
|
||||
+RESET pgaudit.log_level;
|
||||
+
|
||||
-- Cleanup
|
||||
-- Set client_min_messages up to warning to avoid noise
|
||||
SET client_min_messages = 'warning';
|
||||
compute/patches/pgaudit-parallel_workers-v17.patch (new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
commit 8d02e4c6c5e1e8676251b0717a46054267091cb4
|
||||
Author: Tristan Partin <tristan.partin@databricks.com>
|
||||
Date: 2025-06-23 02:09:31 +0000
|
||||
|
||||
Disable logging in parallel workers
|
||||
|
||||
When a query uses parallel workers, pgaudit will log the same query for
|
||||
every parallel worker. This is undesirable since it can result in log
|
||||
amplification for queries that use parallel workers.
|
||||
|
||||
Signed-off-by: Tristan Partin <tristan.partin@databricks.com>
|
||||
|
||||
diff --git a/expected/pgaudit.out b/expected/pgaudit.out
|
||||
index d696287..4b1059a 100644
|
||||
--- a/expected/pgaudit.out
|
||||
+++ b/expected/pgaudit.out
|
||||
@@ -2568,6 +2568,37 @@ DROP SERVER fdw_server;
|
||||
NOTICE: AUDIT: SESSION,11,1,DDL,DROP SERVER,,,DROP SERVER fdw_server,<not logged>
|
||||
DROP EXTENSION postgres_fdw;
|
||||
NOTICE: AUDIT: SESSION,12,1,DDL,DROP EXTENSION,,,DROP EXTENSION postgres_fdw,<not logged>
|
||||
+--
|
||||
+-- Test logging in parallel workers
|
||||
+SET pgaudit.log = 'read';
|
||||
+SET pgaudit.log_client = on;
|
||||
+SET pgaudit.log_level = 'notice';
|
||||
+-- Force parallel execution for testing
|
||||
+SET max_parallel_workers_per_gather = 2;
|
||||
+SET parallel_tuple_cost = 0;
|
||||
+SET parallel_setup_cost = 0;
|
||||
+SET min_parallel_table_scan_size = 0;
|
||||
+SET min_parallel_index_scan_size = 0;
|
||||
+-- Create table with enough data to trigger parallel execution
|
||||
+CREATE TABLE parallel_test (id int, data text);
|
||||
+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';
|
||||
+SELECT count(*) FROM parallel_test;
|
||||
+NOTICE: AUDIT: SESSION,13,1,READ,SELECT,,,SELECT count(*) FROM parallel_test,<not logged>
|
||||
+ count
|
||||
+-------
|
||||
+ 1000
|
||||
+(1 row)
|
||||
+
|
||||
+-- Cleanup parallel test
|
||||
+DROP TABLE parallel_test;
|
||||
+RESET max_parallel_workers_per_gather;
|
||||
+RESET parallel_tuple_cost;
|
||||
+RESET parallel_setup_cost;
|
||||
+RESET min_parallel_table_scan_size;
|
||||
+RESET min_parallel_index_scan_size;
|
||||
+RESET pgaudit.log;
|
||||
+RESET pgaudit.log_client;
|
||||
+RESET pgaudit.log_level;
|
||||
-- Cleanup
|
||||
-- Set client_min_messages up to warning to avoid noise
|
||||
SET client_min_messages = 'warning';
|
||||
diff --git a/pgaudit.c b/pgaudit.c
|
||||
index 1764af1..0e48875 100644
|
||||
--- a/pgaudit.c
|
||||
+++ b/pgaudit.c
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/htup_details.h"
|
||||
+#include "access/parallel.h"
|
||||
#include "access/sysattr.h"
|
||||
#include "access/xact.h"
|
||||
#include "access/relation.h"
|
||||
@@ -1406,7 +1407,7 @@ pgaudit_ExecutorStart_hook(QueryDesc *queryDesc, int eflags)
|
||||
{
|
||||
AuditEventStackItem *stackItem = NULL;
|
||||
|
||||
- if (!internalStatement)
|
||||
+ if (!internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Push the audit event onto the stack */
|
||||
stackItem = stack_push();
|
||||
@@ -1489,7 +1490,7 @@ pgaudit_ExecutorCheckPerms_hook(List *rangeTabls, List *permInfos, bool abort)
|
||||
|
||||
/* Log DML if the audit role is valid or session logging is enabled */
|
||||
if ((auditOid != InvalidOid || auditLogBitmap != 0) &&
|
||||
- !IsAbortedTransactionBlockState())
|
||||
+ !IsAbortedTransactionBlockState() && !IsParallelWorker())
|
||||
{
|
||||
/* If auditLogRows is on, wait for rows processed to be set */
|
||||
if (auditLogRows && auditEventStack != NULL)
|
||||
@@ -1544,7 +1545,7 @@ pgaudit_ExecutorRun_hook(QueryDesc *queryDesc, ScanDirection direction, uint64 c
|
||||
else
|
||||
standard_ExecutorRun(queryDesc, direction, count, execute_once);
|
||||
|
||||
- if (auditLogRows && !internalStatement)
|
||||
+ if (auditLogRows && !internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Find an item from the stack by the query memory context */
|
||||
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);
|
||||
@@ -1564,7 +1565,7 @@ pgaudit_ExecutorEnd_hook(QueryDesc *queryDesc)
|
||||
AuditEventStackItem *stackItem = NULL;
|
||||
AuditEventStackItem *auditEventStackFull = NULL;
|
||||
|
||||
- if (auditLogRows && !internalStatement)
|
||||
+ if (auditLogRows && !internalStatement && !IsParallelWorker())
|
||||
{
|
||||
/* Find an item from the stack by the query memory context */
|
||||
stackItem = stack_find_context(queryDesc->estate->es_query_cxt);
|
||||
diff --git a/sql/pgaudit.sql b/sql/pgaudit.sql
|
||||
index e161f01..c873098 100644
|
||||
--- a/sql/pgaudit.sql
|
||||
+++ b/sql/pgaudit.sql
|
||||
@@ -1637,6 +1637,36 @@ DROP USER MAPPING FOR regress_user1 SERVER fdw_server;
|
||||
DROP SERVER fdw_server;
|
||||
DROP EXTENSION postgres_fdw;
|
||||
|
||||
+--
|
||||
+-- Test logging in parallel workers
|
||||
+SET pgaudit.log = 'read';
|
||||
+SET pgaudit.log_client = on;
|
||||
+SET pgaudit.log_level = 'notice';
|
||||
+
|
||||
+-- Force parallel execution for testing
|
||||
+SET max_parallel_workers_per_gather = 2;
|
||||
+SET parallel_tuple_cost = 0;
|
||||
+SET parallel_setup_cost = 0;
|
||||
+SET min_parallel_table_scan_size = 0;
|
||||
+SET min_parallel_index_scan_size = 0;
|
||||
+
|
||||
+-- Create table with enough data to trigger parallel execution
|
||||
+CREATE TABLE parallel_test (id int, data text);
|
||||
+INSERT INTO parallel_test SELECT generate_series(1, 1000), 'test data';
|
||||
+
|
||||
+SELECT count(*) FROM parallel_test;
|
||||
+
|
||||
+-- Cleanup parallel test
|
||||
+DROP TABLE parallel_test;
|
||||
+RESET max_parallel_workers_per_gather;
|
||||
+RESET parallel_tuple_cost;
|
||||
+RESET parallel_setup_cost;
|
||||
+RESET min_parallel_table_scan_size;
|
||||
+RESET min_parallel_index_scan_size;
|
||||
+RESET pgaudit.log;
|
||||
+RESET pgaudit.log_client;
|
||||
+RESET pgaudit.log_level;
|
||||
+
|
||||
-- Cleanup
|
||||
-- Set client_min_messages up to warning to avoid noise
|
||||
SET client_min_messages = 'warning';
|
||||
@@ -26,7 +26,7 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: pgbouncer-exporter
user: postgres
sysvInitAction: respawn
@@ -59,7 +59,7 @@ files:
# the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
# resolve host" log messages that they generate.
Defaults !fqdn


# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
# regardless of hostname (ALL)

@@ -26,7 +26,7 @@ commands:
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter pgaudit.log=none" /bin/postgres_exporter --config.file=/etc/postgres_exporter.yml'
- name: pgbouncer-exporter
user: postgres
sysvInitAction: respawn
@@ -59,7 +59,7 @@ files:
# the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
# resolve host" log messages that they generate.
Defaults !fqdn


# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
# regardless of hostname (ALL)

@@ -27,6 +27,7 @@ fail.workspace = true
flate2.workspace = true
futures.workspace = true
http.workspace = true
hostname-validator = "1.1"
indexmap.workspace = true
itertools.workspace = true
jsonwebtoken.workspace = true
@@ -38,6 +39,7 @@ once_cell.workspace = true
opentelemetry.workspace = true
opentelemetry_sdk.workspace = true
p256 = { version = "0.13", features = ["pem"] }
pageserver_page_api.workspace = true
postgres.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["json"] }
@@ -53,6 +55,7 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tokio-util.workspace = true
tokio-stream.workspace = true
tonic.workspace = true
tower-otel.workspace = true
tracing.workspace = true
tracing-opentelemetry.workspace = true
@@ -64,6 +67,7 @@ uuid.workspace = true
walkdir.workspace = true
x509-cert.workspace = true

postgres_versioninfo.workspace = true
postgres_initdb.workspace = true
compute_api.workspace = true
utils.workspace = true

@@ -36,6 +36,8 @@
use std::ffi::OsString;
use std::fs::File;
use std::process::exit;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use std::sync::mpsc;
use std::thread;
use std::time::Duration;
@@ -124,6 +126,10 @@ struct Cli {
/// Interval in seconds for collecting installed extensions statistics
#[arg(long, default_value = "3600")]
pub installed_extensions_collection_interval: u64,

/// Run in development mode, skipping VM-specific operations like process termination
#[arg(long, action = clap::ArgAction::SetTrue)]
pub dev: bool,
}

impl Cli {
@@ -159,7 +165,7 @@ fn main() -> Result<()> {
.build()?;
let _rt_guard = runtime.enter();

runtime.block_on(init())?;
runtime.block_on(init(cli.dev))?;

// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
@@ -186,7 +192,9 @@ fn main() -> Result<()> {
cgroup: cli.cgroup,
#[cfg(target_os = "linux")]
vm_monitor_addr: cli.vm_monitor_addr,
installed_extensions_collection_interval: cli.installed_extensions_collection_interval,
installed_extensions_collection_interval: Arc::new(AtomicU64::new(
cli.installed_extensions_collection_interval,
)),
},
config,
)?;
@@ -198,13 +206,13 @@ fn main() -> Result<()> {
deinit_and_exit(exit_code);
}

async fn init() -> Result<()> {
async fn init(dev_mode: bool) -> Result<()> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL).await?;

let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
thread::spawn(move || {
for sig in signals.forever() {
handle_exit_signal(sig);
handle_exit_signal(sig, dev_mode);
}
});

@@ -263,9 +271,9 @@ fn deinit_and_exit(exit_code: Option<i32>) -> ! {
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
/// to prevent leakage. TODO: it is better to convert compute_ctl to async and
/// wait for termination which would be easy then.
fn handle_exit_signal(sig: i32) {
fn handle_exit_signal(sig: i32, dev_mode: bool) {
info!("received {sig} termination signal");
forward_termination_signal();
forward_termination_signal(dev_mode);
exit(1);
}


@@ -29,7 +29,7 @@ use anyhow::{Context, bail};
use aws_config::BehaviorVersion;
use camino::{Utf8Path, Utf8PathBuf};
use clap::{Parser, Subcommand};
use compute_tools::extension_server::{PostgresMajorVersion, get_pg_version};
use compute_tools::extension_server::get_pg_version;
use nix::unistd::Pid;
use std::ops::Not;
use tracing::{Instrument, error, info, info_span, warn};
@@ -179,12 +179,8 @@ impl PostgresProcess {
.await
.context("create pgdata directory")?;

let pg_version = match get_pg_version(self.pgbin.as_ref()) {
PostgresMajorVersion::V14 => 14,
PostgresMajorVersion::V15 => 15,
PostgresMajorVersion::V16 => 16,
PostgresMajorVersion::V17 => 17,
};
let pg_version = get_pg_version(self.pgbin.as_ref());

postgres_initdb::do_run_initdb(postgres_initdb::RunInitdbArgs {
superuser: initdb_user,
locale: DEFAULT_LOCALE, // XXX: this shouldn't be hard-coded,
@@ -486,10 +482,8 @@ async fn cmd_pgdata(
};

let superuser = "cloud_admin";
let destination_connstring = format!(
"host=localhost port={} user={} dbname=neondb",
pg_port, superuser
);
let destination_connstring =
format!("host=localhost port={pg_port} user={superuser} dbname=neondb");

let pgdata_dir = workdir.join("pgdata");
let mut proc = PostgresProcess::new(pgdata_dir.clone(), pg_bin_dir.clone(), pg_lib_dir.clone());

@@ -69,7 +69,7 @@ impl clap::builder::TypedValueParser for S3Uri {
S3Uri::from_str(value_str).map_err(|e| {
clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("Failed to parse S3 URI: {}", e),
format!("Failed to parse S3 URI: {e}"),
)
})
}

@@ -22,7 +22,7 @@ pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<Cat

spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
eprintln!("connection error: {e}");
}
});

@@ -119,7 +119,7 @@ pub async fn get_database_schema(
_ => {
let mut lines = stderr_reader.lines();
if let Some(line) = lines.next_line().await? {
if line.contains(&format!("FATAL: database \"{}\" does not exist", dbname)) {
if line.contains(&format!("FATAL: database \"{dbname}\" does not exist")) {
return Err(SchemaDumpError::DatabaseDoesNotExist);
}
warn!("pg_dump stderr: {}", line)

@@ -6,7 +6,7 @@ use compute_api::responses::{
LfcPrewarmState, TlsConfig,
};
use compute_api::spec::{
ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent,
ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent,
};
use futures::StreamExt;
use futures::future::join_all;
@@ -15,17 +15,17 @@ use itertools::Itertools;
use nix::sys::signal::{Signal, kill};
use nix::unistd::Pid;
use once_cell::sync::Lazy;
use pageserver_page_api::{self as page_api, BaseBackupCompression};
use postgres;
use postgres::NoTls;
use postgres::error::SqlState;
use remote_storage::{DownloadError, RemotePath};
use std::collections::{HashMap, HashSet};
use std::net::SocketAddr;
use std::os::unix::fs::{PermissionsExt, symlink};
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use std::sync::{Arc, Condvar, Mutex, RwLock};
use std::time::{Duration, Instant};
use std::{env, fs};
@@ -35,6 +35,8 @@ use url::Url;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use utils::measured_stream::MeasuredReader;
use utils::pid_file;
use utils::shard::{ShardCount, ShardIndex, ShardNumber};

use crate::configurator::launch_configurator;
use crate::disk_quota::set_disk_quota;
@@ -44,6 +46,7 @@ use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
use crate::metrics::COMPUTE_CTL_UP;
use crate::monitor::launch_monitor;
use crate::pg_helpers::*;
use crate::pgbouncer::*;
use crate::rsyslog::{
PostgresLogsRsyslogConfig, configure_audit_rsyslog, configure_postgres_logs_export,
launch_pgaudit_gc,
@@ -67,6 +70,7 @@ pub static BUILD_TAG: Lazy<String> = Lazy::new(|| {
.unwrap_or(BUILD_TAG_DEFAULT)
.to_string()
});
const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;

/// Static configuration params that don't change after startup. These mostly
/// come from the CLI args, or are derived from them.
@@ -100,7 +104,7 @@ pub struct ComputeNodeParams {
pub remote_ext_base_url: Option<Url>,

/// Interval for installed extensions collection
pub installed_extensions_collection_interval: u64,
pub installed_extensions_collection_interval: Arc<AtomicU64>,
}

/// Compute node info shared across several `compute_ctl` threads.
@@ -123,6 +127,9 @@ pub struct ComputeNode {
// key: ext_archive_name, value: started download time, download_completed?
pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
pub compute_ctl_config: ComputeCtlConfig,

/// Handle to the extension stats collection task
extension_stats_task: Mutex<Option<tokio::task::JoinHandle<()>>>,
}

// store some metrics about download size that might impact startup time
@@ -161,6 +168,10 @@ pub struct ComputeState {
pub lfc_prewarm_state: LfcPrewarmState,
pub lfc_offload_state: LfcOffloadState,

/// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if
/// mode == ComputeMode::Primary. None otherwise
pub terminate_flush_lsn: Option<Lsn>,

pub metrics: ComputeMetrics,
}

@@ -176,6 +187,7 @@ impl ComputeState {
metrics: ComputeMetrics::default(),
lfc_prewarm_state: LfcPrewarmState::default(),
lfc_offload_state: LfcOffloadState::default(),
terminate_flush_lsn: None,
}
}

@@ -211,10 +223,50 @@ pub struct ParsedSpec {
pub pageserver_connstr: String,
pub safekeeper_connstrings: Vec<String>,
pub storage_auth_token: Option<String>,
pub endpoint_storage_addr: Option<SocketAddr>,
/// k8s dns name and port
pub endpoint_storage_addr: Option<String>,
pub endpoint_storage_token: Option<String>,
}

impl ParsedSpec {
pub fn validate(&self) -> Result<(), String> {
// Only Primary nodes are using safekeeper_connstrings, and at the moment
// this method only validates that part of the specs.
if self.spec.mode != ComputeMode::Primary {
return Ok(());
}

// While it seems like a good idea to check for an odd number of entries in
// the safekeepers connection string, changes to the list of safekeepers might
// incur appending a new server to a list of 3, in which case a list of 4
// entries is okay in production.
//
// Still we want unique entries, and at least one entry in the vector
if self.safekeeper_connstrings.is_empty() {
return Err(String::from("safekeeper_connstrings is empty"));
}

// check for uniqueness of the connection strings in the set
let mut connstrings = self.safekeeper_connstrings.clone();

connstrings.sort();
let mut previous = &connstrings[0];

for current in connstrings.iter().skip(1) {
// duplicate entry?
if current == previous {
return Err(format!(
"duplicate entry in safekeeper_connstrings: {current}!",
));
}

previous = current;
}

Ok(())
}
}

impl TryFrom<ComputeSpec> for ParsedSpec {
type Error = String;
fn try_from(spec: ComputeSpec) -> Result<Self, String> {
@@ -244,6 +296,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
} else {
spec.safekeeper_connstrings.clone()
};

let storage_auth_token = spec.storage_auth_token.clone();
let tenant_id: TenantId = if let Some(tenant_id) = spec.tenant_id {
tenant_id
@@ -266,19 +319,16 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
.or(Err("invalid timeline id"))?
};

let endpoint_storage_addr: Option<SocketAddr> = spec
let endpoint_storage_addr: Option<String> = spec
.endpoint_storage_addr
.clone()
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_addr"))
.unwrap_or_default()
.parse()
.ok();
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_addr"));
let endpoint_storage_token = spec
.endpoint_storage_token
.clone()
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_token"));

Ok(ParsedSpec {
let res = ParsedSpec {
spec,
pageserver_connstr,
safekeeper_connstrings,
@@ -287,7 +337,11 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
timeline_id,
endpoint_storage_addr,
endpoint_storage_token,
})
};

// Now check validity of the parsed specification
res.validate()?;
Ok(res)
}
}

@@ -354,14 +408,11 @@ impl ComputeNode {
// that can affect `compute_ctl` and prevent it from properly configuring the database schema.
// Unset them via connection string options before connecting to the database.
// N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
//
// TODO(ololobus): we currently pass `-c default_transaction_read_only=off` from control plane
// as well. After rolling out this code, we can remove this parameter from control plane.
// In the meantime, double-passing is fine, the last value is applied.
// See: <https://github.com/neondatabase/cloud/blob/133dd8c4dbbba40edfbad475bf6a45073ca63faf/goapp/controlplane/internal/pkg/compute/provisioner/provisioner_common.go#L70>
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0 -c pgaudit.log=none";
let options = match conn_conf.get_options() {
Some(options) => format!("{} {}", options, EXTRA_OPTIONS),
// Allow the control plane to override any options set by the
// compute
Some(options) => format!("{EXTRA_OPTIONS} {options}"),
None => EXTRA_OPTIONS.to_string(),
};
conn_conf.options(&options);
@@ -381,6 +432,7 @@ impl ComputeNode {
state_changed: Condvar::new(),
ext_download_progress: RwLock::new(HashMap::new()),
compute_ctl_config: config.compute_ctl_config,
extension_stats_task: Mutex::new(None),
})
}

@@ -468,6 +520,9 @@ impl ComputeNode {
None
};

// Terminate the extension stats collection task
this.terminate_extension_stats_task();

// Terminate the vm_monitor so it releases the file watcher on
// /sys/fs/cgroup/neon-postgres.
// Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -489,12 +544,21 @@ impl ComputeNode {
// Reap the postgres process
delay_exit |= this.cleanup_after_postgres_exit()?;

// /terminate returns LSN. If we don't sleep at all, connection will break and we
// won't get result. If we sleep too much, tests will take significantly longer
// and Github Action run will error out
let sleep_duration = if delay_exit {
Duration::from_secs(30)
} else {
Duration::from_millis(300)
};

// If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error.
if delay_exit {
info!("giving control plane 30s to collect the error before shutdown");
std::thread::sleep(Duration::from_secs(30));
}
std::thread::sleep(sleep_duration);
Ok(exit_code)
}

@@ -695,10 +759,15 @@ impl ComputeNode {
// Configure and start rsyslog for compliance audit logging
match pspec.spec.audit_log_level {
ComputeAudit::Hipaa | ComputeAudit::Extended | ComputeAudit::Full => {
let remote_endpoint =
let remote_tls_endpoint =
std::env::var("AUDIT_LOGGING_TLS_ENDPOINT").unwrap_or("".to_string());
let remote_plain_endpoint =
std::env::var("AUDIT_LOGGING_ENDPOINT").unwrap_or("".to_string());
if remote_endpoint.is_empty() {
anyhow::bail!("AUDIT_LOGGING_ENDPOINT is empty");

if remote_plain_endpoint.is_empty() && remote_tls_endpoint.is_empty() {
anyhow::bail!(
"AUDIT_LOGGING_ENDPOINT and AUDIT_LOGGING_TLS_ENDPOINT are both empty"
);
}

let log_directory_path = Path::new(&self.params.pgdata).join("log");
@@ -714,7 +783,8 @@ impl ComputeNode {
log_directory_path.clone(),
endpoint_id,
project_id,
&remote_endpoint,
&remote_plain_endpoint,
&remote_tls_endpoint,
)?;

// Launch a background task to clean up the audit logs
@@ -866,20 +936,25 @@ impl ComputeNode {
// Maybe sync safekeepers again, to speed up next startup
let compute_state = self.state.lock().unwrap().clone();
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
let lsn = if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
info!("syncing safekeepers on shutdown");
let storage_auth_token = pspec.storage_auth_token.clone();
let lsn = self.sync_safekeepers(storage_auth_token)?;
info!("synced safekeepers at lsn {lsn}");
}
info!(%lsn, "synced safekeepers");
Some(lsn)
} else {
info!("not primary, not syncing safekeepers");
None
};

let mut delay_exit = false;
let mut state = self.state.lock().unwrap();
if state.status == ComputeStatus::TerminationPending {
state.terminate_flush_lsn = lsn;
if let ComputeStatus::TerminationPending { mode } = state.status {
state.status = ComputeStatus::Terminated;
self.state_changed.notify_all();
// we were asked to terminate gracefully, don't exit to avoid restart
delay_exit = true
delay_exit = mode == compute_api::responses::TerminateMode::Fast
}
drop(state);

@@ -936,13 +1011,80 @@ impl ComputeNode {
Ok(())
}

// Get basebackup from the libpq connection to pageserver using `connstr` and
// unarchive it to `pgdata` directory overriding all its previous content.
/// Fetches a basebackup from the Pageserver using the compute state's Pageserver connstring and
/// unarchives it to `pgdata` directory, replacing any existing contents.
#[instrument(skip_all, fields(%lsn))]
fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
let spec = compute_state.pspec.as_ref().expect("spec must be set");
let start_time = Instant::now();

let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
let started = Instant::now();

let (connected, size) = match PageserverProtocol::from_connstring(shard0_connstr)? {
PageserverProtocol::Libpq => self.try_get_basebackup_libpq(spec, lsn)?,
PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
};

let mut state = self.state.lock().unwrap();
state.metrics.pageserver_connect_micros =
connected.duration_since(started).as_micros() as u64;
state.metrics.basebackup_bytes = size as u64;
state.metrics.basebackup_ms = started.elapsed().as_millis() as u64;

Ok(())
}

/// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
/// the connection was established, and the (compressed) size of the basebackup.
fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
let shard0_connstr = spec
.pageserver_connstr
.split(',')
.next()
.unwrap()
.to_string();
let shard_index = match spec.pageserver_connstr.split(',').count() as u8 {
0 | 1 => ShardIndex::unsharded(),
count => ShardIndex::new(ShardNumber(0), ShardCount(count)),
};

let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {
let mut client = page_api::Client::new(
shard0_connstr,
spec.tenant_id,
spec.timeline_id,
shard_index,
spec.storage_auth_token.clone(),
None, // NB: base backups use payload compression
)
.await?;
let connected = Instant::now();
let reader = client
.get_base_backup(page_api::GetBaseBackupRequest {
lsn: (lsn != Lsn(0)).then_some(lsn),
compression: BaseBackupCompression::Gzip,
replica: spec.spec.mode != ComputeMode::Primary,
full: false,
})
.await?;
anyhow::Ok((reader, connected))
})?;

let mut reader = MeasuredReader::new(tokio_util::io::SyncIoBridge::new(reader));

// Set `ignore_zeros` so that unpack() reads the entire stream and doesn't just stop at the
// end-of-archive marker. If the server errors, the tar::Builder drop handler will write an
// end-of-archive marker before the error is emitted, and we would not see the error.
let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut reader));
ar.set_ignore_zeros(true);
ar.unpack(&self.params.pgdata)?;

Ok((connected, reader.get_byte_count()))
}

/// Fetches a basebackup via libpq. The connstring must use postgresql://. Returns the timestamp
/// when the connection was established, and the (compressed) size of the basebackup.
fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
let mut config = postgres::Config::from_str(shard0_connstr)?;

@@ -956,16 +1098,14 @@ impl ComputeNode {
}

config.application_name("compute_ctl");
if let Some(spec) = &compute_state.pspec {
config.options(&format!(
"-c neon.compute_mode={}",
spec.spec.mode.to_type_str()
));
}
config.options(&format!(
"-c neon.compute_mode={}",
spec.spec.mode.to_type_str()
));

// Connect to pageserver
let mut client = config.connect(NoTls)?;
let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;
let connected = Instant::now();

let basebackup_cmd = match lsn {
Lsn(0) => {
@@ -1002,16 +1142,13 @@ impl ComputeNode {
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
// The tar::Builder drop handler will write an end-of-archive marker
// before emitting the error, and we would not see it otherwise.
let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
ar.set_ignore_zeros(true);
ar.unpack(&self.params.pgdata)?;

// Report metrics
let mut state = self.state.lock().unwrap();
state.metrics.pageserver_connect_micros = pageserver_connect_micros;
state.metrics.basebackup_bytes = measured_reader.get_byte_count() as u64;
state.metrics.basebackup_ms = start_time.elapsed().as_millis() as u64;
Ok(())
Ok((connected, measured_reader.get_byte_count()))
}

// Gets the basebackup in a retry loop
@@ -1064,7 +1201,7 @@ impl ComputeNode {
let sk_configs = sk_connstrs.into_iter().map(|connstr| {
// Format connstr
let id = connstr.clone();
let connstr = format!("postgresql://no_user@{}", connstr);
let connstr = format!("postgresql://no_user@{connstr}");
let options = format!(
"-c timeline_id={} tenant_id={}",
pspec.timeline_id, pspec.tenant_id
@@ -1427,7 +1564,7 @@ impl ComputeNode {
let (mut client, connection) = conf.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
eprintln!("connection error: {e}");
}
});

@@ -1548,6 +1685,8 @@ impl ComputeNode {
tls_config = self.compute_ctl_config.tls.clone();
}

self.update_installed_extensions_collection_interval(&spec);

let max_concurrent_connections = self.max_service_connections(compute_state, &spec);

// Merge-apply spec & changes to PostgreSQL state.
@@ -1570,7 +1709,7 @@ impl ComputeNode {
Ok((mut client, connection)) => {
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
eprintln!("connection error: {e}");
}
});
if let Err(e) = handle_migrations(&mut client).await {
@@ -1612,6 +1751,8 @@ impl ComputeNode {

let tls_config = self.tls_config(&spec);

self.update_installed_extensions_collection_interval(&spec);

if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
info!("tuning pgbouncer");

@@ -1750,7 +1891,7 @@ impl ComputeNode {

// exit loop
ComputeStatus::Failed
| ComputeStatus::TerminationPending
| ComputeStatus::TerminationPending { .. }
| ComputeStatus::Terminated => break 'cert_update,

// wait
@@ -1874,7 +2015,7 @@ impl ComputeNode {
let (client, connection) = connect_result.unwrap();
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
eprintln!("connection error: {e}");
}
});
let result = client
@@ -2043,7 +2184,7 @@ LIMIT 100",
db_client
.simple_query(&query)
.await
.with_context(|| format!("Failed to execute query: {}", query))?;
.with_context(|| format!("Failed to execute query: {query}"))?;
}

Ok(())
@@ -2070,7 +2211,7 @@ LIMIT 100",
let version: Option<ExtVersion> = db_client
.query_opt(version_query, &[&ext_name])
.await
.with_context(|| format!("Failed to execute query: {}", version_query))?
.with_context(|| format!("Failed to execute query: {version_query}"))?
.map(|row| row.get(0));

// sanitize the inputs as postgres idents.
@@ -2085,14 +2226,14 @@ LIMIT 100",
db_client
.simple_query(&query)
.await
.with_context(|| format!("Failed to execute query: {}", query))?;
.with_context(|| format!("Failed to execute query: {query}"))?;
} else {
let query =
format!("CREATE EXTENSION IF NOT EXISTS {ext_name} WITH VERSION {quoted_version}");
db_client
.simple_query(&query)
.await
.with_context(|| format!("Failed to execute query: {}", query))?;
.with_context(|| format!("Failed to execute query: {query}"))?;
}

Ok(ext_version)
@@ -2216,10 +2357,20 @@ LIMIT 100",
}

pub fn spawn_extension_stats_task(&self) {
// Cancel any existing task
if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
handle.abort();
}

let conf = self.tokio_conn_conf.clone();
let installed_extensions_collection_interval =
self.params.installed_extensions_collection_interval;
tokio::spawn(async move {
let atomic_interval = self.params.installed_extensions_collection_interval.clone();
let mut installed_extensions_collection_interval =
2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst);
info!(
"[NEON_EXT_SPAWN] Spawning background installed extensions worker with Timeout: {}",
installed_extensions_collection_interval
);
let handle = tokio::spawn(async move {
// An initial sleep is added to ensure that two collections don't happen at the same time.
// The first collection happens during compute startup.
tokio::time::sleep(tokio::time::Duration::from_secs(
@@ -2232,8 +2383,48 @@ LIMIT 100",
loop {
interval.tick().await;
let _ = installed_extensions(conf.clone()).await;
// Acquire a read lock on the compute spec and then update the interval if necessary
interval = tokio::time::interval(tokio::time::Duration::from_secs(std::cmp::max(
installed_extensions_collection_interval,
2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst),
)));
installed_extensions_collection_interval = interval.period().as_secs();
}
});

// Store the new task handle
*self.extension_stats_task.lock().unwrap() = Some(handle);
}

fn terminate_extension_stats_task(&self) {
if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
handle.abort();
}
}

fn update_installed_extensions_collection_interval(&self, spec: &ComputeSpec) {
// Update the interval for collecting installed extensions statistics
// If the value is -1, we never suspend so set the value to default collection.
// If the value is 0, it means default, we will just continue to use the default.
if spec.suspend_timeout_seconds == -1 || spec.suspend_timeout_seconds == 0 {
info!(
"[NEON_EXT_INT_UPD] Spec Timeout: {}, New Timeout: {}",
spec.suspend_timeout_seconds, DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL
);
self.params.installed_extensions_collection_interval.store(
DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL,
std::sync::atomic::Ordering::SeqCst,
);
} else {
info!(
"[NEON_EXT_INT_UPD] Spec Timeout: {}",
spec.suspend_timeout_seconds
);
self.params.installed_extensions_collection_interval.store(
spec.suspend_timeout_seconds as u64,
std::sync::atomic::Ordering::SeqCst,
);
}
}
}

@@ -2251,12 +2442,68 @@ pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
Ok(())
}

pub fn forward_termination_signal() {
pub fn forward_termination_signal(dev_mode: bool) {
let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);
if ss_pid != 0 {
let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);
kill(ss_pid, Signal::SIGTERM).ok();
}

if !dev_mode {
// Terminate pgbouncer with SIGKILL
match pid_file::read(PGBOUNCER_PIDFILE.into()) {
Ok(pid_file::PidFileRead::LockedByOtherProcess(pid)) => {
info!("sending SIGKILL to pgbouncer process pid: {}", pid);
if let Err(e) = kill(pid, Signal::SIGKILL) {
error!("failed to terminate pgbouncer: {}", e);
}
}
// pgbouncer does not lock the pid file, so we read and kill the process directly
Ok(pid_file::PidFileRead::NotHeldByAnyProcess(_)) => {
if let Ok(pid_str) = std::fs::read_to_string(PGBOUNCER_PIDFILE) {
if let Ok(pid) = pid_str.trim().parse::<i32>() {
info!(
"sending SIGKILL to pgbouncer process pid: {} (from unlocked pid file)",
pid
);
if let Err(e) = kill(Pid::from_raw(pid), Signal::SIGKILL) {
error!("failed to terminate pgbouncer: {}", e);
}
}
} else {
info!("pgbouncer pid file exists but process not running");
}
}
Ok(pid_file::PidFileRead::NotExist) => {
info!("pgbouncer pid file not found, process may not be running");
}
Err(e) => {
error!("error reading pgbouncer pid file: {}", e);
}
}

// Terminate local_proxy
match pid_file::read("/etc/local_proxy/pid".into()) {
Ok(pid_file::PidFileRead::LockedByOtherProcess(pid)) => {
info!("sending SIGTERM to local_proxy process pid: {}", pid);
if let Err(e) = kill(pid, Signal::SIGTERM) {
error!("failed to terminate local_proxy: {}", e);
}
}
Ok(pid_file::PidFileRead::NotHeldByAnyProcess(_)) => {
info!("local_proxy PID file exists but process not running");
}
Ok(pid_file::PidFileRead::NotExist) => {
info!("local_proxy PID file not found, process may not be running");
}
Err(e) => {
error!("error reading local_proxy PID file: {}", e);
}
}
} else {
info!("Skipping pgbouncer and local_proxy termination because in dev mode");
}

let pg_pid = PG_PID.load(Ordering::SeqCst);
if pg_pid != 0 {
let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
@@ -2289,3 +2536,21 @@ impl<T: 'static> JoinSetExt<T> for tokio::task::JoinSet<T> {
})
}
}

#[cfg(test)]
mod tests {
use std::fs::File;

use super::*;

#[test]
fn duplicate_safekeeper_connstring() {
let file = File::open("tests/cluster_spec.json").unwrap();
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();

match ParsedSpec::try_from(spec.clone()) {
Ok(_p) => panic!("Failed to detect duplicate entry"),
Err(e) => assert!(e.starts_with("duplicate entry in safekeeper_connstrings:")),
};
}
}

@@ -51,7 +51,7 @@ pub fn write_postgres_conf(

// Write the postgresql.conf content from the spec file as is.
if let Some(conf) = &spec.cluster.postgresql_conf {
writeln!(file, "{}", conf)?;
writeln!(file, "{conf}")?;
}

// Add options for connecting to storage
@@ -70,7 +70,7 @@ pub fn write_postgres_conf(
);
// If generation is given, prepend sk list with g#number:
if let Some(generation) = spec.safekeepers_generation {
write!(neon_safekeepers_value, "g#{}:", generation)?;
write!(neon_safekeepers_value, "g#{generation}:")?;
}
neon_safekeepers_value.push_str(&spec.safekeeper_connstrings.join(","));
writeln!(
@@ -109,8 +109,8 @@ pub fn write_postgres_conf(
tls::update_key_path_blocking(pgdata_path, tls_config);

// these are the default, but good to be explicit.
writeln!(file, "ssl_cert_file = '{}'", SERVER_CRT)?;
writeln!(file, "ssl_key_file = '{}'", SERVER_KEY)?;
writeln!(file, "ssl_cert_file = '{SERVER_CRT}'")?;
writeln!(file, "ssl_key_file = '{SERVER_KEY}'")?;
}

// Locales
@@ -191,8 +191,7 @@ pub fn write_postgres_conf(
}
writeln!(
file,
"shared_preload_libraries='{}{}'",
libs, extra_shared_preload_libraries
"shared_preload_libraries='{libs}{extra_shared_preload_libraries}'"
)?;
} else {
// Typically, this should be unreacheable,
@@ -244,8 +243,7 @@ pub fn write_postgres_conf(
}
writeln!(
file,
"shared_preload_libraries='{}{}'",
libs, extra_shared_preload_libraries
"shared_preload_libraries='{libs}{extra_shared_preload_libraries}'"
)?;
} else {
// Typically, this should be unreacheable,
@@ -263,7 +261,7 @@ pub fn write_postgres_conf(
}
}

writeln!(file, "neon.extension_server_port={}", extension_server_port)?;
writeln!(file, "neon.extension_server_port={extension_server_port}")?;

if spec.drop_subscriptions_before_start {
writeln!(file, "neon.disable_logical_replication_subscribers=true")?;
@@ -291,7 +289,7 @@ where
{
let path = pgdata_path.join("compute_ctl_temp_override.conf");
let mut file = File::create(path)?;
write!(file, "{}", options)?;
write!(file, "{options}")?;

let res = exec();


@@ -10,7 +10,13 @@ input(type="imfile" File="{log_directory}/*.log"
startmsg.regex="^[[:digit:]]{{4}}-[[:digit:]]{{2}}-[[:digit:]]{{2}} [[:digit:]]{{2}}:[[:digit:]]{{2}}:[[:digit:]]{{2}}.[[:digit:]]{{3}} GMT,")

# the directory to store rsyslog state files
global(workDirectory="/var/log/rsyslog")
global(
workDirectory="/var/log/rsyslog"
DefaultNetstreamDriverCAFile="/etc/ssl/certs/ca-certificates.crt"
)

# Whether the remote syslog receiver uses tls
set $.remote_syslog_tls = "{remote_syslog_tls}";

# Construct json, endpoint_id and project_id as additional metadata
set $.json_log!endpoint_id = "{endpoint_id}";
@@ -21,5 +27,29 @@ set $.json_log!msg = $msg;
template(name="PgAuditLog" type="string"
string="<%PRI%>1 %TIMESTAMP:::date-rfc3339% %HOSTNAME% - - - - %$.json_log%")

# Forward to remote syslog receiver (@@<hostname>:<port>;format
local5.info @@{remote_endpoint};PgAuditLog
# Forward to remote syslog receiver (over TLS)
if ( $syslogtag == 'pgaudit_log' ) then {{
if ( $.remote_syslog_tls == 'true' ) then {{
action(type="omfwd" target="{remote_syslog_host}" port="{remote_syslog_port}" protocol="tcp"
template="PgAuditLog"
queue.type="linkedList"
queue.size="1000"
action.ResumeRetryCount="10"
StreamDriver="gtls"
StreamDriverMode="1"
StreamDriverAuthMode="x509/name"
StreamDriverPermittedPeers="{remote_syslog_host}"
StreamDriver.CheckExtendedKeyPurpose="on"
StreamDriver.PermitExpiredCerts="off"
)
stop
}} else {{
action(type="omfwd" target="{remote_syslog_host}" port="{remote_syslog_port}" protocol="tcp"
template="PgAuditLog"
queue.type="linkedList"
queue.size="1000"
action.ResumeRetryCount="10"
)
stop
}}
}}

@@ -74,9 +74,11 @@ More specifically, here is an example ext_index.json
use std::path::Path;
use std::str;

use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
use anyhow::{Context, Result, bail};
use bytes::Bytes;
use compute_api::spec::RemoteExtSpec;
use postgres_versioninfo::PgMajorVersion;
use regex::Regex;
use remote_storage::*;
use reqwest::StatusCode;
@@ -86,8 +88,6 @@ use tracing::log::warn;
use url::Url;
use zstd::stream::read::Decoder;

use crate::metrics::{REMOTE_EXT_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};

fn get_pg_config(argument: &str, pgbin: &str) -> String {
// gives the result of `pg_config [argument]`
// where argument is a flag like `--version` or `--sharedir`
@@ -106,7 +106,7 @@ fn get_pg_config(argument: &str, pgbin: &str) -> String {
.to_string()
}

pub fn get_pg_version(pgbin: &str) -> PostgresMajorVersion {
pub fn get_pg_version(pgbin: &str) -> PgMajorVersion {
// pg_config --version returns a (platform specific) human readable string
// such as "PostgreSQL 15.4". We parse this to v14/v15/v16 etc.
let human_version = get_pg_config("--version", pgbin);
@@ -114,25 +114,11 @@ pub fn get_pg_version(pgbin: &str) -> PostgresMajorVersion {
}

pub fn get_pg_version_string(pgbin: &str) -> String {
match get_pg_version(pgbin) {
PostgresMajorVersion::V14 => "v14",
PostgresMajorVersion::V15 => "v15",
PostgresMajorVersion::V16 => "v16",
PostgresMajorVersion::V17 => "v17",
}
.to_owned()
get_pg_version(pgbin).v_str()
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum PostgresMajorVersion {
V14,
V15,
V16,
V17,
}

fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
use PostgresMajorVersion::*;
fn parse_pg_version(human_version: &str) -> PgMajorVersion {
use PgMajorVersion::*;
// Normal releases have version strings like "PostgreSQL 15.4". But there
// are also pre-release versions like "PostgreSQL 17devel" or "PostgreSQL
// 16beta2" or "PostgreSQL 17rc1". And with the --with-extra-version
@@ -143,10 +129,10 @@ fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
.captures(human_version)
{
Some(captures) if captures.len() == 2 => match &captures["major"] {
"14" => return V14,
"15" => return V15,
"16" => return V16,
"17" => return V17,
"14" => return PG14,
"15" => return PG15,
"16" => return PG16,
"17" => return PG17,
_ => {}
},
_ => {}
@@ -310,10 +296,7 @@ async fn download_extension_tar(remote_ext_base_url: &Url, ext_path: &str) -> Re
async fn do_extension_server_request(uri: Url) -> Result<Bytes, (String, String)> {
let resp = reqwest::get(uri).await.map_err(|e| {
(
format!(
"could not perform remote extensions server request: {:?}",
e
),
format!("could not perform remote extensions server request: {e:?}"),
UNKNOWN_HTTP_STATUS.to_string(),
)
})?;
@@ -323,7 +306,7 @@ async fn do_extension_server_request(uri: Url) -> Result<Bytes, (String, String)
StatusCode::OK => match resp.bytes().await {
Ok(resp) => Ok(resp),
Err(e) => Err((
format!("could not read remote extensions server response: {:?}", e),
format!("could not read remote extensions server response: {e:?}"),
// It's fine to return and report error with status as 200 OK,
// because we still failed to read the response.
status.to_string(),
@@ -334,10 +317,7 @@ async fn do_extension_server_request(uri: Url) -> Result<Bytes, (String, String)
status.to_string(),
)),
_ => Err((
format!(
"unexpected remote extensions server response status code: {}",
status
),
format!("unexpected remote extensions server response status code: {status}"),
status.to_string(),
)),
}
@@ -349,25 +329,25 @@ mod tests {

#[test]
fn test_parse_pg_version() {
use super::PostgresMajorVersion::*;
assert_eq!(parse_pg_version("PostgreSQL 15.4"), V15);
assert_eq!(parse_pg_version("PostgreSQL 15.14"), V15);
use postgres_versioninfo::PgMajorVersion::*;
assert_eq!(parse_pg_version("PostgreSQL 15.4"), PG15);
assert_eq!(parse_pg_version("PostgreSQL 15.14"), PG15);
assert_eq!(
parse_pg_version("PostgreSQL 15.4 (Ubuntu 15.4-0ubuntu0.23.04.1)"),
V15
PG15
);

assert_eq!(parse_pg_version("PostgreSQL 14.15"), V14);
assert_eq!(parse_pg_version("PostgreSQL 14.0"), V14);
assert_eq!(parse_pg_version("PostgreSQL 14.15"), PG14);
assert_eq!(parse_pg_version("PostgreSQL 14.0"), PG14);
assert_eq!(
parse_pg_version("PostgreSQL 14.9 (Debian 14.9-1.pgdg120+1"),
V14
PG14
);

assert_eq!(parse_pg_version("PostgreSQL 16devel"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16beta1"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16rc2"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16extra"), V16);
assert_eq!(parse_pg_version("PostgreSQL 16devel"), PG16);
assert_eq!(parse_pg_version("PostgreSQL 16beta1"), PG16);
assert_eq!(parse_pg_version("PostgreSQL 16rc2"), PG16);
assert_eq!(parse_pg_version("PostgreSQL 16extra"), PG16);
}

#[test]

@@ -65,7 +65,7 @@ pub(in crate::http) async fn configure(

if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
let msg = format!("compute configuration failed: {err:?}");
return Err(msg);
}
}

@@ -1,32 +1,42 @@
use std::sync::Arc;

use crate::compute::{ComputeNode, forward_termination_signal};
use crate::http::JsonResponse;
use axum::extract::State;
use axum::response::{IntoResponse, Response};
use compute_api::responses::ComputeStatus;
use axum::response::Response;
use axum_extra::extract::OptionalQuery;
use compute_api::responses::{ComputeStatus, TerminateResponse};
use http::StatusCode;
use serde::Deserialize;
use std::sync::Arc;
use tokio::task;
use tracing::info;

use crate::compute::{ComputeNode, forward_termination_signal};
use crate::http::JsonResponse;
#[derive(Deserialize, Default)]
pub struct TerminateQuery {
mode: compute_api::responses::TerminateMode,
}

/// Terminate the compute.
pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
pub(in crate::http) async fn terminate(
State(compute): State<Arc<ComputeNode>>,
OptionalQuery(terminate): OptionalQuery<TerminateQuery>,
) -> Response {
let mode = terminate.unwrap_or_default().mode;
{
let mut state = compute.state.lock().unwrap();
if state.status == ComputeStatus::Terminated {
return StatusCode::CREATED.into_response();
return JsonResponse::success(StatusCode::CREATED, state.terminate_flush_lsn);
}

if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
return JsonResponse::invalid_status(state.status);
}

state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
drop(state);
state.set_status(
ComputeStatus::TerminationPending { mode },
&compute.state_changed,
);
}

forward_termination_signal();
forward_termination_signal(false);
info!("sent signal and notified waiters");

// Spawn a blocking thread to wait for compute to become Terminated.
@@ -34,7 +44,7 @@ pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>)
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let lsn = task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Terminated {
state = c.state_changed.wait(state).unwrap();
@@ -44,11 +54,10 @@ pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>)
state.status
);
}
state.terminate_flush_lsn
})
.await
.unwrap();

info!("terminated Postgres");

StatusCode::OK.into_response()
JsonResponse::success(StatusCode::OK, TerminateResponse { lsn })
}

@@ -43,7 +43,7 @@ pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExten
let (mut client, connection) = conf.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
eprintln!("connection error: {e}");
}
});

@@ -57,7 +57,7 @@ pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExten
let (client, connection) = conf.connect(NoTls).await?;
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
eprintln!("connection error: {e}");
}
});


@@ -22,6 +22,7 @@ mod migration;
pub mod monitor;
pub mod params;
pub mod pg_helpers;
pub mod pgbouncer;
pub mod rsyslog;
pub mod spec;
mod spec_apply;

@@ -4,7 +4,9 @@ use std::thread;
use std::time::{Duration, SystemTime};

use anyhow::{Result, bail};
use compute_api::spec::ComputeMode;
use compute_api::spec::{ComputeMode, PageserverProtocol};
use itertools::Itertools as _;
use pageserver_page_api as page_api;
use postgres::{NoTls, SimpleQueryMessage};
use tracing::{info, warn};
use utils::id::{TenantId, TimelineId};

@@ -76,25 +78,17 @@ fn acquire_lsn_lease_with_retry(

loop {
// Note: List of pageservers is dynamic, need to re-read configs before each attempt.
let configs = {
let (connstrings, auth) = {
let state = compute.state.lock().unwrap();

let spec = state.pspec.as_ref().expect("spec must be set");

let conn_strings = spec.pageserver_connstr.split(',');

conn_strings
.map(|connstr| {
let mut config = postgres::Config::from_str(connstr).expect("Invalid connstr");
if let Some(storage_auth_token) = &spec.storage_auth_token {
config.password(storage_auth_token.clone());
}
config
})
.collect::<Vec<_>>()
(
spec.pageserver_connstr.clone(),
spec.storage_auth_token.clone(),
)
};

let result = try_acquire_lsn_lease(tenant_id, timeline_id, lsn, &configs);
let result =
try_acquire_lsn_lease(&connstrings, auth.as_deref(), tenant_id, timeline_id, lsn);
match result {
Ok(Some(res)) => {
return Ok(res);

@@ -116,68 +110,104 @@ fn acquire_lsn_lease_with_retry(
}
}

/// Tries to acquire an LSN lease through PS page_service API.
/// Tries to acquire LSN leases on all Pageserver shards.
fn try_acquire_lsn_lease(
connstrings: &str,
auth: Option<&str>,
tenant_id: TenantId,
timeline_id: TimelineId,
lsn: Lsn,
configs: &[postgres::Config],
) -> Result<Option<SystemTime>> {
fn get_valid_until(
config: &postgres::Config,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<Option<SystemTime>> {
let mut client = config.connect(NoTls)?;
let cmd = format!("lease lsn {} {} {} ", tenant_shard_id, timeline_id, lsn);
let res = client.simple_query(&cmd)?;
let msg = match res.first() {
Some(msg) => msg,
None => bail!("empty response"),
};
let row = match msg {
SimpleQueryMessage::Row(row) => row,
_ => bail!("error parsing lsn lease response"),
let connstrings = connstrings.split(',').collect_vec();
let shard_count = connstrings.len();
let mut leases = Vec::new();

for (shard_number, &connstring) in connstrings.iter().enumerate() {
let tenant_shard_id = match shard_count {
0 | 1 => TenantShardId::unsharded(tenant_id),
shard_count => TenantShardId {
tenant_id,
shard_number: ShardNumber(shard_number as u8),
shard_count: ShardCount::new(shard_count as u8),
},
};

// Note: this will be None if a lease is explicitly not granted.
let valid_until_str = row.get("valid_until");

let valid_until = valid_until_str.map(|s| {
SystemTime::UNIX_EPOCH
.checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
.expect("Time larger than max SystemTime could handle")
});
Ok(valid_until)
let lease = match PageserverProtocol::from_connstring(connstring)? {
PageserverProtocol::Libpq => {
acquire_lsn_lease_libpq(connstring, auth, tenant_shard_id, timeline_id, lsn)?
}
PageserverProtocol::Grpc => {
acquire_lsn_lease_grpc(connstring, auth, tenant_shard_id, timeline_id, lsn)?
}
};
leases.push(lease);
}

let shard_count = configs.len();
Ok(leases.into_iter().min().flatten())
}

let valid_until = if shard_count > 1 {
configs
.iter()
.enumerate()
.map(|(shard_number, config)| {
let tenant_shard_id = TenantShardId {
tenant_id,
shard_count: ShardCount::new(shard_count as u8),
shard_number: ShardNumber(shard_number as u8),
};
get_valid_until(config, tenant_shard_id, timeline_id, lsn)
})
.collect::<Result<Vec<Option<SystemTime>>>>()?
.into_iter()
.min()
.unwrap()
} else {
get_valid_until(
&configs[0],
TenantShardId::unsharded(tenant_id),
timeline_id,
lsn,
)?
/// Acquires an LSN lease on a single shard, using the libpq API. The connstring must use a
/// postgresql:// scheme.
fn acquire_lsn_lease_libpq(
connstring: &str,
auth: Option<&str>,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<Option<SystemTime>> {
let mut config = postgres::Config::from_str(connstring)?;
if let Some(auth) = auth {
config.password(auth);
}
let mut client = config.connect(NoTls)?;
let cmd = format!("lease lsn {tenant_shard_id} {timeline_id} {lsn} ");
let res = client.simple_query(&cmd)?;
let msg = match res.first() {
Some(msg) => msg,
None => bail!("empty response"),
};
let row = match msg {
SimpleQueryMessage::Row(row) => row,
_ => bail!("error parsing lsn lease response"),
};

// Note: this will be None if a lease is explicitly not granted.
let valid_until_str = row.get("valid_until");

let valid_until = valid_until_str.map(|s| {
SystemTime::UNIX_EPOCH
.checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
.expect("Time larger than max SystemTime could handle")
});
Ok(valid_until)
}

/// Acquires an LSN lease on a single shard, using the gRPC API. The connstring must use a
/// grpc:// scheme.
fn acquire_lsn_lease_grpc(
connstring: &str,
auth: Option<&str>,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
lsn: Lsn,
) -> Result<Option<SystemTime>> {
tokio::runtime::Handle::current().block_on(async move {
let mut client = page_api::Client::new(
connstring.to_string(),
tenant_shard_id.tenant_id,
timeline_id,
tenant_shard_id.to_index(),
auth.map(String::from),
None,
)
.await?;

let req = page_api::LeaseLsnRequest { lsn };
match client.lease_lsn(req).await {
Ok(expires) => Ok(Some(expires)),
// Lease couldn't be acquired because the LSN has been garbage collected.
Err(err) if err.code() == tonic::Code::FailedPrecondition => Ok(None),
Err(err) => Err(err.into()),
}
})
}
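The aggregation `Ok(leases.into_iter().min().flatten())` above leans on `Option`'s ordering, where `None` sorts before any `Some`, so a single shard that did not grant a lease makes the combined result `None`. A small standalone illustration:

```rust
// Standalone illustration of the `min().flatten()` aggregation used above.
// Because `None < Some(_)` in Option's ordering, one shard without a lease
// makes the aggregate lease expiry `None`.
use std::time::{Duration, SystemTime};

fn main() {
    let t = |secs: u64| SystemTime::UNIX_EPOCH + Duration::from_secs(secs);

    let all_granted = vec![Some(t(200)), Some(t(100)), Some(t(300))];
    assert_eq!(all_granted.into_iter().min().flatten(), Some(t(100)));

    let one_missing = vec![Some(t(200)), None, Some(t(300))];
    assert_eq!(one_missing.into_iter().min().flatten(), None);

    println!("min().flatten() keeps the earliest expiry, or None if any lease is missing");
}
```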
@@ -83,7 +83,9 @@ impl ComputeMonitor {
let compute_status = self.compute.get_status();
if matches!(
compute_status,
ComputeStatus::Terminated | ComputeStatus::TerminationPending | ComputeStatus::Failed
ComputeStatus::Terminated
| ComputeStatus::TerminationPending { .. }
| ComputeStatus::Failed
) {
info!(
"compute is in {} status, stopping compute monitor",
@@ -36,9 +36,9 @@ pub fn escape_literal(s: &str) -> String {
let res = s.replace('\'', "''").replace('\\', "\\\\");

if res.contains('\\') {
format!("E'{}'", res)
format!("E'{res}'")
} else {
format!("'{}'", res)
format!("'{res}'")
}
}

@@ -46,7 +46,7 @@ pub fn escape_literal(s: &str) -> String {
/// with `'{}'` is not required, as it returns a ready-to-use config string.
pub fn escape_conf_value(s: &str) -> String {
let res = s.replace('\'', "''").replace('\\', "\\\\");
format!("'{}'", res)
format!("'{res}'")
}

pub trait GenericOptionExt {

@@ -446,7 +446,7 @@ pub async fn tune_pgbouncer(
let mut pgbouncer_connstr =
"host=localhost port=6432 dbname=pgbouncer user=postgres sslmode=disable".to_string();
if let Ok(pass) = std::env::var("PGBOUNCER_PASSWORD") {
pgbouncer_connstr.push_str(format!(" password={}", pass).as_str());
pgbouncer_connstr.push_str(format!(" password={pass}").as_str());
}
pgbouncer_connstr
};

@@ -464,7 +464,7 @@ pub async fn tune_pgbouncer(
Ok((client, connection)) => {
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
eprintln!("connection error: {e}");
}
});
break client;
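As a quick standalone check of the escaping rule shown above (single quotes doubled, backslashes doubled, and the `E''` form used when a backslash is present), the helper below mirrors `escape_literal` rather than calling the real crate:

```rust
// Mirrors the escaping rule from escape_literal above, as a standalone check.
fn escape_literal(s: &str) -> String {
    let res = s.replace('\'', "''").replace('\\', "\\\\");
    if res.contains('\\') {
        format!("E'{res}'")
    } else {
        format!("'{res}'")
    }
}

fn main() {
    assert_eq!(escape_literal("it's"), "'it''s'");
    assert_eq!(escape_literal(r"C:\tmp"), r"E'C:\\tmp'");
    println!("escaping behaves as described");
}
```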
compute_tools/src/pgbouncer.rs (new file, 1 line)
@@ -0,0 +1 @@
pub const PGBOUNCER_PIDFILE: &str = "/tmp/pgbouncer.pid";
@@ -4,8 +4,10 @@ use std::path::Path;
use std::process::Command;
use std::time::Duration;
use std::{fs::OpenOptions, io::Write};
use url::{Host, Url};

use anyhow::{Context, Result, anyhow};
use hostname_validator;
use tracing::{error, info, instrument, warn};

const POSTGRES_LOGS_CONF_PATH: &str = "/etc/rsyslog.d/postgres_logs.conf";

@@ -82,18 +84,84 @@ fn restart_rsyslog() -> Result<()> {
Ok(())
}

fn parse_audit_syslog_address(
remote_plain_endpoint: &str,
remote_tls_endpoint: &str,
) -> Result<(String, u16, String)> {
let tls;
let remote_endpoint = if !remote_tls_endpoint.is_empty() {
tls = "true".to_string();
remote_tls_endpoint
} else {
tls = "false".to_string();
remote_plain_endpoint
};
// Urlify the remote_endpoint, so parsing can be done with url::Url.
let url_str = format!("http://{remote_endpoint}");
let url = Url::parse(&url_str).map_err(|err| {
anyhow!("Error parsing {remote_endpoint}, expected host:port, got {err:?}")
})?;

let is_valid = url.scheme() == "http"
&& url.path() == "/"
&& url.query().is_none()
&& url.fragment().is_none()
&& url.username() == ""
&& url.password().is_none();

if !is_valid {
return Err(anyhow!(
"Invalid address format {remote_endpoint}, expected host:port"
));
}
let host = match url.host() {
Some(Host::Domain(h)) if hostname_validator::is_valid(h) => h.to_string(),
Some(Host::Ipv4(ip4)) => ip4.to_string(),
Some(Host::Ipv6(ip6)) => ip6.to_string(),
_ => return Err(anyhow!("Invalid host")),
};
let port = url
.port()
.ok_or_else(|| anyhow!("Invalid port in {remote_endpoint}"))?;

Ok((host, port, tls))
}

fn generate_audit_rsyslog_config(
log_directory: String,
endpoint_id: &str,
project_id: &str,
remote_syslog_host: &str,
remote_syslog_port: u16,
remote_syslog_tls: &str,
) -> String {
format!(
include_str!("config_template/compute_audit_rsyslog_template.conf"),
log_directory = log_directory,
endpoint_id = endpoint_id,
project_id = project_id,
remote_syslog_host = remote_syslog_host,
remote_syslog_port = remote_syslog_port,
remote_syslog_tls = remote_syslog_tls
)
}

pub fn configure_audit_rsyslog(
log_directory: String,
endpoint_id: &str,
project_id: &str,
remote_endpoint: &str,
remote_tls_endpoint: &str,
) -> Result<()> {
let config_content: String = format!(
include_str!("config_template/compute_audit_rsyslog_template.conf"),
log_directory = log_directory,
endpoint_id = endpoint_id,
project_id = project_id,
remote_endpoint = remote_endpoint
let (remote_syslog_host, remote_syslog_port, remote_syslog_tls) =
parse_audit_syslog_address(remote_endpoint, remote_tls_endpoint).unwrap();
let config_content = generate_audit_rsyslog_config(
log_directory,
endpoint_id,
project_id,
&remote_syslog_host,
remote_syslog_port,
&remote_syslog_tls,
);

info!("rsyslog config_content: {}", config_content);
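The parser above relies on a small trick: prepend an `http://` scheme so `url::Url` can split host and port. A hedged, standalone sketch of that trick with simplified validation and error handling (the function name is illustrative):

```rust
// Minimal standalone sketch of the "urlify then parse" trick used by
// parse_audit_syslog_address: prepend a scheme so url::Url can split
// host and port for us.
use url::{Host, Url};

fn split_host_port(endpoint: &str) -> anyhow::Result<(String, u16)> {
    let url = Url::parse(&format!("http://{endpoint}"))?;
    let host = match url.host() {
        Some(Host::Domain(h)) => h.to_string(),
        Some(Host::Ipv4(ip)) => ip.to_string(),
        Some(Host::Ipv6(ip)) => ip.to_string(),
        None => anyhow::bail!("missing host in {endpoint}"),
    };
    let port = url
        .port()
        .ok_or_else(|| anyhow::anyhow!("missing port in {endpoint}"))?;
    Ok((host, port))
}

fn main() -> anyhow::Result<()> {
    assert_eq!(
        split_host_port("collector.host.tld:5555")?,
        ("collector.host.tld".to_string(), 5555)
    );
    assert_eq!(split_host_port("[::1]:6514")?, ("::1".to_string(), 6514));
    Ok(())
}
```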
@@ -258,6 +326,8 @@ pub fn launch_pgaudit_gc(log_directory: String) {
|
||||
mod tests {
|
||||
use crate::rsyslog::PostgresLogsRsyslogConfig;
|
||||
|
||||
use super::{generate_audit_rsyslog_config, parse_audit_syslog_address};
|
||||
|
||||
#[test]
|
||||
fn test_postgres_logs_config() {
|
||||
{
|
||||
@@ -287,4 +357,146 @@ mod tests {
|
||||
assert!(res.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_audit_syslog_address() {
|
||||
{
|
||||
// host:port format (plaintext)
|
||||
let parsed = parse_audit_syslog_address("collector.host.tld:5555", "");
|
||||
assert!(parsed.is_ok());
|
||||
assert_eq!(
|
||||
parsed.unwrap(),
|
||||
(
|
||||
String::from("collector.host.tld"),
|
||||
5555,
|
||||
String::from("false")
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
{
|
||||
// host:port format with ipv4 ip address (plaintext)
|
||||
let parsed = parse_audit_syslog_address("10.0.0.1:5555", "");
|
||||
assert!(parsed.is_ok());
|
||||
assert_eq!(
|
||||
parsed.unwrap(),
|
||||
(String::from("10.0.0.1"), 5555, String::from("false"))
|
||||
);
|
||||
}
|
||||
|
||||
{
|
||||
// host:port format with ipv6 ip address (plaintext)
|
||||
let parsed =
|
||||
parse_audit_syslog_address("[7e60:82ed:cb2e:d617:f904:f395:aaca:e252]:5555", "");
|
||||
assert_eq!(
|
||||
parsed.unwrap(),
|
||||
(
|
||||
String::from("7e60:82ed:cb2e:d617:f904:f395:aaca:e252"),
|
||||
5555,
|
||||
String::from("false")
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
{
|
||||
// Only TLS host:port defined
|
||||
let parsed = parse_audit_syslog_address("", "tls.host.tld:5556");
|
||||
assert_eq!(
|
||||
parsed.unwrap(),
|
||||
(String::from("tls.host.tld"), 5556, String::from("true"))
|
||||
);
|
||||
}
|
||||
|
||||
{
|
||||
// tls host should take precedence, when both defined
|
||||
let parsed = parse_audit_syslog_address("plaintext.host.tld:5555", "tls.host.tld:5556");
|
||||
assert_eq!(
|
||||
parsed.unwrap(),
|
||||
(String::from("tls.host.tld"), 5556, String::from("true"))
|
||||
);
|
||||
}
|
||||
|
||||
{
|
||||
// host without port (plaintext)
|
||||
let parsed = parse_audit_syslog_address("collector.host.tld", "");
|
||||
assert!(parsed.is_err());
|
||||
}
|
||||
|
||||
{
|
||||
// port without host
|
||||
let parsed = parse_audit_syslog_address(":5555", "");
|
||||
assert!(parsed.is_err());
|
||||
}
|
||||
|
||||
{
|
||||
// valid host with invalid port
|
||||
let parsed = parse_audit_syslog_address("collector.host.tld:90001", "");
|
||||
assert!(parsed.is_err());
|
||||
}
|
||||
|
||||
{
|
||||
// invalid hostname with valid port
|
||||
let parsed = parse_audit_syslog_address("-collector.host.tld:5555", "");
|
||||
assert!(parsed.is_err());
|
||||
}
|
||||
|
||||
{
|
||||
// parse error
|
||||
let parsed = parse_audit_syslog_address("collector.host.tld:::5555", "");
|
||||
assert!(parsed.is_err());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_generate_audit_rsyslog_config() {
|
||||
{
|
||||
// plaintext version
|
||||
let log_directory = "/tmp/log".to_string();
|
||||
let endpoint_id = "ep-test-endpoint-id";
|
||||
let project_id = "test-project-id";
|
||||
let remote_syslog_host = "collector.host.tld";
|
||||
let remote_syslog_port = 5555;
|
||||
let remote_syslog_tls = "false";
|
||||
|
||||
let conf_str = generate_audit_rsyslog_config(
|
||||
log_directory,
|
||||
endpoint_id,
|
||||
project_id,
|
||||
remote_syslog_host,
|
||||
remote_syslog_port,
|
||||
remote_syslog_tls,
|
||||
);
|
||||
|
||||
assert!(conf_str.contains(r#"set $.remote_syslog_tls = "false";"#));
|
||||
assert!(conf_str.contains(r#"type="omfwd""#));
|
||||
assert!(conf_str.contains(r#"target="collector.host.tld""#));
|
||||
assert!(conf_str.contains(r#"port="5555""#));
|
||||
assert!(conf_str.contains(r#"StreamDriverPermittedPeers="collector.host.tld""#));
|
||||
}
|
||||
|
||||
{
|
||||
// TLS version
|
||||
let log_directory = "/tmp/log".to_string();
|
||||
let endpoint_id = "ep-test-endpoint-id";
|
||||
let project_id = "test-project-id";
|
||||
let remote_syslog_host = "collector.host.tld";
|
||||
let remote_syslog_port = 5556;
|
||||
let remote_syslog_tls = "true";
|
||||
|
||||
let conf_str = generate_audit_rsyslog_config(
|
||||
log_directory,
|
||||
endpoint_id,
|
||||
project_id,
|
||||
remote_syslog_host,
|
||||
remote_syslog_port,
|
||||
remote_syslog_tls,
|
||||
);
|
||||
|
||||
assert!(conf_str.contains(r#"set $.remote_syslog_tls = "true";"#));
|
||||
assert!(conf_str.contains(r#"type="omfwd""#));
|
||||
assert!(conf_str.contains(r#"target="collector.host.tld""#));
|
||||
assert!(conf_str.contains(r#"port="5556""#));
|
||||
assert!(conf_str.contains(r#"StreamDriverPermittedPeers="collector.host.tld""#));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,12 +23,12 @@ fn do_control_plane_request(
) -> Result<ControlPlaneConfigResponse, (bool, String, String)> {
let resp = reqwest::blocking::Client::new()
.get(uri)
.header("Authorization", format!("Bearer {}", jwt))
.header("Authorization", format!("Bearer {jwt}"))
.send()
.map_err(|e| {
(
true,
format!("could not perform request to control plane: {:?}", e),
format!("could not perform request to control plane: {e:?}"),
UNKNOWN_HTTP_STATUS.to_string(),
)
})?;

@@ -39,7 +39,7 @@ fn do_control_plane_request(
Ok(spec_resp) => Ok(spec_resp),
Err(e) => Err((
true,
format!("could not deserialize control plane response: {:?}", e),
format!("could not deserialize control plane response: {e:?}"),
status.to_string(),
)),
},

@@ -62,7 +62,7 @@ fn do_control_plane_request(
// or some internal failure happened. Doesn't make much sense to retry in this case.
_ => Err((
false,
format!("unexpected control plane response status code: {}", status),
format!("unexpected control plane response status code: {status}"),
status.to_string(),
)),
}
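The `(bool, String, String)` error triple above encodes (retryable, message, HTTP status). A hypothetical sketch of how a caller can act on the retryable flag; `fetch` is a stand-in, not the real request function:

```rust
// Illustrative retry loop over a (retryable, message, status) error triple.
fn fetch(attempt: u32) -> Result<String, (bool, String, String)> {
    if attempt < 2 {
        // Pretend the first attempts hit a transient network failure.
        Err((
            true,
            "could not perform request to control plane".into(),
            "UNKNOWN".into(),
        ))
    } else {
        Ok("spec".into())
    }
}

fn main() {
    let mut attempt = 0;
    let spec = loop {
        match fetch(attempt) {
            Ok(spec) => break spec,
            Err((retryable, msg, status)) if retryable => {
                eprintln!("attempt {attempt} failed ({status}): {msg}; retrying");
                attempt += 1;
            }
            Err((_, msg, status)) => panic!("fatal control plane error ({status}): {msg}"),
        }
    };
    println!("got spec: {spec}");
}
```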
@@ -933,56 +933,53 @@ async fn get_operations<'a>(
|
||||
PerDatabasePhase::DeleteDBRoleReferences => {
|
||||
let ctx = ctx.read().await;
|
||||
|
||||
let operations =
|
||||
spec.delta_operations
|
||||
.iter()
|
||||
.flatten()
|
||||
.filter(|op| op.action == "delete_role")
|
||||
.filter_map(move |op| {
|
||||
if db.is_owned_by(&op.name) {
|
||||
return None;
|
||||
}
|
||||
if !ctx.roles.contains_key(&op.name) {
|
||||
return None;
|
||||
}
|
||||
let quoted = op.name.pg_quote();
|
||||
let new_owner = match &db {
|
||||
DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(),
|
||||
DB::UserDB(db) => db.owner.pg_quote(),
|
||||
};
|
||||
let (escaped_role, outer_tag) = op.name.pg_quote_dollar();
|
||||
let operations = spec
|
||||
.delta_operations
|
||||
.iter()
|
||||
.flatten()
|
||||
.filter(|op| op.action == "delete_role")
|
||||
.filter_map(move |op| {
|
||||
if db.is_owned_by(&op.name) {
|
||||
return None;
|
||||
}
|
||||
if !ctx.roles.contains_key(&op.name) {
|
||||
return None;
|
||||
}
|
||||
let quoted = op.name.pg_quote();
|
||||
let new_owner = match &db {
|
||||
DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(),
|
||||
DB::UserDB(db) => db.owner.pg_quote(),
|
||||
};
|
||||
let (escaped_role, outer_tag) = op.name.pg_quote_dollar();
|
||||
|
||||
Some(vec![
|
||||
// This will reassign all dependent objects to the db owner
|
||||
Operation {
|
||||
query: format!(
|
||||
"REASSIGN OWNED BY {} TO {}",
|
||||
quoted, new_owner,
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
// Revoke some potentially blocking privileges (Neon-specific currently)
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/pre_drop_role_revoke_privileges.sql"),
|
||||
// N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
|
||||
role_name = escaped_role,
|
||||
outer_tag = outer_tag,
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
// This now will only drop privileges of the role
|
||||
// TODO: this is obviously not 100% true because of the above case,
|
||||
// there could be still some privileges that are not revoked. Maybe this
|
||||
// only drops privileges that were granted *by this* role, not *to this* role,
|
||||
// but this has to be checked.
|
||||
Operation {
|
||||
query: format!("DROP OWNED BY {}", quoted),
|
||||
comment: None,
|
||||
},
|
||||
])
|
||||
})
|
||||
.flatten();
|
||||
Some(vec![
|
||||
// This will reassign all dependent objects to the db owner
|
||||
Operation {
|
||||
query: format!("REASSIGN OWNED BY {quoted} TO {new_owner}",),
|
||||
comment: None,
|
||||
},
|
||||
// Revoke some potentially blocking privileges (Neon-specific currently)
|
||||
Operation {
|
||||
query: format!(
|
||||
include_str!("sql/pre_drop_role_revoke_privileges.sql"),
|
||||
// N.B. this has to be properly dollar-escaped with `pg_quote_dollar()`
|
||||
role_name = escaped_role,
|
||||
outer_tag = outer_tag,
|
||||
),
|
||||
comment: None,
|
||||
},
|
||||
// This now will only drop privileges of the role
|
||||
// TODO: this is obviously not 100% true because of the above case,
|
||||
// there could be still some privileges that are not revoked. Maybe this
|
||||
// only drops privileges that were granted *by this* role, not *to this* role,
|
||||
// but this has to be checked.
|
||||
Operation {
|
||||
query: format!("DROP OWNED BY {quoted}"),
|
||||
comment: None,
|
||||
},
|
||||
])
|
||||
})
|
||||
.flatten();
|
||||
|
||||
Ok(Box::new(operations))
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ pub async fn ping_safekeeper(
let (client, conn) = config.connect(tokio_postgres::NoTls).await?;
tokio::spawn(async move {
if let Err(e) = conn.await {
eprintln!("connection error: {}", e);
eprintln!("connection error: {e}");
}
});
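The hunk above uses the usual tokio_postgres pattern: the connection driver future must be spawned onto a task, otherwise the client makes no progress. A standalone sketch, assuming a reachable local Postgres and tokio with the `macros` feature:

```rust
// The spawn-the-connection-driver pattern, as a standalone sketch.
// The connection string is a placeholder.
use tokio_postgres::NoTls;

#[tokio::main]
async fn main() -> Result<(), tokio_postgres::Error> {
    let (client, connection) =
        tokio_postgres::connect("host=127.0.0.1 port=5432 user=postgres", NoTls).await?;

    // Drive the connection on a background task; the client is unusable otherwise.
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("connection error: {e}");
        }
    });

    let row = client.query_one("SELECT 1::INT4", &[]).await?;
    let one: i32 = row.get(0);
    println!("got {one}");
    Ok(())
}
```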
compute_tools/tests/README.md (new file, 6 lines)
@@ -0,0 +1,6 @@
### Test files

The file `cluster_spec.json` has been copied over from libs/compute_api
tests, with some edits:

- the neon.safekeepers setting contains a duplicate value
246
compute_tools/tests/cluster_spec.json
Normal file
246
compute_tools/tests/cluster_spec.json
Normal file
@@ -0,0 +1,246 @@
|
||||
{
|
||||
"format_version": 1.0,
|
||||
|
||||
"timestamp": "2021-05-23T18:25:43.511Z",
|
||||
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
|
||||
"suspend_timeout_seconds": 3600,
|
||||
|
||||
"cluster": {
|
||||
"cluster_id": "test-cluster-42",
|
||||
"name": "Zenith Test",
|
||||
"state": "restarted",
|
||||
"roles": [
|
||||
{
|
||||
"name": "postgres",
|
||||
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "alexk",
|
||||
"encrypted_password": null,
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "zenith \"new\"",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "zen",
|
||||
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
|
||||
},
|
||||
{
|
||||
"name": "\"name\";\\n select 1;",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
|
||||
},
|
||||
{
|
||||
"name": "MyRole",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
|
||||
}
|
||||
],
|
||||
"databases": [
|
||||
{
|
||||
"name": "DB2",
|
||||
"owner": "alexk",
|
||||
"options": [
|
||||
{
|
||||
"name": "LC_COLLATE",
|
||||
"value": "C",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "LC_CTYPE",
|
||||
"value": "C",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "TEMPLATE",
|
||||
"value": "template0",
|
||||
"vartype": "enum"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "zenith",
|
||||
"owner": "MyRole"
|
||||
},
|
||||
{
|
||||
"name": "zen",
|
||||
"owner": "zen"
|
||||
}
|
||||
],
|
||||
"settings": [
|
||||
{
|
||||
"name": "fsync",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "wal_level",
|
||||
"value": "logical",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "hot_standby",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "prewarm_lfc_on_startup",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "neon.safekeepers",
|
||||
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501,127.0.0.1:6502",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_log_hints",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "log_connections",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "shared_buffers",
|
||||
"value": "32768",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "port",
|
||||
"value": "55432",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_connections",
|
||||
"value": "100",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_wal_senders",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "listen_addresses",
|
||||
"value": "0.0.0.0",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_sender_timeout",
|
||||
"value": "0",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "password_encryption",
|
||||
"value": "md5",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "maintenance_work_mem",
|
||||
"value": "65536",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_parallel_workers",
|
||||
"value": "8",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_worker_processes",
|
||||
"value": "8",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "neon.tenant_id",
|
||||
"value": "b0554b632bd4d547a63b86c3630317e8",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_slots",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "neon.timeline_id",
|
||||
"value": "2414a61ffc94e428f14b5758fe308e13",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "shared_preload_libraries",
|
||||
"value": "neon",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "synchronous_standby_names",
|
||||
"value": "walproposer",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.pageserver_connstring",
|
||||
"value": "host=127.0.0.1 port=6400",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "test.escaping",
|
||||
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
|
||||
"vartype": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"delta_operations": [
|
||||
{
|
||||
"action": "delete_db",
|
||||
"name": "zenith_test"
|
||||
},
|
||||
{
|
||||
"action": "rename_db",
|
||||
"name": "DB",
|
||||
"new_name": "DB2"
|
||||
},
|
||||
{
|
||||
"action": "delete_role",
|
||||
"name": "zenith2"
|
||||
},
|
||||
{
|
||||
"action": "rename_role",
|
||||
"name": "zenith new",
|
||||
"new_name": "zenith \"new\""
|
||||
}
|
||||
],
|
||||
"remote_extensions": {
|
||||
"library_index": {
|
||||
"postgis-3": "postgis",
|
||||
"libpgrouting-3.4": "postgis",
|
||||
"postgis_raster-3": "postgis",
|
||||
"postgis_sfcgal-3": "postgis",
|
||||
"postgis_topology-3": "postgis",
|
||||
"address_standardizer-3": "postgis"
|
||||
},
|
||||
"extension_data": {
|
||||
"postgis": {
|
||||
"archive_path": "5834329303/v15/extensions/postgis.tar.zst",
|
||||
"control_data": {
|
||||
"postgis.control": "# postgis extension\ncomment = ''PostGIS geometry and geography spatial types and functions''\ndefault_version = ''3.3.2''\nmodule_pathname = ''$libdir/postgis-3''\nrelocatable = false\ntrusted = true\n",
|
||||
"pgrouting.control": "# pgRouting Extension\ncomment = ''pgRouting Extension''\ndefault_version = ''3.4.2''\nmodule_pathname = ''$libdir/libpgrouting-3.4''\nrelocatable = true\nrequires = ''plpgsql''\nrequires = ''postgis''\ntrusted = true\n",
|
||||
"postgis_raster.control": "# postgis_raster extension\ncomment = ''PostGIS raster types and functions''\ndefault_version = ''3.3.2''\nmodule_pathname = ''$libdir/postgis_raster-3''\nrelocatable = false\nrequires = postgis\ntrusted = true\n",
|
||||
"postgis_sfcgal.control": "# postgis topology extension\ncomment = ''PostGIS SFCGAL functions''\ndefault_version = ''3.3.2''\nrelocatable = true\nrequires = postgis\ntrusted = true\n",
|
||||
"postgis_topology.control": "# postgis topology extension\ncomment = ''PostGIS topology spatial types and functions''\ndefault_version = ''3.3.2''\nrelocatable = false\nschema = topology\nrequires = postgis\ntrusted = true\n",
|
||||
"address_standardizer.control": "# address_standardizer extension\ncomment = ''Used to parse an address into constituent elements. Generally used to support geocoding address normalization step.''\ndefault_version = ''3.3.2''\nrelocatable = true\ntrusted = true\n",
|
||||
"postgis_tiger_geocoder.control": "# postgis tiger geocoder extension\ncomment = ''PostGIS tiger geocoder and reverse geocoder''\ndefault_version = ''3.3.2''\nrelocatable = false\nschema = tiger\nrequires = ''postgis,fuzzystrmatch''\nsuperuser= false\ntrusted = true\n",
|
||||
"address_standardizer_data_us.control": "# address standardizer us dataset\ncomment = ''Address Standardizer US dataset example''\ndefault_version = ''3.3.2''\nrelocatable = true\ntrusted = true\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
"custom_extensions": [],
|
||||
"public_extensions": ["postgis"]
|
||||
},
|
||||
"pgbouncer_settings": {
|
||||
"default_pool_size": "42",
|
||||
"pool_mode": "session"
|
||||
}
|
||||
}
|
||||
@@ -16,9 +16,9 @@ use std::time::Duration;
|
||||
use anyhow::{Context, Result, anyhow, bail};
|
||||
use clap::Parser;
|
||||
use compute_api::requests::ComputeClaimsScope;
|
||||
use compute_api::spec::ComputeMode;
|
||||
use compute_api::spec::{ComputeMode, PageserverProtocol};
|
||||
use control_plane::broker::StorageBroker;
|
||||
use control_plane::endpoint::ComputeControlPlane;
|
||||
use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode};
|
||||
use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
|
||||
use control_plane::local_env;
|
||||
use control_plane::local_env::{
|
||||
@@ -48,7 +48,7 @@ use postgres_connection::parse_host_port;
|
||||
use safekeeper_api::membership::{SafekeeperGeneration, SafekeeperId};
|
||||
use safekeeper_api::{
|
||||
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
|
||||
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT, PgMajorVersion, PgVersionId,
|
||||
};
|
||||
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
|
||||
use tokio::task::JoinSet;
|
||||
@@ -64,7 +64,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
|
||||
const DEFAULT_BRANCH_NAME: &str = "main";
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
const DEFAULT_PG_VERSION: u32 = 17;
|
||||
const DEFAULT_PG_VERSION: PgMajorVersion = PgMajorVersion::PG17;
|
||||
|
||||
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
|
||||
|
||||
@@ -169,7 +169,7 @@ struct TenantCreateCmdArgs {
|
||||
|
||||
#[arg(default_value_t = DEFAULT_PG_VERSION)]
|
||||
#[clap(long, help = "Postgres version to use for the initial timeline")]
|
||||
pg_version: u32,
|
||||
pg_version: PgMajorVersion,
|
||||
|
||||
#[clap(
|
||||
long,
|
||||
@@ -292,7 +292,7 @@ struct TimelineCreateCmdArgs {
|
||||
|
||||
#[arg(default_value_t = DEFAULT_PG_VERSION)]
|
||||
#[clap(long, help = "Postgres version")]
|
||||
pg_version: u32,
|
||||
pg_version: PgMajorVersion,
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
@@ -324,7 +324,7 @@ struct TimelineImportCmdArgs {
|
||||
|
||||
#[arg(default_value_t = DEFAULT_PG_VERSION)]
|
||||
#[clap(long, help = "Postgres version of the backup being imported")]
|
||||
pg_version: u32,
|
||||
pg_version: PgMajorVersion,
|
||||
}
|
||||
|
||||
#[derive(clap::Subcommand)]
|
||||
@@ -603,7 +603,15 @@ struct EndpointCreateCmdArgs {
|
||||
|
||||
#[arg(default_value_t = DEFAULT_PG_VERSION)]
|
||||
#[clap(long, help = "Postgres version")]
|
||||
pg_version: u32,
|
||||
pg_version: PgMajorVersion,
|
||||
|
||||
/// Use gRPC to communicate with Pageservers, by generating grpc:// connstrings.
|
||||
///
|
||||
/// Specified on creation such that it's retained across reconfiguration and restarts.
|
||||
///
|
||||
/// NB: not yet supported by computes.
|
||||
#[clap(long)]
|
||||
grpc: bool,
|
||||
|
||||
#[clap(
|
||||
long,
|
||||
@@ -664,6 +672,13 @@ struct EndpointStartCmdArgs {
|
||||
#[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
|
||||
#[arg(default_value = "90s")]
|
||||
start_timeout: Duration,
|
||||
|
||||
#[clap(
|
||||
long,
|
||||
help = "Run in development mode, skipping VM-specific operations like process termination",
|
||||
action = clap::ArgAction::SetTrue
|
||||
)]
|
||||
dev: bool,
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
@@ -696,10 +711,9 @@ struct EndpointStopCmdArgs {
|
||||
)]
|
||||
destroy: bool,
|
||||
|
||||
#[clap(long, help = "Postgres shutdown mode, passed to \"pg_ctl -m <mode>\"")]
|
||||
#[arg(value_parser(["smart", "fast", "immediate"]))]
|
||||
#[arg(default_value = "fast")]
|
||||
mode: String,
|
||||
#[clap(long, help = "Postgres shutdown mode")]
|
||||
#[clap(default_value = "fast")]
|
||||
mode: EndpointTerminateMode,
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
@@ -905,7 +919,7 @@ fn print_timeline(
|
||||
br_sym = "┗━";
|
||||
}
|
||||
|
||||
print!("{} @{}: ", br_sym, ancestor_lsn);
|
||||
print!("{br_sym} @{ancestor_lsn}: ");
|
||||
}
|
||||
|
||||
// Finally print a timeline id and name with new line
|
||||
@@ -1281,7 +1295,7 @@ async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Re
|
||||
},
|
||||
new_members: None,
|
||||
};
|
||||
let pg_version = args.pg_version * 10000;
|
||||
let pg_version = PgVersionId::from(args.pg_version);
|
||||
let req = safekeeper_api::models::TimelineCreateRequest {
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
@@ -1451,6 +1465,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
args.internal_http_port,
|
||||
args.pg_version,
|
||||
mode,
|
||||
args.grpc,
|
||||
!args.update_catalog,
|
||||
false,
|
||||
)?;
|
||||
@@ -1491,13 +1506,20 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
|
||||
let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
|
||||
let conf = env.get_pageserver_conf(pageserver_id).unwrap();
|
||||
let parsed = parse_host_port(&conf.listen_pg_addr).expect("Bad config");
|
||||
(
|
||||
vec![(parsed.0, parsed.1.unwrap_or(5432))],
|
||||
// If caller is telling us what pageserver to use, this is not a tenant which is
|
||||
// full managed by storage controller, therefore not sharded.
|
||||
DEFAULT_STRIPE_SIZE,
|
||||
)
|
||||
// Use gRPC if requested.
|
||||
let pageserver = if endpoint.grpc {
|
||||
let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
|
||||
let (host, port) = parse_host_port(grpc_addr)?;
|
||||
let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
|
||||
(PageserverProtocol::Grpc, host, port)
|
||||
} else {
|
||||
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
|
||||
let port = port.unwrap_or(5432);
|
||||
(PageserverProtocol::Libpq, host, port)
|
||||
};
|
||||
// If caller is telling us what pageserver to use, this is not a tenant which is
|
||||
// fully managed by storage controller, therefore not sharded.
|
||||
(vec![pageserver], DEFAULT_STRIPE_SIZE)
|
||||
} else {
|
||||
// Look up the currently attached location of the tenant, and its striping metadata,
|
||||
// to pass these on to postgres.
|
||||
@@ -1516,11 +1538,20 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.await?;
|
||||
}
|
||||
|
||||
anyhow::Ok((
|
||||
Host::parse(&shard.listen_pg_addr)
|
||||
.expect("Storage controller reported bad hostname"),
|
||||
shard.listen_pg_port,
|
||||
))
|
||||
let pageserver = if endpoint.grpc {
|
||||
(
|
||||
PageserverProtocol::Grpc,
|
||||
Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))?,
|
||||
shard.listen_grpc_port.expect("no gRPC port"),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
PageserverProtocol::Libpq,
|
||||
Host::parse(&shard.listen_pg_addr)?,
|
||||
shard.listen_pg_port,
|
||||
)
|
||||
};
|
||||
anyhow::Ok(pageserver)
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
@@ -1565,6 +1596,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
stripe_size.0 as usize,
|
||||
args.create_test_user,
|
||||
args.start_timeout,
|
||||
args.dev,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
@@ -1575,11 +1607,19 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.get(endpoint_id.as_str())
|
||||
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
||||
let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
|
||||
let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
|
||||
vec![(
|
||||
pageserver.pg_connection_config.host().clone(),
|
||||
pageserver.pg_connection_config.port(),
|
||||
)]
|
||||
let conf = env.get_pageserver_conf(ps_id)?;
|
||||
// Use gRPC if requested.
|
||||
let pageserver = if endpoint.grpc {
|
||||
let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
|
||||
let (host, port) = parse_host_port(grpc_addr)?;
|
||||
let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
|
||||
(PageserverProtocol::Grpc, host, port)
|
||||
} else {
|
||||
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
|
||||
let port = port.unwrap_or(5432);
|
||||
(PageserverProtocol::Libpq, host, port)
|
||||
};
|
||||
vec![pageserver]
|
||||
} else {
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
storage_controller
|
||||
@@ -1588,18 +1628,30 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.shards
|
||||
.into_iter()
|
||||
.map(|shard| {
|
||||
(
|
||||
Host::parse(&shard.listen_pg_addr)
|
||||
.expect("Storage controller reported malformed host"),
|
||||
shard.listen_pg_port,
|
||||
)
|
||||
// Use gRPC if requested.
|
||||
if endpoint.grpc {
|
||||
(
|
||||
PageserverProtocol::Grpc,
|
||||
Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))
|
||||
.expect("bad hostname"),
|
||||
shard.listen_grpc_port.expect("no gRPC port"),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
PageserverProtocol::Libpq,
|
||||
Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
|
||||
shard.listen_pg_port,
|
||||
)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
// If --safekeepers argument is given, use only the listed
|
||||
// safekeeper nodes; otherwise all from the env.
|
||||
let safekeepers = parse_safekeepers(&args.safekeepers)?;
|
||||
endpoint.reconfigure(pageservers, None, safekeepers).await?;
|
||||
endpoint
|
||||
.reconfigure(Some(pageservers), None, safekeepers, None)
|
||||
.await?;
|
||||
}
|
||||
EndpointCmd::Stop(args) => {
|
||||
let endpoint_id = &args.endpoint_id;
|
||||
@@ -1607,7 +1659,10 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.endpoints
|
||||
.get(endpoint_id)
|
||||
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
||||
endpoint.stop(&args.mode, args.destroy)?;
|
||||
match endpoint.stop(args.mode, args.destroy).await?.lsn {
|
||||
Some(lsn) => println!("{lsn}"),
|
||||
None => println!("null"),
|
||||
}
|
||||
}
|
||||
EndpointCmd::GenerateJwt(args) => {
|
||||
let endpoint = {
|
||||
@@ -1689,7 +1744,7 @@ async fn handle_pageserver(subcmd: &PageserverCmd, env: &local_env::LocalEnv) ->
|
||||
StopMode::Immediate => true,
|
||||
};
|
||||
if let Err(e) = get_pageserver(env, args.pageserver_id)?.stop(immediate) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
eprintln!("pageserver stop failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -1698,7 +1753,7 @@ async fn handle_pageserver(subcmd: &PageserverCmd, env: &local_env::LocalEnv) ->
|
||||
let pageserver = get_pageserver(env, args.pageserver_id)?;
|
||||
//TODO what shutdown strategy should we use here?
|
||||
if let Err(e) = pageserver.stop(false) {
|
||||
eprintln!("pageserver stop failed: {}", e);
|
||||
eprintln!("pageserver stop failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -1715,7 +1770,7 @@ async fn handle_pageserver(subcmd: &PageserverCmd, env: &local_env::LocalEnv) ->
|
||||
{
|
||||
Ok(_) => println!("Page server is up and running"),
|
||||
Err(err) => {
|
||||
eprintln!("Page server is not available: {}", err);
|
||||
eprintln!("Page server is not available: {err}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -1752,7 +1807,7 @@ async fn handle_storage_controller(
|
||||
},
|
||||
};
|
||||
if let Err(e) = svc.stop(stop_args).await {
|
||||
eprintln!("stop failed: {}", e);
|
||||
eprintln!("stop failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -1774,7 +1829,7 @@ async fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) ->
|
||||
let safekeeper = get_safekeeper(env, args.id)?;
|
||||
|
||||
if let Err(e) = safekeeper.start(&args.extra_opt, &args.start_timeout).await {
|
||||
eprintln!("safekeeper start failed: {}", e);
|
||||
eprintln!("safekeeper start failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -1786,7 +1841,7 @@ async fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) ->
|
||||
StopMode::Immediate => true,
|
||||
};
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper stop failed: {}", e);
|
||||
eprintln!("safekeeper stop failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -1799,12 +1854,12 @@ async fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) ->
|
||||
};
|
||||
|
||||
if let Err(e) = safekeeper.stop(immediate) {
|
||||
eprintln!("safekeeper stop failed: {}", e);
|
||||
eprintln!("safekeeper stop failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if let Err(e) = safekeeper.start(&args.extra_opt, &args.start_timeout).await {
|
||||
eprintln!("safekeeper start failed: {}", e);
|
||||
eprintln!("safekeeper start failed: {e}");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
@@ -2039,11 +2094,16 @@ async fn handle_stop_all(args: &StopCmdArgs, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
|
||||
let mode = if immediate {
|
||||
EndpointTerminateMode::Immediate
|
||||
} else {
|
||||
EndpointTerminateMode::Fast
|
||||
};
|
||||
// Stop all endpoints
|
||||
match ComputeControlPlane::load(env.clone()) {
|
||||
Ok(cplane) => {
|
||||
for (_k, node) in cplane.endpoints {
|
||||
if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) {
|
||||
if let Err(e) = node.stop(mode, false).await {
|
||||
eprintln!("postgres stop failed: {e:#}");
|
||||
}
|
||||
}
|
||||
@@ -2055,7 +2115,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
|
||||
|
||||
let storage = EndpointStorage::from_env(env);
|
||||
if let Err(e) = storage.stop(immediate) {
|
||||
eprintln!("endpoint_storage stop failed: {:#}", e);
|
||||
eprintln!("endpoint_storage stop failed: {e:#}");
|
||||
}
|
||||
|
||||
for ps_conf in &env.pageservers {
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
//! ```
|
||||
//!
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::Display;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
@@ -51,11 +52,12 @@ use compute_api::requests::{
|
||||
COMPUTE_AUDIENCE, ComputeClaims, ComputeClaimsScope, ConfigurationRequest,
|
||||
};
|
||||
use compute_api::responses::{
|
||||
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TlsConfig,
|
||||
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TerminateResponse,
|
||||
TlsConfig,
|
||||
};
|
||||
use compute_api::spec::{
|
||||
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
|
||||
RemoteExtSpec, Role,
|
||||
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,
|
||||
PgIdent, RemoteExtSpec, Role,
|
||||
};
|
||||
use jsonwebtoken::jwk::{
|
||||
AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
|
||||
@@ -65,6 +67,7 @@ use nix::sys::signal::{Signal, kill};
|
||||
use pageserver_api::shard::ShardStripeSize;
|
||||
use pem::Pem;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use safekeeper_api::PgMajorVersion;
|
||||
use safekeeper_api::membership::SafekeeperGeneration;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha2::{Digest, Sha256};
|
||||
@@ -76,7 +79,6 @@ use utils::id::{NodeId, TenantId, TimelineId};
|
||||
|
||||
use crate::local_env::LocalEnv;
|
||||
use crate::postgresql_conf::PostgresConf;
|
||||
use crate::storage_controller::StorageController;
|
||||
|
||||
// contents of a endpoint.json file
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
@@ -88,7 +90,8 @@ pub struct EndpointConf {
|
||||
pg_port: u16,
|
||||
external_http_port: u16,
|
||||
internal_http_port: u16,
|
||||
pg_version: u32,
|
||||
pg_version: PgMajorVersion,
|
||||
grpc: bool,
|
||||
skip_pg_catalog_updates: bool,
|
||||
reconfigure_concurrency: usize,
|
||||
drop_subscriptions_before_start: bool,
|
||||
@@ -190,8 +193,9 @@ impl ComputeControlPlane {
|
||||
pg_port: Option<u16>,
|
||||
external_http_port: Option<u16>,
|
||||
internal_http_port: Option<u16>,
|
||||
pg_version: u32,
|
||||
pg_version: PgMajorVersion,
|
||||
mode: ComputeMode,
|
||||
grpc: bool,
|
||||
skip_pg_catalog_updates: bool,
|
||||
drop_subscriptions_before_start: bool,
|
||||
) -> Result<Arc<Endpoint>> {
|
||||
@@ -226,6 +230,7 @@ impl ComputeControlPlane {
|
||||
// we also skip catalog updates in the cloud.
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
grpc,
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
cluster: None,
|
||||
@@ -244,6 +249,7 @@ impl ComputeControlPlane {
|
||||
internal_http_port,
|
||||
pg_port,
|
||||
pg_version,
|
||||
grpc,
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
reconfigure_concurrency: 1,
|
||||
@@ -298,6 +304,8 @@ pub struct Endpoint {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub mode: ComputeMode,
|
||||
/// If true, the endpoint should use gRPC to communicate with Pageservers.
|
||||
pub grpc: bool,
|
||||
|
||||
// port and address of the Postgres server and `compute_ctl`'s HTTP APIs
|
||||
pub pg_address: SocketAddr,
|
||||
@@ -305,7 +313,7 @@ pub struct Endpoint {
|
||||
pub internal_http_address: SocketAddr,
|
||||
|
||||
// postgres major version in the format: 14, 15, etc.
|
||||
pg_version: u32,
|
||||
pg_version: PgMajorVersion,
|
||||
|
||||
// These are not part of the endpoint as such, but the environment
|
||||
// the endpoint runs in.
|
||||
@@ -333,15 +341,35 @@ pub enum EndpointStatus {
RunningNoPidfile,
}

impl std::fmt::Display for EndpointStatus {
impl Display for EndpointStatus {
fn fmt(&self, writer: &mut std::fmt::Formatter) -> std::fmt::Result {
let s = match self {
writer.write_str(match self {
Self::Running => "running",
Self::Stopped => "stopped",
Self::Crashed => "crashed",
Self::RunningNoPidfile => "running, no pidfile",
};
write!(writer, "{}", s)
})
}
}

#[derive(Default, Clone, Copy, clap::ValueEnum)]
pub enum EndpointTerminateMode {
#[default]
/// Use pg_ctl stop -m fast
Fast,
/// Use pg_ctl stop -m immediate
Immediate,
/// Use /terminate?mode=immediate
ImmediateTerminate,
}

impl std::fmt::Display for EndpointTerminateMode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(match &self {
EndpointTerminateMode::Fast => "fast",
EndpointTerminateMode::Immediate => "immediate",
EndpointTerminateMode::ImmediateTerminate => "immediate-terminate",
})
}
}
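A small sketch of how a `clap::ValueEnum` such as `EndpointTerminateMode` maps CLI strings to variants; by default the derive turns variant names into kebab-case values. `Mode` is a stand-in type, not the real enum:

```rust
// Stand-in for EndpointTerminateMode, showing the CLI string <-> variant mapping.
use clap::ValueEnum;

#[derive(Clone, Copy, Debug, PartialEq, Eq, ValueEnum)]
enum Mode {
    Fast,
    Immediate,
    ImmediateTerminate,
}

fn main() {
    // ValueEnum::from_str is what clap uses when parsing `--mode <value>`.
    assert_eq!(Mode::from_str("fast", true), Ok(Mode::Fast));
    assert_eq!(
        Mode::from_str("immediate-terminate", true),
        Ok(Mode::ImmediateTerminate)
    );
    assert!(Mode::from_str("bogus", true).is_err());
}
```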
@@ -380,6 +408,7 @@ impl Endpoint {
|
||||
mode: conf.mode,
|
||||
tenant_id: conf.tenant_id,
|
||||
pg_version: conf.pg_version,
|
||||
grpc: conf.grpc,
|
||||
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
|
||||
reconfigure_concurrency: conf.reconfigure_concurrency,
|
||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||
@@ -506,7 +535,7 @@ impl Endpoint {
|
||||
conf.append("hot_standby", "on");
|
||||
// prefetching of blocks referenced in WAL doesn't make sense for us
|
||||
// Neon hot standby ignores pages that are not in the shared_buffers
|
||||
if self.pg_version >= 15 {
|
||||
if self.pg_version >= PgMajorVersion::PG15 {
|
||||
conf.append("recovery_prefetch", "off");
|
||||
}
|
||||
}
|
||||
@@ -608,10 +637,10 @@ impl Endpoint {
}
}

fn build_pageserver_connstr(pageservers: &[(Host, u16)]) -> String {
fn build_pageserver_connstr(pageservers: &[(PageserverProtocol, Host, u16)]) -> String {
pageservers
.iter()
.map(|(host, port)| format!("postgresql://no_user@{host}:{port}"))
.map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
.collect::<Vec<_>>()
.join(",")
}
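For illustration, a hedged sketch of the connstring shape the reworked `build_pageserver_connstr` produces: one scheme-qualified URL per shard, comma-separated. The `Protocol` enum and its Display mapping below are assumptions standing in for `compute_api::spec::PageserverProtocol`:

```rust
// Stand-in protocol enum; the real PageserverProtocol lives in compute_api.
use std::fmt;

enum Protocol {
    Libpq,
    Grpc,
}

impl fmt::Display for Protocol {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Protocol::Libpq => "postgresql",
            Protocol::Grpc => "grpc",
        })
    }
}

fn build_connstr(pageservers: &[(Protocol, &str, u16)]) -> String {
    pageservers
        .iter()
        .map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
        .collect::<Vec<_>>()
        .join(",")
}

fn main() {
    let connstr = build_connstr(&[
        (Protocol::Libpq, "127.0.0.1", 6400),
        (Protocol::Grpc, "127.0.0.1", 51051),
    ]);
    assert_eq!(
        connstr,
        "postgresql://no_user@127.0.0.1:6400,grpc://no_user@127.0.0.1:51051"
    );
}
```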
@@ -656,11 +685,12 @@ impl Endpoint {
|
||||
endpoint_storage_addr: String,
|
||||
safekeepers_generation: Option<SafekeeperGeneration>,
|
||||
safekeepers: Vec<NodeId>,
|
||||
pageservers: Vec<(Host, u16)>,
|
||||
pageservers: Vec<(PageserverProtocol, Host, u16)>,
|
||||
remote_ext_base_url: Option<&String>,
|
||||
shard_stripe_size: usize,
|
||||
create_test_user: bool,
|
||||
start_timeout: Duration,
|
||||
dev: bool,
|
||||
) -> Result<()> {
|
||||
if self.status() == EndpointStatus::Running {
|
||||
anyhow::bail!("The endpoint is already running");
|
||||
@@ -750,6 +780,7 @@ impl Endpoint {
|
||||
endpoint_storage_addr: Some(endpoint_storage_addr),
|
||||
endpoint_storage_token: Some(endpoint_storage_token),
|
||||
autoprewarm: false,
|
||||
suspend_timeout_seconds: -1, // Only used in neon_local.
|
||||
};
|
||||
|
||||
// this strange code is needed to support respec() in tests
|
||||
@@ -794,10 +825,10 @@ impl Endpoint {
|
||||
|
||||
// Launch compute_ctl
|
||||
let conn_str = self.connstr("cloud_admin", "postgres");
|
||||
println!("Starting postgres node at '{}'", conn_str);
|
||||
println!("Starting postgres node at '{conn_str}'");
|
||||
if create_test_user {
|
||||
let conn_str = self.connstr("test", "neondb");
|
||||
println!("Also at '{}'", conn_str);
|
||||
println!("Also at '{conn_str}'");
|
||||
}
|
||||
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
|
||||
cmd.args([
|
||||
@@ -831,6 +862,10 @@ impl Endpoint {
|
||||
cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
|
||||
}
|
||||
|
||||
if dev {
|
||||
cmd.arg("--dev");
|
||||
}
|
||||
|
||||
let child = cmd.spawn()?;
|
||||
// set up a scopeguard to kill & wait for the child in case we panic or bail below
|
||||
let child = scopeguard::guard(child, |mut child| {
|
||||
@@ -883,7 +918,7 @@ impl Endpoint {
|
||||
ComputeStatus::Empty
|
||||
| ComputeStatus::ConfigurationPending
|
||||
| ComputeStatus::Configuration
|
||||
| ComputeStatus::TerminationPending
|
||||
| ComputeStatus::TerminationPending { .. }
|
||||
| ComputeStatus::Terminated => {
|
||||
bail!("unexpected compute status: {:?}", state.status)
|
||||
}
|
||||
@@ -892,8 +927,7 @@ impl Endpoint {
|
||||
Err(e) => {
|
||||
if Instant::now().duration_since(start_at) > start_timeout {
|
||||
return Err(e).context(format!(
|
||||
"timed out {:?} waiting to connect to compute_ctl HTTP",
|
||||
start_timeout,
|
||||
"timed out {start_timeout:?} waiting to connect to compute_ctl HTTP",
|
||||
));
|
||||
}
|
||||
}
|
||||
@@ -932,7 +966,7 @@ impl Endpoint {
|
||||
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
|
||||
let url = response.url().to_owned();
|
||||
let msg = match response.text().await {
|
||||
Ok(err_body) => format!("Error: {}", err_body),
|
||||
Ok(err_body) => format!("Error: {err_body}"),
|
||||
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
|
||||
};
|
||||
Err(anyhow::anyhow!(msg))
|
||||
@@ -941,9 +975,10 @@ impl Endpoint {
|
||||
|
||||
pub async fn reconfigure(
|
||||
&self,
|
||||
mut pageservers: Vec<(Host, u16)>,
|
||||
pageservers: Option<Vec<(PageserverProtocol, Host, u16)>>,
|
||||
stripe_size: Option<ShardStripeSize>,
|
||||
safekeepers: Option<Vec<NodeId>>,
|
||||
safekeeper_generation: Option<SafekeeperGeneration>,
|
||||
) -> Result<()> {
|
||||
let (mut spec, compute_ctl_config) = {
|
||||
let config_path = self.endpoint_path().join("config.json");
|
||||
@@ -956,34 +991,24 @@ impl Endpoint {
|
||||
let postgresql_conf = self.read_postgresql_conf()?;
|
||||
spec.cluster.postgresql_conf = Some(postgresql_conf);
|
||||
|
||||
// If we weren't given explicit pageservers, query the storage controller
|
||||
if pageservers.is_empty() {
|
||||
let storage_controller = StorageController::from_env(&self.env);
|
||||
let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
|
||||
pageservers = locate_result
|
||||
.shards
|
||||
.into_iter()
|
||||
.map(|shard| {
|
||||
(
|
||||
Host::parse(&shard.listen_pg_addr)
|
||||
.expect("Storage controller reported bad hostname"),
|
||||
shard.listen_pg_port,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
}
|
||||
// If pageservers are not specified, don't change them.
|
||||
if let Some(pageservers) = pageservers {
|
||||
anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
|
||||
|
||||
let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
|
||||
assert!(!pageserver_connstr.is_empty());
|
||||
spec.pageserver_connstring = Some(pageserver_connstr);
|
||||
if stripe_size.is_some() {
|
||||
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
|
||||
let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
|
||||
spec.pageserver_connstring = Some(pageserver_connstr);
|
||||
if stripe_size.is_some() {
|
||||
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
|
||||
}
|
||||
}
|
||||
|
||||
// If safekeepers are not specified, don't change them.
|
||||
if let Some(safekeepers) = safekeepers {
|
||||
let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
|
||||
spec.safekeeper_connstrings = safekeeper_connstrings;
|
||||
if let Some(g) = safekeeper_generation {
|
||||
spec.safekeepers_generation = Some(g.into_inner());
|
||||
}
|
||||
}
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
@@ -1014,15 +1039,52 @@ impl Endpoint {
|
||||
} else {
|
||||
let url = response.url().to_owned();
|
||||
let msg = match response.text().await {
|
||||
Ok(err_body) => format!("Error: {}", err_body),
|
||||
Ok(err_body) => format!("Error: {err_body}"),
|
||||
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
|
||||
};
|
||||
Err(anyhow::anyhow!(msg))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stop(&self, mode: &str, destroy: bool) -> Result<()> {
|
||||
self.pg_ctl(&["-m", mode, "stop"], &None)?;
|
||||
pub async fn reconfigure_pageservers(
|
||||
&self,
|
||||
pageservers: Vec<(PageserverProtocol, Host, u16)>,
|
||||
stripe_size: Option<ShardStripeSize>,
|
||||
) -> Result<()> {
|
||||
self.reconfigure(Some(pageservers), stripe_size, None, None)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn reconfigure_safekeepers(
|
||||
&self,
|
||||
safekeepers: Vec<NodeId>,
|
||||
generation: SafekeeperGeneration,
|
||||
) -> Result<()> {
|
||||
self.reconfigure(None, None, Some(safekeepers), Some(generation))
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn stop(
|
||||
&self,
|
||||
mode: EndpointTerminateMode,
|
||||
destroy: bool,
|
||||
) -> Result<TerminateResponse> {
|
||||
// pg_ctl stop is fast but doesn't allow us to collect LSN. /terminate is
|
||||
// slow, and test runs time out. Solution: special mode "immediate-terminate"
|
||||
// which uses /terminate
|
||||
let response = if let EndpointTerminateMode::ImmediateTerminate = mode {
|
||||
let ip = self.external_http_address.ip();
|
||||
let port = self.external_http_address.port();
|
||||
let url = format!("http://{ip}:{port}/terminate?mode=immediate");
|
||||
let token = self.generate_jwt(Some(ComputeClaimsScope::Admin))?;
|
||||
let request = reqwest::Client::new().post(url).bearer_auth(token);
|
||||
let response = request.send().await.context("/terminate")?;
|
||||
let text = response.text().await.context("/terminate result")?;
|
||||
serde_json::from_str(&text).with_context(|| format!("deserializing {text}"))?
|
||||
} else {
|
||||
self.pg_ctl(&["-m", &mode.to_string(), "stop"], &None)?;
|
||||
TerminateResponse { lsn: None }
|
||||
};
|
||||
|
||||
// Also wait for the compute_ctl process to die. It might have some
|
||||
// cleanup work to do after postgres stops, like syncing safekeepers,
|
||||
@@ -1032,7 +1094,7 @@ impl Endpoint {
|
||||
// waiting. Sometimes we do *not* want this cleanup: tests intentionally
|
||||
// do stop when majority of safekeepers is down, so sync-safekeepers
|
||||
// would hang otherwise. This could be a separate flag though.
|
||||
let send_sigterm = destroy || mode == "immediate";
|
||||
let send_sigterm = destroy || !matches!(mode, EndpointTerminateMode::Fast);
|
||||
self.wait_for_compute_ctl_to_exit(send_sigterm)?;
|
||||
if destroy {
|
||||
println!(
|
||||
@@ -1041,7 +1103,7 @@ impl Endpoint {
|
||||
);
|
||||
std::fs::remove_dir_all(self.endpoint_path())?;
|
||||
}
|
||||
Ok(())
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub fn connstr(&self, user: &str, db_name: &str) -> String {
|
||||
|
||||
@@ -12,9 +12,11 @@ use std::{env, fs};
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use clap::ValueEnum;
|
||||
use pageserver_api::config::PostHogConfig;
|
||||
use pem::Pem;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::{Certificate, Url};
|
||||
use safekeeper_api::PgMajorVersion;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::auth::encode_from_key_file;
|
||||
use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
|
||||
@@ -209,6 +211,12 @@ pub struct NeonStorageControllerConf {
|
||||
pub use_https_safekeeper_api: bool,
|
||||
|
||||
pub use_local_compute_notifications: bool,
|
||||
|
||||
pub timeline_safekeeper_count: Option<usize>,
|
||||
|
||||
pub posthog_config: Option<PostHogConfig>,
|
||||
|
||||
pub kick_secondary_downloads: Option<bool>,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerConf {
|
||||
@@ -236,9 +244,12 @@ impl Default for NeonStorageControllerConf {
|
||||
heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
|
||||
long_reconcile_threshold: None,
|
||||
use_https_pageserver_api: false,
|
||||
timelines_onto_safekeepers: false,
|
||||
timelines_onto_safekeepers: true,
|
||||
use_https_safekeeper_api: false,
|
||||
use_local_compute_notifications: true,
|
||||
timeline_safekeeper_count: None,
|
||||
posthog_config: None,
|
||||
kick_secondary_downloads: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -254,7 +265,7 @@ impl Default for EndpointStorageConf {
|
||||
impl NeonBroker {
|
||||
pub fn client_url(&self) -> Url {
|
||||
let url = if let Some(addr) = self.listen_https_addr {
|
||||
format!("https://{}", addr)
|
||||
format!("https://{addr}")
|
||||
} else {
|
||||
format!(
|
||||
"http://{}",
|
||||
@@ -418,25 +429,21 @@ impl LocalEnv {
|
||||
self.pg_distrib_dir.clone()
|
||||
}
|
||||
|
||||
pub fn pg_distrib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_distrib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
|
||||
let path = self.pg_distrib_dir.clone();
|
||||
|
||||
#[allow(clippy::manual_range_patterns)]
|
||||
match pg_version {
|
||||
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
|
||||
_ => bail!("Unsupported postgres version: {}", pg_version),
|
||||
}
|
||||
Ok(path.join(pg_version.v_str()))
|
||||
}
|
||||
|
||||
pub fn pg_dir(&self, pg_version: u32, dir_name: &str) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_dir(&self, pg_version: PgMajorVersion, dir_name: &str) -> anyhow::Result<PathBuf> {
|
||||
Ok(self.pg_distrib_dir(pg_version)?.join(dir_name))
|
||||
}
|
||||
|
||||
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_bin_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
|
||||
self.pg_dir(pg_version, "bin")
|
||||
}
|
||||
|
||||
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
|
||||
pub fn pg_lib_dir(&self, pg_version: PgMajorVersion) -> anyhow::Result<PathBuf> {
|
||||
self.pg_dir(pg_version, "lib")
|
||||
}
|
||||
|
||||
@@ -727,7 +734,7 @@ impl LocalEnv {
|
||||
let config_toml_path = dentry.path().join("pageserver.toml");
|
||||
let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
|
||||
&std::fs::read_to_string(&config_toml_path)
|
||||
.with_context(|| format!("read {:?}", config_toml_path))?,
|
||||
.with_context(|| format!("read {config_toml_path:?}"))?,
|
||||
)
|
||||
.context("parse pageserver.toml")?;
|
||||
let identity_toml_path = dentry.path().join("identity.toml");
|
||||
@@ -737,7 +744,7 @@ impl LocalEnv {
|
||||
}
|
||||
let identity_toml: IdentityTomlSubset = toml_edit::de::from_str(
|
||||
&std::fs::read_to_string(&identity_toml_path)
|
||||
.with_context(|| format!("read {:?}", identity_toml_path))?,
|
||||
.with_context(|| format!("read {identity_toml_path:?}"))?,
|
||||
)
|
||||
.context("parse identity.toml")?;
|
||||
let PageserverConfigTomlSubset {
|
||||
|
||||
@@ -16,11 +16,13 @@ use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::config::{DEFAULT_GRPC_LISTEN_PORT, DEFAULT_HTTP_LISTEN_PORT};
|
||||
use pageserver_api::models::{self, TenantInfo, TimelineInfo};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api;
|
||||
use postgres_backend::AuthType;
|
||||
use postgres_connection::{PgConnectionConfig, parse_host_port};
|
||||
use safekeeper_api::PgMajorVersion;
|
||||
use utils::auth::{Claims, Scope};
|
||||
use utils::id::{NodeId, TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
@@ -120,7 +122,7 @@ impl PageServerNode {
|
||||
.env
|
||||
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
|
||||
.unwrap();
|
||||
overrides.push(format!("control_plane_api_token='{}'", jwt_token));
|
||||
overrides.push(format!("control_plane_api_token='{jwt_token}'"));
|
||||
}
|
||||
|
||||
if !conf.other.contains_key("remote_storage") {
|
||||
@@ -252,9 +254,10 @@ impl PageServerNode {
|
||||
// the storage controller
|
||||
let metadata_path = datadir.join("metadata.json");
|
||||
|
||||
let (_http_host, http_port) =
|
||||
let http_host = "localhost".to_string();
|
||||
let (_, http_port) =
|
||||
parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr");
|
||||
let http_port = http_port.unwrap_or(9898);
|
||||
let http_port = http_port.unwrap_or(DEFAULT_HTTP_LISTEN_PORT);
|
||||
|
||||
let https_port = match self.conf.listen_https_addr.as_ref() {
|
||||
Some(https_addr) => {
|
||||
@@ -265,6 +268,13 @@ impl PageServerNode {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let (mut grpc_host, mut grpc_port) = (None, None);
|
||||
if let Some(grpc_addr) = &self.conf.listen_grpc_addr {
|
||||
let (_, port) = parse_host_port(grpc_addr).expect("Unable to parse listen_grpc_addr");
|
||||
grpc_host = Some("localhost".to_string());
|
||||
grpc_port = Some(port.unwrap_or(DEFAULT_GRPC_LISTEN_PORT));
|
||||
}
|
||||
|
||||
// Intentionally hand-craft JSON: this acts as an implicit format compat test
|
||||
// in case the pageserver-side structure is edited, and reflects the real life
|
||||
// situation: the metadata is written by some other script.
|
||||
@@ -273,7 +283,9 @@ impl PageServerNode {
|
||||
serde_json::to_vec(&pageserver_api::config::NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: self.pg_connection_config.port(),
|
||||
http_host: "localhost".to_string(),
|
||||
grpc_host,
|
||||
grpc_port,
|
||||
http_host,
|
||||
http_port,
|
||||
https_port,
|
||||
other: HashMap::from([(
|
||||
@@ -596,7 +608,7 @@ impl PageServerNode {
|
||||
timeline_id: TimelineId,
|
||||
base: (Lsn, PathBuf),
|
||||
pg_wal: Option<(Lsn, PathBuf)>,
|
||||
pg_version: u32,
|
||||
pg_version: PgMajorVersion,
|
||||
) -> anyhow::Result<()> {
|
||||
// Init base reader
|
||||
let (start_lsn, base_tarfile_path) = base;
|
||||
|
||||
@@ -143,7 +143,7 @@ impl SafekeeperNode {
|
||||
let id_string = id.to_string();
|
||||
// TODO: add availability_zone to the config.
|
||||
// Right now we just specify any value here and use it to check metrics in tests.
|
||||
let availability_zone = format!("sk-{}", id_string);
|
||||
let availability_zone = format!("sk-{id_string}");
|
||||
|
||||
let mut args = vec![
|
||||
"-D".to_owned(),
|
||||
|
||||
@@ -6,6 +6,8 @@ use std::str::FromStr;
|
||||
use std::sync::OnceLock;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use crate::background_process;
|
||||
use crate::local_env::{LocalEnv, NeonStorageControllerConf};
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
use hyper0::Uri;
|
||||
use nix::unistd::Pid;
|
||||
@@ -22,6 +24,7 @@ use pageserver_client::mgmt_api::ResponseErrorMessageExt;
|
||||
use pem::Pem;
|
||||
use postgres_backend::AuthType;
|
||||
use reqwest::{Method, Response};
|
||||
use safekeeper_api::PgMajorVersion;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::process::Command;
|
||||
@@ -31,9 +34,6 @@ use utils::auth::{Claims, Scope, encode_from_key_file};
|
||||
use utils::id::{NodeId, TenantId};
|
||||
use whoami::username;
|
||||
|
||||
use crate::background_process;
|
||||
use crate::local_env::{LocalEnv, NeonStorageControllerConf};
|
||||
|
||||
pub struct StorageController {
|
||||
env: LocalEnv,
|
||||
private_key: Option<Pem>,
|
||||
@@ -48,7 +48,7 @@ pub struct StorageController {
|
||||
|
||||
const COMMAND: &str = "storage_controller";
|
||||
|
||||
const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
|
||||
const STORAGE_CONTROLLER_POSTGRES_VERSION: PgMajorVersion = PgMajorVersion::PG16;
|
||||
|
||||
const DB_NAME: &str = "storage_controller";
|
||||
|
||||
@@ -167,7 +167,7 @@ impl StorageController {
|
||||
fn storage_controller_instance_dir(&self, instance_id: u8) -> PathBuf {
|
||||
self.env
|
||||
.base_data_dir
|
||||
.join(format!("storage_controller_{}", instance_id))
|
||||
.join(format!("storage_controller_{instance_id}"))
|
||||
}
|
||||
|
||||
fn pid_file(&self, instance_id: u8) -> Utf8PathBuf {
|
||||
@@ -184,9 +184,15 @@ impl StorageController {
|
||||
/// to other versions if that one isn't found. Some automated tests create circumstances
|
||||
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
|
||||
async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
|
||||
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 16, 15, 14];
|
||||
const PREFER_VERSIONS: [PgMajorVersion; 5] = [
|
||||
STORAGE_CONTROLLER_POSTGRES_VERSION,
|
||||
PgMajorVersion::PG16,
|
||||
PgMajorVersion::PG15,
|
||||
PgMajorVersion::PG14,
|
||||
PgMajorVersion::PG17,
|
||||
];
|
||||
|
||||
for v in prefer_versions {
|
||||
for v in PREFER_VERSIONS {
|
||||
let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
|
||||
if tokio::fs::try_exists(&path).await? {
|
||||
return Ok(path);
|
||||
@@ -220,7 +226,7 @@ impl StorageController {
|
||||
"-d",
|
||||
DB_NAME,
|
||||
"-p",
|
||||
&format!("{}", postgres_port),
|
||||
&format!("{postgres_port}"),
|
||||
];
|
||||
let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();
|
||||
let envs = [
|
||||
@@ -263,7 +269,7 @@ impl StorageController {
|
||||
"-h",
|
||||
"localhost",
|
||||
"-p",
|
||||
&format!("{}", postgres_port),
|
||||
&format!("{postgres_port}"),
|
||||
"-U",
|
||||
&username(),
|
||||
"-O",
|
||||
@@ -425,7 +431,7 @@ impl StorageController {
|
||||
// from `LocalEnv`'s config file (`.neon/config`).
|
||||
tokio::fs::write(
|
||||
&pg_data_path.join("postgresql.conf"),
|
||||
format!("port = {}\nfsync=off\n", postgres_port),
|
||||
format!("port = {postgres_port}\nfsync=off\n"),
|
||||
)
|
||||
.await?;
|
||||
|
||||
@@ -477,7 +483,7 @@ impl StorageController {
|
||||
self.setup_database(postgres_port).await?;
|
||||
}
|
||||
|
||||
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", postgres_port);
|
||||
let database_url = format!("postgresql://localhost:{postgres_port}/{DB_NAME}");
|
||||
|
||||
// We support running a startup SQL script to fiddle with the database before we launch storcon.
|
||||
// This is used by the test suite.
|
||||
@@ -508,7 +514,7 @@ impl StorageController {
|
||||
drop(client);
|
||||
conn.await??;
|
||||
|
||||
let addr = format!("{}:{}", host, listen_port);
|
||||
let addr = format!("{host}:{listen_port}");
|
||||
let address_for_peers = Uri::builder()
|
||||
.scheme(scheme)
|
||||
.authority(addr.clone())
|
||||
@@ -557,6 +563,10 @@ impl StorageController {
|
||||
args.push("--use-local-compute-notifications".to_string());
|
||||
}
|
||||
|
||||
if let Some(value) = self.config.kick_secondary_downloads {
|
||||
args.push(format!("--kick-secondary-downloads={value}"));
|
||||
}
|
||||
|
||||
if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() {
|
||||
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
|
||||
}
|
||||
@@ -628,6 +638,28 @@ impl StorageController {
|
||||
args.push("--timelines-onto-safekeepers".to_string());
|
||||
}
|
||||
|
||||
// neon_local is used in test environments where we often have less than 3 safekeepers.
|
||||
if self.config.timeline_safekeeper_count.is_some() || self.env.safekeepers.len() < 3 {
|
||||
let sk_cnt = self
|
||||
.config
|
||||
.timeline_safekeeper_count
|
||||
.unwrap_or(self.env.safekeepers.len());
|
||||
|
||||
args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
|
||||
}
|
||||
|
||||
let mut envs = vec![
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
];
|
||||
|
||||
if let Some(posthog_config) = &self.config.posthog_config {
|
||||
envs.push((
|
||||
"POSTHOG_CONFIG".to_string(),
|
||||
serde_json::to_string(posthog_config)?,
|
||||
));
|
||||
}
|
||||
|
||||
println!("Starting storage controller");
|
||||
|
||||
background_process::start_process(
|
||||
@@ -635,10 +667,7 @@ impl StorageController {
|
||||
&instance_dir,
|
||||
&self.env.storage_controller_bin(),
|
||||
args,
|
||||
vec![
|
||||
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
|
||||
],
|
||||
envs,
|
||||
background_process::InitialPidFile::Create(self.pid_file(start_args.instance_id)),
|
||||
&start_args.start_timeout,
|
||||
|| async {
|
||||
@@ -802,9 +831,9 @@ impl StorageController {
|
||||
builder = builder.json(&body)
|
||||
}
|
||||
if let Some(private_key) = &self.private_key {
|
||||
println!("Getting claims for path {}", path);
|
||||
println!("Getting claims for path {path}");
|
||||
if let Some(required_claims) = Self::get_claims_for_path(&path)? {
|
||||
println!("Got claims {:?} for path {}", required_claims, path);
|
||||
println!("Got claims {required_claims:?} for path {path}");
|
||||
let jwt_token = encode_from_key_file(&required_claims, private_key)?;
|
||||
builder = builder.header(
|
||||
reqwest::header::AUTHORIZATION,
|
||||
|
||||
@@ -36,6 +36,10 @@ enum Command {
|
||||
listen_pg_addr: String,
|
||||
#[arg(long)]
|
||||
listen_pg_port: u16,
|
||||
#[arg(long)]
|
||||
listen_grpc_addr: Option<String>,
|
||||
#[arg(long)]
|
||||
listen_grpc_port: Option<u16>,
|
||||
|
||||
#[arg(long)]
|
||||
listen_http_addr: String,
|
||||
@@ -418,6 +422,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
node_id,
|
||||
listen_pg_addr,
|
||||
listen_pg_port,
|
||||
listen_grpc_addr,
|
||||
listen_grpc_port,
|
||||
listen_http_addr,
|
||||
listen_http_port,
|
||||
listen_https_port,
|
||||
@@ -431,6 +437,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
node_id,
|
||||
listen_pg_addr,
|
||||
listen_pg_port,
|
||||
listen_grpc_addr,
|
||||
listen_grpc_port,
|
||||
listen_http_addr,
|
||||
listen_http_port,
|
||||
listen_https_port,
|
||||
@@ -641,7 +649,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
response
|
||||
.new_shards
|
||||
.iter()
|
||||
.map(|s| format!("{:?}", s))
|
||||
.map(|s| format!("{s:?}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
);
|
||||
@@ -763,8 +771,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
println!("Tenant {tenant_id}");
|
||||
let mut table = comfy_table::Table::new();
|
||||
table.add_row(["Policy", &format!("{:?}", policy)]);
|
||||
table.add_row(["Stripe size", &format!("{:?}", stripe_size)]);
|
||||
table.add_row(["Policy", &format!("{policy:?}")]);
|
||||
table.add_row(["Stripe size", &format!("{stripe_size:?}")]);
|
||||
table.add_row(["Config", &serde_json::to_string_pretty(&config).unwrap()]);
|
||||
println!("{table}");
|
||||
println!("Shards:");
|
||||
@@ -781,7 +789,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
let secondary = shard
|
||||
.node_secondary
|
||||
.iter()
|
||||
.map(|n| format!("{}", n))
|
||||
.map(|n| format!("{n}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(",");
|
||||
|
||||
@@ -855,7 +863,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
} else {
|
||||
// Make it obvious to the user that since they've omitted an AZ, we're clearing it
|
||||
eprintln!("Clearing preferred AZ for tenant {}", tenant_id);
|
||||
eprintln!("Clearing preferred AZ for tenant {tenant_id}");
|
||||
}
|
||||
|
||||
// Construct a request that modifies all the tenant's shards
|
||||
@@ -1126,8 +1134,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
Err((tenant_shard_id, from, to, error)) => {
|
||||
failure += 1;
|
||||
println!(
|
||||
"Failed to migrate {} from node {} to node {}: {}",
|
||||
tenant_shard_id, from, to, error
|
||||
"Failed to migrate {tenant_shard_id} from node {from} to node {to}: {error}"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1269,8 +1276,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
concurrency,
|
||||
} => {
|
||||
let mut path = format!(
|
||||
"/v1/tenant/{}/timeline/{}/download_heatmap_layers",
|
||||
tenant_shard_id, timeline_id,
|
||||
"/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
|
||||
);
|
||||
|
||||
if let Some(c) = concurrency {
|
||||
@@ -1295,8 +1301,7 @@ async fn watch_tenant_shard(
|
||||
) -> anyhow::Result<()> {
|
||||
if let Some(until_migrated_to) = until_migrated_to {
|
||||
println!(
|
||||
"Waiting for tenant shard {} to be migrated to node {}",
|
||||
tenant_shard_id, until_migrated_to
|
||||
"Waiting for tenant shard {tenant_shard_id} to be migrated to node {until_migrated_to}"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1319,7 +1324,7 @@ async fn watch_tenant_shard(
|
||||
"attached: {} secondary: {} {}",
|
||||
shard
|
||||
.node_attached
|
||||
.map(|n| format!("{}", n))
|
||||
.map(|n| format!("{n}"))
|
||||
.unwrap_or("none".to_string()),
|
||||
shard
|
||||
.node_secondary
|
||||
@@ -1333,15 +1338,12 @@ async fn watch_tenant_shard(
|
||||
"(reconciler idle)"
|
||||
}
|
||||
);
|
||||
println!("{}", summary);
|
||||
println!("{summary}");
|
||||
|
||||
// Maybe drop out if we finished migration
|
||||
if let Some(until_migrated_to) = until_migrated_to {
|
||||
if shard.node_attached == Some(until_migrated_to) && !shard.is_reconciling {
|
||||
println!(
|
||||
"Tenant shard {} is now on node {}",
|
||||
tenant_shard_id, until_migrated_to
|
||||
);
|
||||
println!("Tenant shard {tenant_shard_id} is now on node {until_migrated_to}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,3 +95,4 @@ echo "Start compute node"
|
||||
-b /usr/local/bin/postgres \
|
||||
--compute-id "compute-${RANDOM}" \
|
||||
--config "${CONFIG_FILE}"
|
||||
--dev
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
"timestamp": "2022-10-12T18:00:00.000Z",
|
||||
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8c",
|
||||
"suspend_timeout_seconds": -1,
|
||||
|
||||
"cluster": {
|
||||
"cluster_id": "docker_compose",
|
||||
|
||||
docs/rfcs/040-Endpoint-Persistent-Unlogged-Files-Storage.md (new file, 396 lines)
@@ -0,0 +1,396 @@
|
||||
# Memo: Endpoint Persistent Unlogged Files Storage
|
||||
Created on 2024-11-05
|
||||
Implemented on N/A
|
||||
|
||||
## Summary
|
||||
A design for a storage system that allows storage of files required to make
|
||||
Neon's Endpoints have a better experience at or after a reboot.
|
||||
|
||||
## Motivation
|
||||
Several systems inside PostgreSQL (and Neon) need some persistent storage to
work optimally across reboots and restarts, but still work without it.
|
||||
Examples are the query-level statistics files of `pg_stat_statements` in
|
||||
`pg_stat/pg_stat_statements.stat`, and `pg_prewarm`'s `autoprewarm.blocks`.
|
||||
We need a storage system that can store and manage these files for each
|
||||
Endpoint, without necessarily granting users access to an unlimited storage
|
||||
device.
|
||||
|
||||
## Goals
|
||||
- Store known files for Endpoints with reasonable persistence.
|
||||
_Data loss in this service, while annoying and bad for UX, won't lose any
|
||||
customer's data._
|
||||
|
||||
## Non Goals (if relevant)
|
||||
- This storage system does not need branching, file versioning, or other such
|
||||
features. The files are as ephemeral to the timeline of the data as the
|
||||
Endpoints that host the data.
|
||||
- This storage system does not need to store _all_ user files, only 'known'
|
||||
user files.
|
||||
- This storage system does not need to be hosted fully inside Computes.
|
||||
_Instead, this will be a separate component similar to Pageserver,
|
||||
SafeKeeper, the S3 proxy used for dynamically loaded extensions, etc._
|
||||
|
||||
## Impacted components
|
||||
- Compute needs new code to load and store these files in its lifetime.
|
||||
- Control Plane needs to consider this new storage system when signalling
|
||||
the deletion of an Endpoint, Timeline, or Tenant.
|
||||
- Control Plane needs to consider this new storage system when it resets
|
||||
or re-assigns an endpoint's timeline/branch state.
|
||||
|
||||
A new service is created: the Endpoint Persistent Unlogged Files Storage
|
||||
service. This could be integrated in e.g. Pageserver or Control Plane, or a
|
||||
separately hosted service.
|
||||
|
||||
## Proposed implementation
|
||||
Endpoint-related data files are managed by a newly designed service (which
|
||||
optionally is integrated in an existing service like Pageserver or Control
|
||||
Plane), which stores data directly into S3 or any blob storage of choice.
|
||||
|
||||
Upon deletion of the Endpoint, or reassignment of the endpoint to a different
|
||||
branch, this ephemeral data is dropped: the data stored may not match the
|
||||
state of the branch's data after reassignment, and on endpoint deletion the
|
||||
data won't have any use to the user.
|
||||
|
||||
Compute gets credentials (JWT token with Tenant, Timeline & Endpoint claims)
|
||||
which it can use to authenticate to this new service and retrieve and store
|
||||
data associated with this endpoint. This limited scope reduces leaks of data
|
||||
across endpoints and timeline resets, and limits the ability of endpoints to
|
||||
mess with other endpoints' data.
|
||||
|
||||
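As a rough illustration of the claims mentioned above, the token payload could look like the following sketch (field names here are assumptions, not the actual compute claims format):

```rust
use serde::{Deserialize, Serialize};

/// Hypothetical shape of the EPUFS token claims described above; the real
/// compute JWT claims may use different field names.
#[derive(Serialize, Deserialize)]
struct EpufsClaims {
    /// Hex tenant id the token is scoped to.
    tenant_id: String,
    /// Hex timeline id the endpoint is currently bound to.
    timeline_id: String,
    /// Endpoint id the compute is running as.
    endpoint_id: String,
    /// Standard JWT expiry (seconds since epoch).
    exp: u64,
}
```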
The path of this endpoint data in S3 is initially as follows:
|
||||
|
||||
    s3://<regional-epufs-bucket>/
      tenants/
        <hex-tenant-id>/
          timelines/
            <hex-timeline-id>/
              endpoints/
                <endpoint-id>/
                  pgdata/
                    <file_path_in_pgdatadir>
|
||||
|
||||
For other blob storages an equivalent or similar path can be constructed.
|
||||
|
||||
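As an illustration of the layout above, the object key could be composed roughly like this (a sketch only; the `timelines/` path segment and the function name are assumptions):

```rust
/// Sketch only: build the object key for an endpoint's unlogged file,
/// following the layout shown above.
fn epufs_object_key(
    tenant_id: &str,    // hex tenant id
    timeline_id: &str,  // hex timeline id
    endpoint_id: &str,  // endpoint id
    pgdata_path: &str,  // path of the file inside the pgdata directory
) -> String {
    format!("tenants/{tenant_id}/timelines/{timeline_id}/endpoints/{endpoint_id}/pgdata/{pgdata_path}")
}
```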
### Reliability, failure modes and corner cases (if relevant)
|
||||
Reliability is important, but not critical to the workings of Neon. The data
|
||||
stored in this service will, when lost, reduce performance, but won't be a
|
||||
cause of permanent data loss - only operational metadata is stored.
|
||||
|
||||
Most, if not all, blob storage services have sufficiently high persistence
|
||||
guarantees to cater to our needs for persistence and uptime. The only concern with
|
||||
blob storages is that the access latency is generally higher than local disk,
|
||||
but for the object types stored (cache state, ...) I don't think this will be
|
||||
much of an issue.
|
||||
|
||||
### Interaction/Sequence diagram (if relevant)
|
||||
|
||||
In these diagrams you can replace S3 with any persistent storage device of
|
||||
choice, but S3 is chosen as representative name: The well-known and short name
|
||||
of AWS' blob storage. Azure Blob Storage should work too, but it has a much
|
||||
longer name making it less practical for the diagrams.
|
||||
|
||||
Write data:
|
||||
|
||||
```http
|
||||
POST /tenants/<tenant-id>/timelines/<tl-id>/endpoints/<endpoint-id>/pgdata/<the-pgdata-path>
|
||||
Host: epufs.svc.neon.local
|
||||
|
||||
<<<
|
||||
|
||||
200 OK
|
||||
{
|
||||
"version": "<opaque>", # opaque file version token, changes when the file contents change
|
||||
"size": <bytes>,
|
||||
}
|
||||
```
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant co as Compute
|
||||
participant ep as EPUFS
|
||||
participant s3 as Blob Storage
|
||||
|
||||
co-->ep: Connect with credentials
|
||||
co->>+ep: Store Unlogged Persistent File
|
||||
opt is authenticated
|
||||
ep->>s3: Write UPF to S3
|
||||
end
|
||||
ep->>-co: OK / Failure / Auth Failure
|
||||
co-->ep: Cancel connection
|
||||
```
|
||||
|
||||
Read data: (optional with cache-relevant request parameters, e.g. If-Modified-Since)
|
||||
```http
|
||||
GET /tenants/<tenant-id>/timelines/<tl-id>/endpoints/<endpoint-id>/pgdata/<the-pgdata-path>
|
||||
Host: epufs.svc.neon.local
|
||||
|
||||
<<<
|
||||
|
||||
200 OK
|
||||
|
||||
<file data>
|
||||
```
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant co as Compute
|
||||
participant ep as EPUFS
|
||||
participant s3 as Blob Storage
|
||||
|
||||
co->>+ep: Read Unlogged Persistent File
|
||||
opt is authenticated
|
||||
ep->>+s3: Request UPF from storage
|
||||
s3->>-ep: Receive UPF from storage
|
||||
end
|
||||
ep->>-co: OK(response) / Failure(storage, auth, ...)
|
||||
```
|
||||
|
||||
Compute Startup:
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant co as Compute
|
||||
participant ps as Pageserver
|
||||
participant ep as EPUFS
|
||||
participant es as Extension server
|
||||
|
||||
note over co: Bind endpoint ep-xxx
|
||||
par Get basebackup
|
||||
co->>+ps: Request basebackup @ LSN
|
||||
ps-)ps: Construct basebackup
|
||||
ps->>-co: Receive basebackup TAR @ LSN
|
||||
and Get startup-critical Unlogged Persistent Files
|
||||
co->>+ep: Get all UPFs of endpoint ep-xxx
|
||||
ep-)ep: Retrieve and gather all UPFs
|
||||
ep->>-co: TAR of UPFs
|
||||
and Get startup-critical extensions
|
||||
loop For every startup-critical extension
|
||||
co->>es: Get critical extension
|
||||
es->>co: Receive critical extension
|
||||
end
|
||||
end
|
||||
note over co: Start compute
|
||||
```
|
||||
|
||||
CPlane ops:
|
||||
```http
|
||||
DELETE /tenants/<tenant-id>/timelines/<timeline-id>/endpoints/<endpoint-id>
|
||||
Host: epufs.svc.neon.local
|
||||
|
||||
<<<
|
||||
|
||||
200 OK
|
||||
{
|
||||
"tenant": "<tenant-id>",
|
||||
"timeline": "<timeline-id>",
|
||||
"endpoint": "<endpoint-id>",
|
||||
"deleted": {
|
||||
"files": <count>,
|
||||
"bytes": <count>,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
```http
|
||||
DELETE /tenants/<tenant-id>/timelines/<timeline-id>
|
||||
Host: epufs.svc.neon.local
|
||||
|
||||
<<<
|
||||
|
||||
200 OK
|
||||
{
|
||||
"tenant": "<tenant-id>",
|
||||
"timeline": "<timeline-id>",
|
||||
"deleted": {
|
||||
"files": <count>,
|
||||
"bytes": <count>,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
```http
|
||||
DELETE /tenants/<tenant-id>
|
||||
Host: epufs.svc.neon.local
|
||||
|
||||
<<<
|
||||
|
||||
200 OK
|
||||
{
|
||||
"tenant": "<tenant-id>",
|
||||
"deleted": {
|
||||
"files": <count>,
|
||||
"bytes": <count>,
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
participant cp as Control Plane
|
||||
participant ep as EPUFS
|
||||
participant s3 as Blob Storage
|
||||
|
||||
alt Tenant deleted
|
||||
cp-)ep: Tenant deleted
|
||||
loop For every object associated with removed tenant
|
||||
ep->>s3: Remove data of deleted tenant from Storage
|
||||
end
|
||||
opt
|
||||
ep-)cp: Tenant cleanup complete
|
||||
end
|
||||
alt Timeline deleted
|
||||
cp-)ep: Timeline deleted
|
||||
loop For every object associated with removed timeline
|
||||
ep->>s3: Remove data of deleted timeline from Storage
|
||||
end
|
||||
opt
|
||||
ep-)cp: Timeline cleanup complete
|
||||
end
|
||||
else Endpoint reassigned or removed
|
||||
cp->>+ep: Endpoint reassigned
|
||||
loop For every object associated with reassigned/removed endpoint
|
||||
ep->>s3: Remove data from Storage
|
||||
end
|
||||
ep->>-cp: Cleanup complete
|
||||
end
|
||||
```
|
||||
|
||||
### Scalability (if relevant)
|
||||
|
||||
Provisionally: As this service is going to be part of compute startup, this
|
||||
service should be able to quickly respond to all requests. Therefore this
|
||||
service is deployed to every AZ we host Computes in, and Computes communicate
|
||||
(generally) only to the EPUFS endpoint of the AZ they're hosted in.
|
||||
|
||||
Local caching of frequently restarted endpoints' data or metadata may be
|
||||
needed for best performance. However, due to the regional nature of stored
|
||||
data but zonal nature of the service deployment, we should be careful when we
|
||||
implement any local caching, as it is possible that computes in AZ 1 will
|
||||
update data originally written and thus cached by AZ 2. Cache version tests
|
||||
and invalidation is therefore required if we want to roll out caching to this
|
||||
service, which is too broad a scope for an MVC. This is why caching is left
|
||||
out of scope for this RFC, and should be considered separately after this RFC
|
||||
is implemented.
|
||||
|
||||
### Security implications (if relevant)
|
||||
This service must be able to authenticate users at least by Tenant ID,
|
||||
Timeline ID and Endpoint ID. This will use the existing JWT infrastructure of
|
||||
Compute, which will be upgraded to the extent needed to support Timeline- and
|
||||
Endpoint-based claims.
|
||||
|
||||
The service requires unlimited access to (a prefix of) a blob storage bucket,
|
||||
and thus must be hosted outside the Compute VM sandbox.
|
||||
|
||||
A service that generates pre-signed request URLs for Compute to download the
|
||||
data from that URL is likely problematic, too: Compute would be able to write
|
||||
unlimited data to the bucket, or exfiltrate this signed URL to get read/write
|
||||
access to specific objects in this bucket, which would still effectively give
|
||||
users access to the S3 bucket (but with improved access logging).
|
||||
|
||||
There may be a use case for transferring data associated with one endpoint to
|
||||
another endpoint (e.g. to make one endpoint warm its caches with the state of
|
||||
another endpoint), but that's not currently in scope, and specific needs may
|
||||
be solved through out-of-line communication of data or pre-signed URLs.
|
||||
|
||||
### Unresolved questions (if relevant)
|
||||
Caching of files is not in the implementation scope of the document, but
|
||||
should at some future point be considered to maximize performance.
|
||||
|
||||
## Alternative implementation (if relevant)
|
||||
Several ideas have come up to solve this issue:
|
||||
|
||||
### Use AUXfile
|
||||
One prevalent idea was to WAL-log the files using our AUXfile mechanism.
|
||||
|
||||
Benefits:
|
||||
|
||||
+ We already have this storage mechanism
|
||||
|
||||
Demerits:
|
||||
|
||||
- It isn't available on read replicas
|
||||
- Additional WAL will be consumed during shutdown and after the shutdown
|
||||
checkpoint, which needs PG modifications to work without panics.
|
||||
- It increases the data we need to manage in our versioned storage, thus
|
||||
causing higher storage costs with higher retention due to duplication at
|
||||
the storage layer.
|
||||
|
||||
### Sign URLs for read/write operations, instead of proxying them
|
||||
|
||||
Benefits:
|
||||
|
||||
+ The service can be implemented with a much reduced IO budget
|
||||
|
||||
Demerits:
|
||||
|
||||
- Users could get access to these signed credentials
|
||||
- Not all blob storage services may implement URL signing
|
||||
|
||||
### Give endpoints each their own directly accessed block volume
|
||||
|
||||
Benefits:
|
||||
|
||||
+ Easier to integrate for PostgreSQL
|
||||
|
||||
Demerits:
|
||||
|
||||
- Little control on data size and contents
|
||||
- Potentially problematic as we'd need to store data all across the pgdata
|
||||
directory.
|
||||
- EBS is not a good candidate
|
||||
- Attaches in 10s of seconds, if not more; i.e. too cold to start
|
||||
- Shared EBS volumes are a no-go, as you'd have to schedule the endpoint
|
||||
with users of the same EBS volumes, which can't work with VM migration
|
||||
- EBS storage costs are very high (>80$/kilotenant when using a
|
||||
volume/tenant)
|
||||
- EBS volumes can't be mounted across AZ boundaries
|
||||
- Bucket per endpoint is unfeasible
|
||||
- S3 buckets are priced at $20/month per 1k, which we could better spend
|
||||
on developers.
|
||||
- Allocating service accounts takes time (100s of ms), and service accounts
|
||||
are a limited resource, too; so they're not a good candidate to allocate
|
||||
on a per-endpoint basis.
|
||||
- Giving credentials limited to prefix has similar issues as the pre-signed
|
||||
URL approach.
|
||||
- Bucket DNS lookup will fill DNS caches and put pressure on DNS lookup
|
||||
much more than our current systems would.
|
||||
- Volumes bound by hypervisor are unlikely
|
||||
- This requires significant investment and increased software on the
|
||||
hypervisor.
|
||||
- It is unclear if we can attach volumes after boot, i.e. for pooled
|
||||
instances.
|
||||
|
||||
### Put the files into a table
|
||||
|
||||
Benefits:
|
||||
|
||||
+ Mostly already available in PostgreSQL
|
||||
|
||||
Demerits:
|
||||
|
||||
- Uses WAL
|
||||
- Can't be used after shutdown checkpoint
|
||||
- Needs a RW endpoint, and table & catalog access to write to this data
|
||||
- Gets hit with DB size limitations
|
||||
- Depending on user access:
|
||||
- Inaccessible:
|
||||
The user doesn't have control over database size caused by
|
||||
these systems.
|
||||
- Accessible:
|
||||
The user can corrupt these files and cause the system to crash while
|
||||
user-corrupted files are present, thus increasing on-call overhead.
|
||||
|
||||
## Definition of Done (if relevant)
|
||||
|
||||
This project is done if we have:
|
||||
|
||||
- One S3 bucket equivalent per region, which stores this per-endpoint data.
|
||||
- A new service endpoint in at least every AZ, which indirectly grants
|
||||
endpoints access to the data stored for these endpoints in these buckets.
|
||||
- Compute writes & reads temp-data at shutdown and startup, respectively, for
|
||||
at least the pg_prewarm or lfc_prewarm state files.
|
||||
- Cleanup of endpoint data is triggered when the endpoint is deleted or is
|
||||
detached from its current timeline.
|
||||
docs/rfcs/044-feature-flag.md (new file, 179 lines)
@@ -0,0 +1,179 @@
|
||||
# Storage Feature Flags
|
||||
|
||||
In this RFC, we will describe how we will implement per-tenant feature flags.
|
||||
|
||||
## PostHog as Feature Flag Service
|
||||
|
||||
Before we start, let's talk about how current feature flag services work. PostHog is the feature flag service we are currently using across multiple user-facing components in the company. PostHog has two modes of operation: HTTP evaluation and server-side local evaluation.
|
||||
|
||||
Let's assume we have a storage feature flag called gc-compaction and we want to roll it out to scale-tier users with resident size >= 10GB and <= 100GB.
|
||||
|
||||
### Define User Profiles
|
||||
|
||||
The first step is to synchronize our user profiles to the PostHog service. We can simply assume that each tenant is a user in PostHog. Each user profile has some properties associated with it. In our case, it will be: plan type (free, scale, enterprise, etc); resident size (in bytes); primary pageserver (string); region (string).
|
||||
|
||||
### Define Feature Flags
|
||||
|
||||
We would create a feature flag called gc-compaction in PostHog with 4 variants: disabled, stage-1, stage-2, fully-enabled. We will flip the feature flags from disabled to fully-enabled stage by stage for some percentage of our users.
|
||||
|
||||
### Option 1: HTTP Evaluation Mode
|
||||
|
||||
When using PostHog's HTTP evaluation mode, the client will make a request to the PostHog service, asking for the value of a feature flag for a specific user.
|
||||
|
||||
* Control plane will report the plan type to PostHog each time it attaches a tenant to the storcon or when the user upgrades/downgrades. It calls the PostHog profile API to associate tenant ID with the plan type. Assume we have X active tenants and such attach or plan change event happens each week, that would be 4X profile update requests per month.
|
||||
* Pageservers will report the resident size and the primary pageserver to the PostHog service. Assume we report resident size every 24 hours, that would be 30X requests per month.
|
||||
* Each tenant will request the state of the feature flag every 1 hour, that's 720X requests per month.
|
||||
* The Rust client would be easy to implement as we only need to call the `/decide` API on PostHog.
|
||||
|
||||
Using the HTTP evaluation mode we will issue 754X requests a month.
|
||||
|
||||
### Option 2: Local Evaluation Mode
|
||||
|
||||
When using PostHog's local evaluation mode, the client (usually the server in a browser/server architecture) will poll the feature flag configuration every 30s (default in the Python client) from PostHog. Such configuration contains data like:
|
||||
|
||||
<details>
|
||||
|
||||
<summary>Example JSON response from the PostHog local evaluation API</summary>
|
||||
|
||||
```
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Beta Feature",
|
||||
"key": "person-flag",
|
||||
"is_simple_flag": True,
|
||||
"active": True,
|
||||
"filters": {
|
||||
"groups": [
|
||||
{
|
||||
"properties": [
|
||||
{
|
||||
"key": "location",
|
||||
"operator": "exact",
|
||||
"value": ["Straße"],
|
||||
"type": "person",
|
||||
}
|
||||
],
|
||||
"rollout_percentage": 100,
|
||||
},
|
||||
{
|
||||
"properties": [
|
||||
{
|
||||
"key": "star",
|
||||
"operator": "exact",
|
||||
"value": ["ſun"],
|
||||
"type": "person",
|
||||
}
|
||||
],
|
||||
"rollout_percentage": 100,
|
||||
},
|
||||
],
|
||||
},
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
Note that the API only contains information like "under what condition => rollout percentage". The user is responsible for providing the properties required to the client for local evaluation, and the PostHog service (web UI) cannot know if a feature is enabled for the tenant or not until the client uses the `capture` API to report the result back. To control the rollout percentage, the user ID gets mapped to a float number in `[0, 1)` on a consistent hash ring. All values <= the percentage will get the feature enabled or set to the desired value.
|
||||
|
||||
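A rough sketch of that bucketing logic, using the standard library hasher as a stand-in for PostHog's actual hash function:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Map `flag_key.tenant_id` onto a value in [0, 1) and compare it with the
/// rollout percentage; this mimics (but does not reproduce) PostHog's
/// consistent bucketing.
fn is_rolled_out(flag_key: &str, tenant_id: &str, rollout_percentage: f64) -> bool {
    let mut hasher = DefaultHasher::new();
    format!("{flag_key}.{tenant_id}").hash(&mut hasher);
    let bucket = (hasher.finish() % 10_000) as f64 / 10_000.0; // value in [0, 1)
    bucket <= rollout_percentage / 100.0
}
```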
To use the local evaluation mode, the system needs:
|
||||
|
||||
* Assume each pageserver will poll PostHog for the local evaluation JSON every 5 minutes (instead of the 30s default, which is too frequent). That's 8640Y requests per month, where Y is the number of pageservers. Local evaluation requests cost 10x more than a normal decide request, so that's 86400Y request units to bill.
|
||||
* Storcon needs to store the plan type in the database and pass that information to the pageserver when attaching the tenant.
|
||||
* Storcon also needs to update PostHog with the active tenants, for example, when the tenant gets detached/attached. Assume each active tenant gets detached/attached every week, that would be 4X requests per month.
|
||||
* We do not need to update bill type or resident size to PostHog as all these are evaluated locally.
|
||||
* After each local evaluation of the feature flag, we need to call PostHog's capture event API to update the result of the evaluation that the feature is enabled. We can do this when the flag gets changed compared with the last cached state in memory. That would be at least 4X (assume we do a deployment every week so the cache gets cleared) and maybe an additional multiplier of 10, assuming we have 10 active features.
|
||||
|
||||
In this case, we will issue 86400Y + 40X requests per month.
|
||||
|
||||
Assume X = 1,000,000 and Y = 100,
|
||||
|
||||
| | HTTP Evaluation | Local Evaluation |
|
||||
|---|---|---|
|
||||
| Latency of propagating the conditions/properties for feature flag | 24 hours | available locally |
|
||||
| Latency of applying the feature flag | 1 hour | 5 minutes |
|
||||
| Can properties be reported from different services | Yes | No |
|
||||
| Do we need to sync billing info etc to pageserver | No | Yes |
|
||||
| Cost | 75400$ / month | 4864$ / month |
|
||||
|
||||
# Our Solution
|
||||
|
||||
We will use PostHog _only_ as a UI to configure the feature flags. Whether a feature is enabled or not can only be queried through storcon/pageserver instead of using the PostHog UI. (We could report it back to PostHog via `capture_event` but it costs $$$.) This allows us to ramp up the feature flag functionality fast at first. At the same time, it would also give us the option to migrate to our own solution once we want to have more properties and more complex evaluation rules in our system.
|
||||
|
||||
* We will create several fake users (tenants) in PostHog that contain all the properties we will use for evaluating a feature flag (i.e., resident size, billing type, pageserver id, etc.)
|
||||
* We will use PostHog's local evaluation API to poll the configuration of the feature flags and evaluate them locally on each of the pageserver.
|
||||
* The evaluation result will not be reported back to PostHog.
|
||||
* Storcon needs to pull some information from cplane database.
|
||||
* To know if a feature is currently enabled or not, we need to call the storcon/pageserver API; and we won't easily be able to know whether a feature has been enabled on a tenant before: we need to look at the Grafana logs.
|
||||
|
||||
We only need to pay for the 86400Y local evaluation requests (that would be setting X=0 in solution 2 => $864/month, and even less if we proxy it through storcon).
|
||||
|
||||
## Implementation
|
||||
|
||||
* Pageserver: implement a PostHog local evaluation client. The client will be shared across all tenants on the pageserver with a single API: `evaluate(tenant_id, feature_flag, properties) -> json` (a rough sketch follows this list).
|
||||
* Storcon: if we need plan type as the evaluation condition, pull it from cplane database.
|
||||
* Storcon/Pageserver: implement an HTTP API `:tenant_id/feature/:feature` to retrieve the current feature flag status.
|
||||
* Storcon/Pageserver: a loop to update the feature flag spec on both storcon and pageserver. Pageserver loop will only be activated if storcon does not push the specs to the pageserver.
|
||||
|
||||
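A hypothetical shape of that single-API client (sketch only; the actual pageserver client may differ):

```rust
use std::collections::HashMap;

/// Sketch of the shared local-evaluation client mentioned in the list above.
trait FeatureEvaluator {
    /// Evaluate `feature_flag` for `tenant_id` against the locally cached
    /// feature flag spec, using the supplied per-tenant properties.
    fn evaluate(
        &self,
        tenant_id: &str,
        feature_flag: &str,
        properties: &HashMap<String, serde_json::Value>,
    ) -> anyhow::Result<serde_json::Value>;
}
```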
## Difference from Tenant Config
|
||||
|
||||
* Feature flags can be modified by percentage, and the default config for each feature flag can be modified in UI without going through the release process.
|
||||
* Feature flags are more flexible and won't be persisted anywhere; they will be passed as plain JSON over the wire, so we do not need to handle backward/forward compatibility as in the tenant config.
|
||||
* The expectation of tenant config is that once we add a flag we cannot remove it (or it will be hard to remove), but feature flags are more flexible.
|
||||
|
||||
# Final Implementation
|
||||
|
||||
* We added a new crate `posthog_lite_client` that supports local feature evaluations.
|
||||
* We set up two projects "Storage (staging)" and "Storage (production)" in the PostHog console.
|
||||
* Each pageserver reports 10 fake tenants to PostHog so that we can get all combinations of regions (and other properties) in the PostHog UI.
|
||||
* Supported properties: AZ, neon_region, pageserver, tenant_id.
|
||||
* You may use "Pageserver Feature Flags" dashboard to see the evaluation status.
|
||||
* The feature flag spec is polled on storcon every 30s (in each of the region) and storcon will propagate the spec to the pageservers.
|
||||
* The pageserver housekeeping loop updates the tenant-specific properties (e.g., remote size) for evaluation.
|
||||
|
||||
Each tenant has a `feature_resolver` object. After you add a feature flag in the PostHog console, you can retrieve it with:
|
||||
|
||||
```rust
|
||||
// Boolean flag
|
||||
self
|
||||
.feature_resolver
|
||||
.evaluate_boolean("flag")
|
||||
.is_ok()
|
||||
// Multivariate flag
|
||||
self
|
||||
.feature_resolver
|
||||
.evaluate_multivariate("gc-comapction-strategy")
|
||||
.ok();
|
||||
```
|
||||
|
||||
The user needs to handle the case where the evaluation result is an error. This can occur in a variety of cases:
|
||||
|
||||
* During the pageserver start, the feature flag spec has not been retrieved.
|
||||
* No condition group is matched.
|
||||
* The feature flag spec contains an operand/operation not supported by the lite PostHog library.
|
||||
|
||||
For boolean flags, the return value is `Result<(), Error>`. `Ok(())` means the flag is evaluated to true. Otherwise,
|
||||
there is either an error in evaluation or it does not match any groups.
|
||||
|
||||
For multivariate flags, the return value is `Result<String, Error>`. `Ok(variant)` indicates the flag is evaluated
|
||||
to a variant. Otherwise, there is either an error in evaluation or it does not match any groups.
|
||||
|
||||
The evaluation logic is documented in the PostHog lite library. It compares the consistent hash of a flag key + tenant_id
|
||||
with the rollout percentage and determines which tenant to roll out a specific feature.
|
||||
|
||||
Users can use the feature flag evaluation API to get the flag evaluation result of a specific tenant for debugging purposes.
|
||||
|
||||
```
|
||||
curl "http://localhost:9898/v1/tenant/:tenant_id/feature_flag?flag=:key&as=multivariate/boolean"
|
||||
```
|
||||
|
||||
By default, the storcon pushes the feature flag specs to the pageservers every 30 seconds, which means that a change in feature flag in the
|
||||
PostHog UI will propagate to the pageservers within 30 seconds.
|
||||
|
||||
# Future Works
|
||||
|
||||
* Support dynamic tenant properties like logical size as the evaluation condition.
|
||||
* Support properties like `plan_type` (needs cplane to pass it down).
|
||||
* Report feature flag evaluation result back to PostHog (if the cost is okay).
|
||||
* Fast feature flag evaluation cache on critical paths (e.g., cache a feature flag result in `AtomicBool` and use it on the read path).
|
||||
docs/rfcs/2025-03-17-compute-prewarm.md (new file, 399 lines)
@@ -0,0 +1,399 @@
|
||||
# Compute rolling restart with prewarm
|
||||
|
||||
Created on 2025-03-17
|
||||
Implemented on _TBD_
|
||||
Author: Alexey Kondratov (@ololobus)
|
||||
|
||||
## Summary
|
||||
|
||||
This RFC describes an approach to reduce performance degradation due to missing caches after compute node restart, i.e.:
|
||||
|
||||
1. Rolling restart of the running instance via 'warm' replica.
|
||||
2. Auto-prewarm compute caches after unplanned restart or scale-to-zero.
|
||||
|
||||
## Motivation
|
||||
|
||||
Neon currently implements several features that guarantee high uptime of compute nodes:
|
||||
|
||||
1. Storage high-availability (HA), i.e. each tenant shard has a secondary pageserver location, so we can quickly switch over compute to it in case of primary pageserver failure.
|
||||
2. Fast compute provisioning, i.e. we have a fleet of pre-created empty computes, that are ready to serve workload, so restarting unresponsive compute is very fast.
|
||||
3. Preemptive NeonVM compute provisioning in case of k8s node unavailability.
|
||||
|
||||
This helps us to be well within the uptime SLO of 99.95% most of the time. Problems begin when we go up to multi-TB workloads and 32-64 CU computes.
During a restart, compute loses all caches: LFC, shared buffers, file system cache. Depending on the workload, it can take a lot of time to warm up the caches,
so performance can be degraded and might even be unacceptable for certain workloads. The latter means that although the current approach works well for small to
medium workloads, we still have to do some additional work to avoid performance degradation after the restart of large instances.
|
||||
|
||||
## Non Goals
|
||||
|
||||
- Details of the persistence storage for prewarm data are out of scope, there is a separate RFC for that: <https://github.com/neondatabase/neon/pull/9661>.
|
||||
- Complete compute/Postgres HA setup and flow. Although it was originally in scope of this RFC, during preliminary research it appeared to be a rabbit hole, so it's worth a separate RFC.
- Low-level implementation details for Postgres replica-to-primary promotion. There are a lot of things to think and care about: how to start walproposer, [logical replication failover](https://www.postgresql.org/docs/current/logical-replication-failover.html), and so on, but it's worth at least a separate one-pager design document, if not an RFC.
|
||||
|
||||
## Impacted components
|
||||
|
||||
Postgres, compute_ctl, Control plane, Endpoint storage for unlogged storage of compute files.
|
||||
For the latter, we will need to implement a uniform abstraction layer on top of S3, ABS, etc., but
|
||||
S3 is used in text interchangeably with 'endpoint storage' for simplicity.
|
||||
|
||||
## Proposed implementation
|
||||
|
||||
### compute_ctl spec changes and auto-prewarm
|
||||
|
||||
We are going to extend the current compute spec with the following attributes
|
||||
|
||||
```rust
|
||||
struct ComputeSpec {
|
||||
/// [All existing attributes]
|
||||
...
|
||||
/// Whether to do auto-prewarm at start or not.
|
||||
/// Default to `false`.
|
||||
pub lfc_auto_prewarm: bool,
|
||||
/// Interval in seconds between automatic dumps of
|
||||
/// LFC state into S3. Default `None`, which means 'off'.
|
||||
pub lfc_dump_interval_sec: Option<i32>,
|
||||
}
|
||||
```
|
||||
|
||||
When `lfc_dump_interval_sec` is set to `N`, `compute_ctl` will periodically dump the LFC state
|
||||
and store it in S3, so that it could be used either for auto-prewarm after restart or by replica
|
||||
during the rolling restart. For enabling periodic dumping, we should consider the following value
|
||||
`lfc_dump_interval_sec=300` (5 minutes), same as in the upstream's `pg_prewarm.autoprewarm_interval`.
|
||||
|
||||
When `lfc_auto_prewarm` is set to `true`, `compute_ctl` will start prewarming the LFC upon restart
|
||||
if any of the previously dumped LFC states is present in S3.
|
||||
|
||||
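A minimal sketch of what the periodic dump loop in `compute_ctl` could look like (all helper names below are assumptions, not the actual implementation):

```rust
use std::time::Duration;

/// Hypothetical periodic LFC dump loop driven by `lfc_dump_interval_sec`.
async fn lfc_dump_loop(interval_sec: u64) {
    let mut ticker = tokio::time::interval(Duration::from_secs(interval_sec));
    loop {
        ticker.tick().await;
        if let Err(e) = dump_and_store_lfc_state().await {
            tracing::warn!("failed to store LFC state: {e:#}");
        }
    }
}

/// Placeholder for "query the LFC state over SQL and upload it to the
/// endpoint storage (S3)".
async fn dump_and_store_lfc_state() -> anyhow::Result<()> {
    Ok(())
}
```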
### compute_ctl API
|
||||
|
||||
1. `POST /store_lfc_state` -- dump LFC state using Postgres SQL interface and store result in S3.
|
||||
This has to be a blocking call, i.e. it will return only after the state is stored in S3.
|
||||
If there is any concurrent request in progress, we should return `429 Too Many Requests`,
|
||||
and let the caller retry.
|
||||
|
||||
2. `GET /dump_lfc_state` -- dump LFC state using Postgres SQL interface and return it as is
|
||||
in a text format suitable for a future restore/prewarm. This API is not strictly needed in
the end state, but could be useful for faster prototyping of a complete rolling restart flow
with prewarm, as it doesn't require persistent storage for the LFC state.
|
||||
|
||||
3. `POST /restore_lfc_state` -- restore/prewarm LFC state with request
|
||||
|
||||
```yaml
|
||||
RestoreLFCStateRequest:
|
||||
oneOf:
|
||||
- type: object
|
||||
required:
|
||||
- lfc_state
|
||||
properties:
|
||||
lfc_state:
|
||||
type: string
|
||||
description: Raw LFC content dumped with GET `/dump_lfc_state`
|
||||
- type: object
|
||||
required:
|
||||
- lfc_cache_key
|
||||
properties:
|
||||
lfc_cache_key:
|
||||
type: string
|
||||
description: |
|
||||
endpoint_id of the source endpoint on the same branch
|
||||
to use as a 'donor' for LFC content. Compute will look up
|
||||
LFC content dump in S3 using this key and do prewarm.
|
||||
```
|
||||
|
||||
where `lfc_state` and `lfc_cache_key` are mutually exclusive.
|
||||
|
||||
The actual prewarming will happen asynchronously, so the caller needs to check the
|
||||
prewarm status using the compute's standard `GET /status` API.
|
||||
|
||||
4. `GET /status` -- extend existing API with following attributes
|
||||
|
||||
```rust
|
||||
struct ComputeStatusResponse {
|
||||
// [All existing attributes]
|
||||
...
|
||||
pub prewarm_state: PrewarmState
|
||||
}
|
||||
|
||||
/// Compute prewarm state. Will be stored in the shared Compute state
|
||||
/// in compute_ctl
|
||||
struct PrewarmState {
|
||||
pub status: PrewarmStatus,
|
||||
/// Total number of pages to prewarm
|
||||
pub pages_total: i64,
|
||||
/// Number of pages prewarmed so far
|
||||
pub pages_processed: i64,
|
||||
/// Optional prewarm error
|
||||
pub error: Option<String>
|
||||
}
|
||||
|
||||
pub enum PrewarmStatus {
|
||||
/// Prewarming was never requested on this compute
|
||||
Off,
|
||||
/// Prewarming was requested, but not started yet
|
||||
Pending,
|
||||
/// Prewarming is in progress. The caller should follow
|
||||
/// `PrewarmState::pages_processed` and `PrewarmState::pages_total`.
|
||||
InProgress,
|
||||
/// Prewarming has been successfully completed
|
||||
Completed,
|
||||
/// Prewarming failed. The caller should look at
|
||||
/// `PrewarmState::error` for the reason.
|
||||
Failed,
|
||||
/// It is intended to be used by auto-prewarm if none of
|
||||
/// the previous LFC states is available in S3.
|
||||
/// This is a distinct state from the `Failed` because
|
||||
/// technically it's not a failure and could happen if
|
||||
/// compute was restarted before it dumped anything into S3,
|
||||
/// or just after the initial rollout of the feature.
|
||||
Skipped,
|
||||
}
|
||||
```
|
||||
|
||||
5. `POST /promote` -- this is a **blocking** API call to promote compute replica into primary.
|
||||
This API should be very similar to the existing `POST /configure` API, i.e. accept the
|
||||
spec (primary spec, because originally compute was started as replica). It's a distinct
|
||||
API method because semantics and response codes are different:
|
||||
|
||||
- If promotion is done successfully, it will return `200 OK`.
|
||||
- If compute is already primary, the call will be a no-op and `compute_ctl`
|
||||
will return `412 Precondition Failed`.
|
||||
- If, for some reason, a second request reaches a compute that is in the middle of promotion,
|
||||
it will respond with `429 Too Many Requests`.
|
||||
- If compute hits any permanent failure during promotion, `500 Internal Server Error`
|
||||
will be returned.
|
||||
|
||||
### Control plane operations
|
||||
|
||||
The complete flow is presented as a sequence diagram in the next section, but here
we just want to list some important steps that have to be done by the control plane during
the rolling restart via a warm replica, without going into low-level implementation details.
|
||||
|
||||
1. Register the 'intent' of the instance restart, without yet interrupting any workload at
   the primary and while still accepting new connections. This may require some endpoint state machine
|
||||
changes, e.g. introduction of the `pending_restart` state. Being in this state also
|
||||
**mustn't prevent any other operations except restart**: suspend, live-reconfiguration
|
||||
(e.g. due to notify-attach call from the storage controller), deletion.
|
||||
|
||||
2. Start a new replica compute on the same timeline and start prewarming it. This process
|
||||
may take quite a while, so the same concurrency considerations as in 1. should be applied
|
||||
here as well.
|
||||
|
||||
3. When warm replica is ready, control plane should:
|
||||
|
||||
3.1. Terminate the primary compute. Starting from here, **this is a critical section**,
|
||||
if anything goes wrong, the only option is to start the primary normally and proceed
|
||||
with auto-prewarm.
|
||||
|
||||
3.2. Send a cache invalidation message to all proxies, notifying them that all new connections
should request and wait for the new connection details. At this stage, the proxy also has to
drop any existing connections to the old primary, so they don't do stale reads.
|
||||
|
||||
3.3. Attach warm replica compute to the primary endpoint inside control plane metadata
|
||||
database.
|
||||
|
||||
3.4. Promote replica to primary.
|
||||
|
||||
3.5. When everything is done, finalize the endpoint state to be just `active`.
|
||||
|
||||
### Complete rolling restart flow

```mermaid
sequenceDiagram

    autonumber

    participant proxy as Neon proxy
    participant cplane as Control plane
    participant primary as Compute (primary)
    box Compute (replica)
        participant ctl as compute_ctl
        participant pg as Postgres
    end

    box Endpoint unlogged storage
        participant s3proxy as Endpoint storage service
        participant s3 as S3/ABS/etc.
    end

    cplane ->> primary: POST /store_lfc_state
    primary -->> cplane: 200 OK

    cplane ->> ctl: POST /restore_lfc_state
    activate ctl
    ctl -->> cplane: 202 Accepted

    activate cplane
    cplane ->> ctl: GET /status: poll prewarm status
    ctl ->> s3proxy: GET /read_file
    s3proxy ->> s3: read file
    s3 -->> s3proxy: file content
    s3proxy -->> ctl: 200 OK: file content

    proxy ->> cplane: GET /proxy_wake_compute
    cplane -->> proxy: 200 OK: old primary conninfo

    ctl ->> pg: prewarm LFC
    activate pg
    pg -->> ctl: prewarm is completed
    deactivate pg

    ctl -->> cplane: 200 OK: prewarm is completed
    deactivate ctl
    deactivate cplane

    cplane -->> cplane: reassign replica compute to endpoint,<br>start terminating the old primary compute
    activate cplane
    cplane ->> proxy: invalidate caches

    proxy ->> cplane: GET /proxy_wake_compute

    cplane -x primary: POST /terminate
    primary -->> cplane: 200 OK
    note over primary: old primary<br>compute terminated

    cplane ->> ctl: POST /promote
    activate ctl
    ctl ->> pg: pg_ctl promote
    activate pg
    pg -->> ctl: done
    deactivate pg
    ctl -->> cplane: 200 OK
    deactivate ctl

    cplane -->> cplane: finalize operation
    cplane -->> proxy: 200 OK: new primary conninfo
    deactivate cplane
```

### Network bandwidth and prewarm speed

It's currently known that a pageserver can sustain about 3000 RPS per shard for a few running computes.
Large tenants are usually split into 8 shards, so the final formula may look like this:

```text
8 shards * 3000 RPS * 8 KB =~ 190 MB/s
```

so depending on the LFC size, prewarming will take at least:

- ~5s for 1 GB
- ~50s for 10 GB
- ~9m for 100 GB
- \>1h for 1 TB

In total, one pageserver is normally capped at 30k RPS, so it obviously can't sustain many computes
doing prewarm at the same time. Later, we may need an additional mechanism for computes to throttle
the prewarming requests gracefully.

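The arithmetic above can be captured in a small helper. This is a sketch under the stated assumptions (3000 RPS per shard, 8 KB pages), not a measured model.

```rust
/// Rough lower bound on prewarm time, using the assumptions from this section:
/// 3000 getpage requests per second per shard, 8 KB per page.
fn prewarm_seconds_lower_bound(lfc_bytes: u64, shards: u64) -> u64 {
    let page_size: u64 = 8 * 1024;
    let bytes_per_second = shards * 3000 * page_size; // ~190 MB/s for 8 shards
    lfc_bytes / bytes_per_second
}

// Example: a 100 GiB LFC on an 8-shard tenant => ~546 s, i.e. roughly 9 minutes.
```
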
### Reliability, failure modes and corner cases

We consider the following failures while implementing this RFC:

1. Compute got interrupted/crashed/restarted during prewarm. The caller -- the control plane -- should
   detect that and start prewarm from the beginning.

2. Control plane promotion request timed out or hit network issues. If it never reached the
   compute, the control plane should just repeat it. If it did reach the compute, then during
   retry the control plane can hit `409` because the previous request already triggered the promotion.
   In this case, the control plane needs to retry until either `200` or
   a permanent error `500` is returned (a retry sketch follows this list).

3. Compute got interrupted/crashed/restarted during promotion. At restart it will ask for
   a spec from the control plane, and its content should signal the compute to start as **primary**,
   so it's expected that the control plane will continue polling for a certain period of time and
   will discover that the compute is ready to accept connections if the restart is fast enough.

4. Any other unexpected failure or timeout during prewarming. This **failure mustn't be fatal**;
   the control plane has to report the failure, terminate the replica and keep the primary running.

5. Any other unexpected failure or timeout during promotion. Unfortunately, at this moment
   we already have the primary node stopped, so the only option is to start the primary again
   and proceed with auto-prewarm.

6. Any unexpected failure during auto-prewarm. This **failure mustn't be fatal**;
   `compute_ctl` has to report the failure, but must not crash the compute.

7. Control plane failed to confirm that the old primary has terminated. This can happen, especially
   in the future HA setup. In this case, the control plane has to ensure that it has sent VM deletion
   and pod termination requests to k8s, so that long-term we do not have two running primaries
   on the same timeline.

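A sketch of the retry policy from failure mode 2, assuming a hypothetical `send_promote` call that returns the HTTP status code; the real control plane code will differ.

```rust
use std::{thread, time::Duration};

/// Illustrative retry loop for `POST /promote` (failure mode 2).
/// `send_promote` stands in for the real HTTP call and returns the status code.
fn retry_promote<F>(mut send_promote: F, max_attempts: u32) -> Result<(), String>
where
    F: FnMut() -> Result<u16, String>,
{
    for _ in 0..max_attempts {
        match send_promote() {
            Ok(200) => return Ok(()),                                    // promoted
            Ok(500) => return Err("permanent promotion failure".into()), // stop retrying
            Ok(_) | Err(_) => thread::sleep(Duration::from_secs(1)),     // in-progress, timeouts, network errors
        }
    }
    Err("promotion did not complete within the allotted attempts".into())
}
```
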
### Security implications

There are two security implications to consider:

1. Access to the `compute_ctl` API. It has to be accessible from outside the compute, so all
   new API methods have to be exposed on the **external** HTTP port and **must** be authenticated
   with JWT.

2. Read/write only your own LFC state data in S3. Although it's not really a security concern,
   since the LFC state is just a mapping of blocks present in LFC at a certain moment in time,
   it still has to be highly restricted, so that i) only computes on the same timeline can
   read the S3 state; ii) each compute can only write to the path that contains its `endpoint_id`.
   Both of these must be validated by the Endpoint storage service using the JWT token provided
   by `compute_ctl` (a sketch of these checks follows the list).

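A minimal sketch of the access checks described in implication 2. The claim names and path layout are assumptions for illustration and are not the actual Endpoint storage service code.

```rust
/// Hypothetical claims carried in the `compute_ctl` JWT.
struct LfcClaims {
    tenant_id: String,
    timeline_id: String,
    endpoint_id: String,
}

/// Reads are allowed for any compute on the same tenant/timeline.
fn may_read(claims: &LfcClaims, tenant: &str, timeline: &str) -> bool {
    claims.tenant_id == tenant && claims.timeline_id == timeline
}

/// Writes are additionally restricted to the caller's own `endpoint_id` path segment.
fn may_write(claims: &LfcClaims, tenant: &str, timeline: &str, endpoint: &str) -> bool {
    may_read(claims, tenant, timeline) && claims.endpoint_id == endpoint
}
```
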
### Unresolved questions

#### Billing, metrics and monitoring

Currently, we only label computes with `endpoint_id` after attaching them to the endpoint.
In this proposal, this means that the temporary replica will remain unlabelled until it's promoted
to primary. We can also hide it from users in the control plane API, but what to do with
billing and monitoring is still unclear.

We can probably mark it as 'billable' and tag it with `project_id`, so it will be billed but
will not interfere in any way with the current primary monitoring.

Another thing to consider is how logs and metrics export will switch to the new compute.
It's expected that the OpenTelemetry collector will auto-discover the new compute and start
scraping metrics from it.

#### Auto-prewarm

It's still an open question whether we need auto-prewarm at all. The author's gut feeling is
that yes, we need it, but maybe not for all workloads, so it could end up exposed as a
user-controllable knob on the endpoint. There are two arguments for that:

1. Auto-prewarm exists in upstream's `pg_prewarm`, _probably for a reason_.

2. There could still be 2 flows where we cannot perform the rolling restart via the warm
   replica: i) any failure or interruption during promotion; ii) wake-up after scale-to-zero.
   The latter might be challenged as well, i.e. one can argue that auto-prewarm may and will
   compete with the user workload for storage resources. This is correct, but it might as well
   reduce the time to get a warm LFC and good performance.

#### Low-level details of the replica promotion

There are many things to consider here, but three items just off the top of my head:

1. How to properly start the `walproposer` inside Postgres.

2. What to do with logical replication. Currently, we do not include logical replication slots
   inside the basebackup, because nobody advances them on a replica, so they just prevent WAL
   deletion. Yet, we do need to have them at the primary after promotion. Starting with Postgres 17,
   there is a new feature called
   [logical replication failover](https://www.postgresql.org/docs/current/logical-replication-failover.html)
   and the `synchronized_standby_slots` setting, but we need a plan for the older versions. Should we
   request a new basebackup during promotion?

3. How do we guarantee that the replica will receive all the latest WAL from safekeepers? Do we run
   some 'shallow' version of sync safekeepers without data copying? Or just a standard version of
   sync safekeepers?

## Alternative implementation

The proposal already assumes one of the alternatives -- do not have any persistent storage for
the LFC state. This is possible to implement faster with the proposed API, but it means that
we do not implement auto-prewarm yet.

## Definition of Done

At the end of implementing this RFC we should have two high-level settings that enable:

1. Auto-prewarm of user computes upon restart.
2. Primary compute restart via warm replica promotion.

It also has to be decided what the criteria are for enabling one or both of these flows for
certain clients.

@@ -374,7 +374,7 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
|
||||
let request = Request::builder()
|
||||
.uri(format!("/{tenant}/{timeline}/{endpoint}/sub/path/key"))
|
||||
.method(method)
|
||||
.header("Authorization", format!("Bearer {}", token))
|
||||
.header("Authorization", format!("Bearer {token}"))
|
||||
.body(Body::empty())
|
||||
.unwrap();
|
||||
let status = ServiceExt::ready(&mut app)
|
||||
|
||||
@@ -31,13 +31,12 @@ struct Args {
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
struct Config {
|
||||
#[serde(default = "listen")]
|
||||
listen: std::net::SocketAddr,
|
||||
pemfile: camino::Utf8PathBuf,
|
||||
#[serde(flatten)]
|
||||
storage_config: remote_storage::RemoteStorageConfig,
|
||||
storage_kind: remote_storage::TypedRemoteStorageKind,
|
||||
#[serde(default = "max_upload_file_limit")]
|
||||
max_upload_file_limit: usize,
|
||||
}
|
||||
@@ -70,7 +69,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
let listener = tokio::net::TcpListener::bind(config.listen).await.unwrap();
|
||||
info!("listening on {}", listener.local_addr().unwrap());
|
||||
|
||||
let storage = remote_storage::GenericRemoteStorage::from_config(&config.storage_config).await?;
|
||||
let storage =
|
||||
remote_storage::GenericRemoteStorage::from_storage_kind(config.storage_kind).await?;
|
||||
let cancel = tokio_util::sync::CancellationToken::new();
|
||||
if !args.no_s3_check_on_startup {
|
||||
app::check_storage_permissions(&storage, cancel.clone()).await?;
|
||||
|
||||
@@ -12,6 +12,7 @@ jsonwebtoken.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
regex.workspace = true
|
||||
url.workspace = true
|
||||
|
||||
utils = { path = "../utils" }
|
||||
remote_storage = { version = "0.1", path = "../remote_storage/" }
|
||||
|
||||
@@ -16,6 +16,7 @@ pub static COMPUTE_AUDIENCE: &str = "compute";
|
||||
pub enum ComputeClaimsScope {
|
||||
/// An admin-scoped token allows access to all of `compute_ctl`'s authorized
|
||||
/// facilities.
|
||||
#[serde(rename = "compute_ctl:admin")]
|
||||
Admin,
|
||||
}
|
||||
|
||||
@@ -24,7 +25,7 @@ impl FromStr for ComputeClaimsScope {
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"admin" => Ok(ComputeClaimsScope::Admin),
|
||||
"compute_ctl:admin" => Ok(ComputeClaimsScope::Admin),
|
||||
_ => Err(anyhow::anyhow!("invalid compute claims scope \"{s}\"")),
|
||||
}
|
||||
}
|
||||
@@ -80,3 +81,23 @@ pub struct SetRoleGrantsRequest {
|
||||
pub privileges: Vec<Privilege>,
|
||||
pub role: PgIdent,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::requests::ComputeClaimsScope;
|
||||
|
||||
/// Confirm that whether we parse the scope by string or through serde, the
|
||||
/// same values parse to the same enum variant.
|
||||
#[test]
|
||||
fn compute_request_scopes() {
|
||||
const ADMIN_SCOPE: &str = "compute_ctl:admin";
|
||||
|
||||
let from_serde: ComputeClaimsScope =
|
||||
serde_json::from_str(&format!("\"{ADMIN_SCOPE}\"")).unwrap();
|
||||
let from_str = ComputeClaimsScope::from_str(ADMIN_SCOPE).unwrap();
|
||||
|
||||
assert_eq!(from_serde, from_str);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,6 +83,16 @@ pub struct ComputeStatusResponse {
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum TerminateMode {
|
||||
#[default]
|
||||
/// wait 30s till returning from /terminate to allow control plane to get the error
|
||||
Fast,
|
||||
/// return from /terminate immediately as soon as all components are terminated
|
||||
Immediate,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ComputeStatus {
|
||||
@@ -103,11 +113,16 @@ pub enum ComputeStatus {
|
||||
// control-plane to terminate it.
|
||||
Failed,
|
||||
// Termination requested
|
||||
TerminationPending,
|
||||
TerminationPending { mode: TerminateMode },
|
||||
// Terminated Postgres
|
||||
Terminated,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
pub struct TerminateResponse {
|
||||
pub lsn: Option<utils::lsn::Lsn>,
|
||||
}
|
||||
|
||||
impl Display for ComputeStatus {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
@@ -117,7 +132,7 @@ impl Display for ComputeStatus {
|
||||
ComputeStatus::Running => f.write_str("running"),
|
||||
ComputeStatus::Configuration => f.write_str("configuration"),
|
||||
ComputeStatus::Failed => f.write_str("failed"),
|
||||
ComputeStatus::TerminationPending => f.write_str("termination-pending"),
|
||||
ComputeStatus::TerminationPending { .. } => f.write_str("termination-pending"),
|
||||
ComputeStatus::Terminated => f.write_str("terminated"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,11 +4,14 @@
|
||||
//! provide it by calling the compute_ctl's `/compute_ctl` endpoint, or
|
||||
//! compute_ctl can fetch it by calling the control plane's API.
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
|
||||
use anyhow::anyhow;
|
||||
use indexmap::IndexMap;
|
||||
use regex::Regex;
|
||||
use remote_storage::RemotePath;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use url::Url;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -181,6 +184,11 @@ pub struct ComputeSpec {
|
||||
/// Download LFC state from endpoint_storage and pass it to Postgres on startup
|
||||
#[serde(default)]
|
||||
pub autoprewarm: bool,
|
||||
|
||||
/// Suspend timeout in seconds.
|
||||
///
|
||||
/// We use this value to derive other values, such as the installed extensions metric.
|
||||
pub suspend_timeout_seconds: i64,
|
||||
}
|
||||
|
||||
/// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
|
||||
@@ -429,6 +437,47 @@ pub struct JwksSettings {
|
||||
pub jwt_audience: Option<String>,
|
||||
}
|
||||
|
||||
/// Protocol used to connect to a Pageserver. Parsed from the connstring scheme.
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
pub enum PageserverProtocol {
|
||||
/// The original protocol based on libpq and COPY. Uses postgresql:// or postgres:// scheme.
|
||||
#[default]
|
||||
Libpq,
|
||||
/// A newer, gRPC-based protocol. Uses grpc:// scheme.
|
||||
Grpc,
|
||||
}
|
||||
|
||||
impl PageserverProtocol {
|
||||
/// Parses the protocol from a connstring scheme. Defaults to Libpq if no scheme is given.
|
||||
/// Errors if the connstring is an invalid URL.
|
||||
pub fn from_connstring(connstring: &str) -> anyhow::Result<Self> {
|
||||
let scheme = match Url::parse(connstring) {
|
||||
Ok(url) => url.scheme().to_lowercase(),
|
||||
Err(url::ParseError::RelativeUrlWithoutBase) => return Ok(Self::default()),
|
||||
Err(err) => return Err(anyhow!("invalid connstring URL: {err}")),
|
||||
};
|
||||
match scheme.as_str() {
|
||||
"postgresql" | "postgres" => Ok(Self::Libpq),
|
||||
"grpc" => Ok(Self::Grpc),
|
||||
scheme => Err(anyhow!("invalid protocol scheme: {scheme}")),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the URL scheme for the protocol, for use in connstrings.
|
||||
pub fn scheme(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Libpq => "postgresql",
|
||||
Self::Grpc => "grpc",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for PageserverProtocol {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(self.scheme())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fs::File;
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
"timestamp": "2021-05-23T18:25:43.511Z",
|
||||
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
|
||||
"suspend_timeout_seconds": 3600,
|
||||
|
||||
"cluster": {
|
||||
"cluster_id": "test-cluster-42",
|
||||
|
||||
@@ -71,7 +71,7 @@ impl Runtime {
|
||||
debug!("thread panicked: {:?}", e);
|
||||
let mut result = ctx.result.lock();
|
||||
if result.0 == -1 {
|
||||
*result = (256, format!("thread panicked: {:?}", e));
|
||||
*result = (256, format!("thread panicked: {e:?}"));
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -419,13 +419,13 @@ pub fn now() -> u64 {
|
||||
with_thread_context(|ctx| ctx.clock.get().unwrap().now())
|
||||
}
|
||||
|
||||
pub fn exit(code: i32, msg: String) {
|
||||
pub fn exit(code: i32, msg: String) -> ! {
|
||||
with_thread_context(|ctx| {
|
||||
ctx.allow_panic.store(true, Ordering::SeqCst);
|
||||
let mut result = ctx.result.lock();
|
||||
*result = (code, msg);
|
||||
panic!("exit");
|
||||
});
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn get_thread_ctx() -> Arc<ThreadContext> {
|
||||
|
||||
@@ -47,8 +47,8 @@ impl Debug for AnyMessage {
|
||||
match self {
|
||||
AnyMessage::None => write!(f, "None"),
|
||||
AnyMessage::InternalConnect => write!(f, "InternalConnect"),
|
||||
AnyMessage::Just32(v) => write!(f, "Just32({})", v),
|
||||
AnyMessage::ReplCell(v) => write!(f, "ReplCell({:?})", v),
|
||||
AnyMessage::Just32(v) => write!(f, "Just32({v})"),
|
||||
AnyMessage::ReplCell(v) => write!(f, "ReplCell({v:?})"),
|
||||
AnyMessage::Bytes(v) => write!(f, "Bytes({})", hex::encode(v)),
|
||||
AnyMessage::LSN(v) => write!(f, "LSN({})", Lsn(*v)),
|
||||
}
|
||||
|
||||
@@ -582,14 +582,14 @@ pub fn attach_openapi_ui(
|
||||
deepLinking: true,
|
||||
showExtensions: true,
|
||||
showCommonExtensions: true,
|
||||
url: "{}",
|
||||
url: "{spec_mount_path}",
|
||||
}})
|
||||
window.ui = ui;
|
||||
}};
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"#, spec_mount_path))).unwrap())
|
||||
"#))).unwrap())
|
||||
})
|
||||
)
|
||||
}
|
||||
@@ -696,7 +696,7 @@ mod tests {
|
||||
let remote_addr = SocketAddr::new(IpAddr::from_str("127.0.0.1").unwrap(), 80);
|
||||
let mut service = builder.build(remote_addr);
|
||||
if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {
|
||||
panic!("request service is not ready: {:?}", e);
|
||||
panic!("request service is not ready: {e:?}");
|
||||
}
|
||||
|
||||
let mut req: Request<Body> = Request::default();
|
||||
@@ -716,7 +716,7 @@ mod tests {
|
||||
let remote_addr = SocketAddr::new(IpAddr::from_str("127.0.0.1").unwrap(), 80);
|
||||
let mut service = builder.build(remote_addr);
|
||||
if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {
|
||||
panic!("request service is not ready: {:?}", e);
|
||||
panic!("request service is not ready: {e:?}");
|
||||
}
|
||||
|
||||
let req: Request<Body> = Request::default();
|
||||
|
||||
@@ -86,7 +86,7 @@ impl ShmemHandle {
|
||||
// somewhat smaller than that, because with anything close to that, you'll run out of
|
||||
// memory anyway.
|
||||
if max_size >= 1 << 48 {
|
||||
panic!("max size {} too large", max_size);
|
||||
panic!("max size {max_size} too large");
|
||||
}
|
||||
if initial_size > max_size {
|
||||
panic!("initial size {initial_size} larger than max size {max_size}");
|
||||
@@ -279,7 +279,7 @@ mod tests {
|
||||
fn assert_range(ptr: *const u8, expected: u8, range: Range<usize>) {
|
||||
for i in range {
|
||||
let b = unsafe { *(ptr.add(i)) };
|
||||
assert_eq!(expected, b, "unexpected byte at offset {}", i);
|
||||
assert_eq!(expected, b, "unexpected byte at offset {i}");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -17,7 +17,9 @@ anyhow.workspace = true
|
||||
bytes.workspace = true
|
||||
byteorder.workspace = true
|
||||
utils.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
postgres_ffi_types.workspace = true
|
||||
postgres_versioninfo.workspace = true
|
||||
posthog_client_lite.workspace = true
|
||||
enum-map.workspace = true
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
@@ -28,12 +30,13 @@ humantime-serde.workspace = true
|
||||
chrono = { workspace = true, features = ["serde"] }
|
||||
itertools.workspace = true
|
||||
storage_broker.workspace = true
|
||||
camino = {workspace = true, features = ["serde1"]}
|
||||
camino = { workspace = true, features = ["serde1"] }
|
||||
remote_storage.workspace = true
|
||||
postgres_backend.workspace = true
|
||||
nix = {workspace = true, optional = true}
|
||||
nix = { workspace = true, optional = true }
|
||||
reqwest.workspace = true
|
||||
rand.workspace = true
|
||||
tracing.workspace = true
|
||||
tracing-utils.workspace = true
|
||||
once_cell.workspace = true
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ use camino::Utf8PathBuf;
|
||||
mod tests;
|
||||
|
||||
use const_format::formatcp;
|
||||
use posthog_client_lite::PostHogClientConfig;
|
||||
pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
|
||||
pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
|
||||
pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
|
||||
@@ -12,6 +13,7 @@ pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LI
|
||||
pub const DEFAULT_GRPC_LISTEN_PORT: u16 = 51051; // storage-broker already uses 50051
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::num::{NonZeroU64, NonZeroUsize};
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
@@ -24,16 +26,17 @@ use utils::logging::LogFormat;
|
||||
use crate::models::{ImageCompressionAlgorithm, LsnLease};
|
||||
|
||||
// Certain metadata (e.g. externally-addressable name, AZ) is delivered
|
||||
// as a separate structure. This information is not neeed by the pageserver
|
||||
// as a separate structure. This information is not needed by the pageserver
|
||||
// itself, it is only used for registering the pageserver with the control
|
||||
// plane and/or storage controller.
|
||||
//
|
||||
#[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct NodeMetadata {
|
||||
#[serde(rename = "host")]
|
||||
pub postgres_host: String,
|
||||
#[serde(rename = "port")]
|
||||
pub postgres_port: u16,
|
||||
pub grpc_host: Option<String>,
|
||||
pub grpc_port: Option<u16>,
|
||||
pub http_host: String,
|
||||
pub http_port: u16,
|
||||
pub https_port: Option<u16>,
|
||||
@@ -44,19 +47,81 @@ pub struct NodeMetadata {
|
||||
pub other: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
/// PostHog integration config.
|
||||
impl Display for NodeMetadata {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"postgresql://{}:{} ",
|
||||
self.postgres_host, self.postgres_port
|
||||
)?;
|
||||
if let Some(grpc_host) = &self.grpc_host {
|
||||
let grpc_port = self.grpc_port.unwrap_or_default();
|
||||
write!(f, "grpc://{grpc_host}:{grpc_port} ")?;
|
||||
}
|
||||
write!(f, "http://{}:{} ", self.http_host, self.http_port)?;
|
||||
write!(f, "other:{:?}", self.other)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// PostHog integration config. This is used in pageserver, storcon, and neon_local.
|
||||
/// Ensure backward compatibility when adding new fields.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PostHogConfig {
|
||||
/// PostHog project ID
|
||||
pub project_id: String,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub project_id: Option<String>,
|
||||
/// Server-side (private) API key
|
||||
pub server_api_key: String,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub server_api_key: Option<String>,
|
||||
/// Client-side (public) API key
|
||||
pub client_api_key: String,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub client_api_key: Option<String>,
|
||||
/// Private API URL
|
||||
pub private_api_url: String,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub private_api_url: Option<String>,
|
||||
/// Public API URL
|
||||
pub public_api_url: String,
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub public_api_url: Option<String>,
|
||||
/// Refresh interval for the feature flag spec.
|
||||
/// The storcon will push the feature flag spec to the pageserver. If the pageserver does not receive
|
||||
/// the spec for `refresh_interval`, it will fetch the spec from the PostHog API.
|
||||
#[serde(default)]
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub refresh_interval: Option<Duration>,
|
||||
}
|
||||
|
||||
impl PostHogConfig {
|
||||
pub fn try_into_posthog_config(self) -> Result<PostHogClientConfig, &'static str> {
|
||||
let Some(project_id) = self.project_id else {
|
||||
return Err("project_id is required");
|
||||
};
|
||||
let Some(server_api_key) = self.server_api_key else {
|
||||
return Err("server_api_key is required");
|
||||
};
|
||||
let Some(client_api_key) = self.client_api_key else {
|
||||
return Err("client_api_key is required");
|
||||
};
|
||||
let Some(private_api_url) = self.private_api_url else {
|
||||
return Err("private_api_url is required");
|
||||
};
|
||||
let Some(public_api_url) = self.public_api_url else {
|
||||
return Err("public_api_url is required");
|
||||
};
|
||||
Ok(PostHogClientConfig {
|
||||
project_id,
|
||||
server_api_key,
|
||||
client_api_key,
|
||||
private_api_url,
|
||||
public_api_url,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// `pageserver.toml`
|
||||
@@ -337,17 +402,26 @@ pub struct TimelineImportConfig {
|
||||
pub struct BasebackupCacheConfig {
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub cleanup_period: Duration,
|
||||
// FIXME: Support max_size_bytes.
|
||||
// pub max_size_bytes: usize,
|
||||
pub max_size_entries: i64,
|
||||
/// Maximum total size of basebackup cache entries on disk in bytes.
|
||||
/// The cache may slightly exceed this limit because we do not know
|
||||
/// the exact size of the cache entry untill it's written to disk.
|
||||
pub max_total_size_bytes: u64,
|
||||
// TODO(diko): support max_entry_size_bytes.
|
||||
// pub max_entry_size_bytes: u64,
|
||||
pub max_size_entries: usize,
|
||||
/// Size of the channel used to send prepare requests to the basebackup cache worker.
|
||||
/// If exceeded, new prepare requests will be dropped.
|
||||
pub prepare_channel_size: usize,
|
||||
}
|
||||
|
||||
impl Default for BasebackupCacheConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
cleanup_period: Duration::from_secs(60),
|
||||
// max_size_bytes: 1024 * 1024 * 1024, // 1 GiB
|
||||
max_size_entries: 1000,
|
||||
max_total_size_bytes: 1024 * 1024 * 1024, // 1 GiB
|
||||
// max_entry_size_bytes: 16 * 1024 * 1024, // 16 MiB
|
||||
max_size_entries: 10000,
|
||||
prepare_channel_size: 100,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -792,7 +866,7 @@ pub mod tenant_conf_defaults {
|
||||
// By default ingest enough WAL for two new L0 layers before checking if new image
|
||||
// image layers should be created.
|
||||
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
|
||||
pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
|
||||
pub const DEFAULT_GC_COMPACTION_ENABLED: bool = true;
|
||||
pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
|
||||
pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
|
||||
pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
|
||||
|
||||
@@ -14,6 +14,8 @@ fn test_node_metadata_v1_backward_compatibilty() {
|
||||
NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: 23,
|
||||
grpc_host: None,
|
||||
grpc_port: None,
|
||||
http_host: "localhost".to_string(),
|
||||
http_port: 42,
|
||||
https_port: None,
|
||||
@@ -37,6 +39,35 @@ fn test_node_metadata_v2_backward_compatibilty() {
|
||||
NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: 23,
|
||||
grpc_host: None,
|
||||
grpc_port: None,
|
||||
http_host: "localhost".to_string(),
|
||||
http_port: 42,
|
||||
https_port: Some(123),
|
||||
other: HashMap::new(),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_metadata_v3_backward_compatibilty() {
|
||||
let v3 = serde_json::to_vec(&serde_json::json!({
|
||||
"host": "localhost",
|
||||
"port": 23,
|
||||
"grpc_host": "localhost",
|
||||
"grpc_port": 51,
|
||||
"http_host": "localhost",
|
||||
"http_port": 42,
|
||||
"https_port": 123,
|
||||
}));
|
||||
|
||||
assert_eq!(
|
||||
serde_json::from_slice::<NodeMetadata>(&v3.unwrap()).unwrap(),
|
||||
NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: 23,
|
||||
grpc_host: Some("localhost".to_string()),
|
||||
grpc_port: Some(51),
|
||||
http_host: "localhost".to_string(),
|
||||
http_port: 42,
|
||||
https_port: Some(123),
|
||||
|
||||
@@ -52,6 +52,8 @@ pub struct NodeRegisterRequest {
|
||||
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_pg_port: u16,
|
||||
pub listen_grpc_addr: Option<String>,
|
||||
pub listen_grpc_port: Option<u16>,
|
||||
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
@@ -101,6 +103,8 @@ pub struct TenantLocateResponseShard {
|
||||
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_pg_port: u16,
|
||||
pub listen_grpc_addr: Option<String>,
|
||||
pub listen_grpc_port: Option<u16>,
|
||||
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
@@ -152,6 +156,8 @@ pub struct NodeDescribeResponse {
|
||||
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_pg_port: u16,
|
||||
pub listen_grpc_addr: Option<String>,
|
||||
pub listen_grpc_port: Option<u16>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -540,6 +546,11 @@ pub struct TimelineImportRequest {
|
||||
pub sk_set: Vec<NodeId>,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, serde::Deserialize, Clone)]
|
||||
pub struct TimelineSafekeeperMigrateRequest {
|
||||
pub new_sk_set: Vec<NodeId>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use serde_json;
|
||||
@@ -571,8 +582,7 @@ mod test {
|
||||
let err = serde_json::from_value::<TenantCreateRequest>(create_request).unwrap_err();
|
||||
assert!(
|
||||
err.to_string().contains("unknown field `unknown_field`"),
|
||||
"expect unknown field `unknown_field` error, got: {}",
|
||||
err
|
||||
"expect unknown field `unknown_field` error, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@ use std::ops::Range;
|
||||
use anyhow::{Result, bail};
|
||||
use byteorder::{BE, ByteOrder};
|
||||
use bytes::Bytes;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::{Oid, RepOriginId};
|
||||
use postgres_ffi_types::forknum::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi_types::{Oid, RepOriginId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::const_assert;
|
||||
|
||||
@@ -194,7 +194,7 @@ impl Key {
|
||||
/// will be rejected on the write path.
|
||||
#[allow(dead_code)]
|
||||
pub fn is_valid_key_on_write_path_strong(&self) -> bool {
|
||||
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
||||
use postgres_ffi_types::constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
||||
if !self.is_i128_representable() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use itertools::Itertools;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
use crate::key::Key;
|
||||
use crate::shard::{ShardCount, ShardIdentity};
|
||||
@@ -269,9 +268,13 @@ impl KeySpace {
|
||||
/// Partition a key space into roughly chunks of roughly 'target_size' bytes
|
||||
/// in each partition.
|
||||
///
|
||||
pub fn partition(&self, shard_identity: &ShardIdentity, target_size: u64) -> KeyPartitioning {
|
||||
// Assume that each value is 8k in size.
|
||||
let target_nblocks = (target_size / BLCKSZ as u64) as u32;
|
||||
pub fn partition(
|
||||
&self,
|
||||
shard_identity: &ShardIdentity,
|
||||
target_size: u64,
|
||||
block_size: u64,
|
||||
) -> KeyPartitioning {
|
||||
let target_nblocks = (target_size / block_size) as u32;
|
||||
|
||||
let mut parts = Vec::new();
|
||||
let mut current_part = Vec::new();
|
||||
@@ -331,8 +334,7 @@ impl KeySpace {
|
||||
std::cmp::max(range.start, prev.start) < std::cmp::min(range.end, prev.end);
|
||||
assert!(
|
||||
!overlap,
|
||||
"Attempt to merge ovelapping keyspaces: {:?} overlaps {:?}",
|
||||
prev, range
|
||||
"Attempt to merge ovelapping keyspaces: {prev:?} overlaps {range:?}"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1101,7 +1103,7 @@ mod tests {
|
||||
// total range contains at least one shard-local page
|
||||
let all_nonzero = fragments.iter().all(|f| f.0 > 0);
|
||||
if !all_nonzero {
|
||||
eprintln!("Found a zero-length fragment: {:?}", fragments);
|
||||
eprintln!("Found a zero-length fragment: {fragments:?}");
|
||||
}
|
||||
assert!(all_nonzero);
|
||||
} else {
|
||||
|
||||
@@ -5,11 +5,10 @@ pub mod controller_api;
|
||||
pub mod key;
|
||||
pub mod keyspace;
|
||||
pub mod models;
|
||||
pub mod record;
|
||||
pub mod pagestream_api;
|
||||
pub mod reltag;
|
||||
pub mod shard;
|
||||
/// Public API types
|
||||
pub mod upcall_api;
|
||||
pub mod value;
|
||||
|
||||
pub mod config;
|
||||
|
||||
@@ -5,16 +5,13 @@ pub mod utilization;
|
||||
use core::ops::Range;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::io::{BufRead, Read};
|
||||
use std::num::{NonZeroU32, NonZeroU64, NonZeroUsize};
|
||||
use std::str::FromStr;
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
#[cfg(feature = "testing")]
|
||||
use camino::Utf8PathBuf;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_versioninfo::PgMajorVersion;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_with::serde_as;
|
||||
pub use utilization::PageserverUtilization;
|
||||
@@ -24,8 +21,9 @@ use utils::{completion, serde_system_time};
|
||||
|
||||
use crate::config::Ratio;
|
||||
use crate::key::{CompactKey, Key};
|
||||
use crate::reltag::RelTag;
|
||||
use crate::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize, TenantShardId};
|
||||
use crate::shard::{
|
||||
DEFAULT_STRIPE_SIZE, ShardCount, ShardIdentity, ShardStripeSize, TenantShardId,
|
||||
};
|
||||
|
||||
/// The state of a tenant in this pageserver.
|
||||
///
|
||||
@@ -403,7 +401,7 @@ pub enum TimelineCreateRequestMode {
|
||||
// inherits the ancestor's pg_version. Earlier code wasn't
|
||||
// using a flattened enum, so, it was an accepted field, and
|
||||
// we continue to accept it by having it here.
|
||||
pg_version: Option<u32>,
|
||||
pg_version: Option<PgMajorVersion>,
|
||||
#[serde(default, skip_serializing_if = "std::ops::Not::not")]
|
||||
read_only: bool,
|
||||
},
|
||||
@@ -415,7 +413,7 @@ pub enum TimelineCreateRequestMode {
|
||||
Bootstrap {
|
||||
#[serde(default)]
|
||||
existing_initdb_timeline_id: Option<TimelineId>,
|
||||
pg_version: Option<u32>,
|
||||
pg_version: Option<PgMajorVersion>,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -479,7 +477,7 @@ pub struct TenantShardSplitResponse {
|
||||
}
|
||||
|
||||
/// Parameters that apply to all shards in a tenant. Used during tenant creation.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
#[derive(Clone, Copy, Serialize, Deserialize, Debug)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
pub struct ShardParameters {
|
||||
pub count: ShardCount,
|
||||
@@ -501,6 +499,15 @@ impl Default for ShardParameters {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ShardIdentity> for ShardParameters {
|
||||
fn from(identity: ShardIdentity) -> Self {
|
||||
Self {
|
||||
count: identity.count,
|
||||
stripe_size: identity.stripe_size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone, Eq, PartialEq)]
|
||||
pub enum FieldPatch<T> {
|
||||
Upsert(T),
|
||||
@@ -1187,7 +1194,7 @@ impl Display for ImageCompressionAlgorithm {
|
||||
ImageCompressionAlgorithm::Disabled => write!(f, "disabled"),
|
||||
ImageCompressionAlgorithm::Zstd { level } => {
|
||||
if let Some(level) = level {
|
||||
write!(f, "zstd({})", level)
|
||||
write!(f, "zstd({level})")
|
||||
} else {
|
||||
write!(f, "zstd")
|
||||
}
|
||||
@@ -1578,7 +1585,7 @@ pub struct TimelineInfo {
|
||||
pub last_received_msg_lsn: Option<Lsn>,
|
||||
/// the timestamp (in microseconds) of the last received message
|
||||
pub last_received_msg_ts: Option<u128>,
|
||||
pub pg_version: u32,
|
||||
pub pg_version: PgMajorVersion,
|
||||
|
||||
pub state: TimelineState,
|
||||
|
||||
@@ -1907,219 +1914,6 @@ pub struct ScanDisposableKeysResponse {
|
||||
pub not_disposable_count: usize,
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(PartialEq, Eq, Debug)]
|
||||
pub enum PagestreamFeMessage {
|
||||
Exists(PagestreamExistsRequest),
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentRequest),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestRequest),
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(Debug, strum_macros::EnumProperty)]
|
||||
pub enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentResponse),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestResponse),
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamFeMessageTag {
|
||||
Exists = 0,
|
||||
Nblocks = 1,
|
||||
GetPage = 2,
|
||||
DbSize = 3,
|
||||
GetSlruSegment = 4,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 99,
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamBeMessageTag {
|
||||
Exists = 100,
|
||||
Nblocks = 101,
|
||||
GetPage = 102,
|
||||
Error = 103,
|
||||
DbSize = 104,
|
||||
GetSlruSegment = 105,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 199,
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamFeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
0 => Ok(PagestreamFeMessageTag::Exists),
|
||||
1 => Ok(PagestreamFeMessageTag::Nblocks),
|
||||
2 => Ok(PagestreamFeMessageTag::GetPage),
|
||||
3 => Ok(PagestreamFeMessageTag::DbSize),
|
||||
4 => Ok(PagestreamFeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
99 => Ok(PagestreamFeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
100 => Ok(PagestreamBeMessageTag::Exists),
|
||||
101 => Ok(PagestreamBeMessageTag::Nblocks),
|
||||
102 => Ok(PagestreamBeMessageTag::GetPage),
|
||||
103 => Ok(PagestreamBeMessageTag::Error),
|
||||
104 => Ok(PagestreamBeMessageTag::DbSize),
|
||||
105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
199 => Ok(PagestreamBeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A GetPage request contains two LSN values:
|
||||
//
|
||||
// request_lsn: Get the page version at this point in time. Lsn::Max is a special value that means
|
||||
// "get the latest version present". It's used by the primary server, which knows that no one else
|
||||
// is writing WAL. 'not_modified_since' must be set to a proper value even if request_lsn is
|
||||
// Lsn::Max. Standby servers use the current replay LSN as the request LSN.
|
||||
//
|
||||
// not_modified_since: Hint to the pageserver that the client knows that the page has not been
|
||||
// modified between 'not_modified_since' and the request LSN. It's always correct to set
|
||||
// 'not_modified_since equal' to 'request_lsn' (unless Lsn::Max is used as the 'request_lsn'), but
|
||||
// passing an earlier LSN can speed up the request, by allowing the pageserver to process the
|
||||
// request without waiting for 'request_lsn' to arrive.
|
||||
//
|
||||
// The now-defunct V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
|
||||
// sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and
|
||||
// 'latest' was set to true. The V2 interface was added because there was no correct way for a
|
||||
// standby to request a page at a particular non-latest LSN, and also include the
|
||||
// 'not_modified_since' hint. That led to an awkward choice of either using an old LSN in the
|
||||
// request, if the standby knows that the page hasn't been modified since, and risk getting an error
|
||||
// if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could
|
||||
// require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2
|
||||
// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
|
||||
// difference in the responses between V1 and V2.
|
||||
//
|
||||
// V3 version of protocol adds request ID to all requests. This request ID is also included in response
|
||||
// as well as other fields from requests, which allows to verify that we receive response for our request.
|
||||
// We copy fields from request to response to make checking more reliable: request ID is formed from process ID
|
||||
// and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
|
||||
//
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum PagestreamProtocolVersion {
|
||||
V2,
|
||||
V3,
|
||||
}
|
||||
|
||||
pub type RequestId = u64;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamRequest {
|
||||
pub reqid: RequestId,
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamExistsRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamNblocksRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetPageRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
pub blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamDbSizeRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetSlruSegmentRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub kind: u8,
|
||||
pub segno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsResponse {
|
||||
pub req: PagestreamExistsRequest,
|
||||
pub exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksResponse {
|
||||
pub req: PagestreamNblocksRequest,
|
||||
pub n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageResponse {
|
||||
pub req: PagestreamGetPageRequest,
|
||||
pub page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetSlruSegmentResponse {
|
||||
pub req: PagestreamGetSlruSegmentRequest,
|
||||
pub segment: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamErrorResponse {
|
||||
pub req: PagestreamRequest,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeResponse {
|
||||
pub req: PagestreamDbSizeRequest,
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct PagestreamTestRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub batch_key: u64,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamTestResponse {
|
||||
pub req: PagestreamTestRequest,
|
||||
}
|
||||
|
||||
// This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
|
||||
// that require pageserver-internal types. It is sufficient to get the total size.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -2131,506 +1925,6 @@ pub struct TenantHistorySize {
|
||||
pub size: Option<u64>,
|
||||
}
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
/// Serialize a compute -> pageserver message. This is currently only used in testing
|
||||
/// tools. Always uses protocol version 3.
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Exists as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
}
|
||||
|
||||
Self::Nblocks(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
}
|
||||
|
||||
Self::GetPage(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
bytes.put_u32(req.blkno);
|
||||
}
|
||||
|
||||
Self::DbSize(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(req.kind);
|
||||
bytes.put_u32(req.segno);
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Test as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(req.batch_key);
|
||||
let message = req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn parse<R: std::io::Read>(
|
||||
body: &mut R,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.read_u8()?;
|
||||
let (reqid, request_lsn, not_modified_since) = match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => (
|
||||
0,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
PagestreamProtocolVersion::V3 => (
|
||||
body.read_u64::<BigEndian>()?,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
};
|
||||
|
||||
match PagestreamFeMessageTag::try_from(msg_tag)
|
||||
.map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))?
|
||||
{
|
||||
PagestreamFeMessageTag::Exists => {
|
||||
Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::Nblocks => {
|
||||
Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetPage => {
|
||||
Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::DbSize => {
|
||||
Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind: body.read_u8()?,
|
||||
segno: body.read_u32::<BigEndian>()?,
|
||||
},
|
||||
)),
|
||||
#[cfg(feature = "testing")]
|
||||
PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key: body.read_u64::<BigEndian>()?,
|
||||
message: {
|
||||
let len = body.read_u64::<BigEndian>()?;
|
||||
let mut buf = vec![0; len as usize];
|
||||
body.read_exact(&mut buf)?;
|
||||
String::from_utf8(buf)?
|
||||
},
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamBeMessage {
|
||||
pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
PagestreamProtocolVersion::V3 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.req.blkno);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put_u64(resp.req.reqid);
|
||||
bytes.put_u64(resp.req.request_lsn.0);
|
||||
bytes.put_u64(resp.req.not_modified_since.0);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.dbnode);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(resp.req.kind);
|
||||
bytes.put_u32(resp.req.segno);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
|
||||
let mut buf = buf.reader();
|
||||
let msg_tag = buf.read_u8()?;
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
let ok =
|
||||
match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
|
||||
Tag::Exists => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let exists = buf.read_u8()? != 0;
|
||||
Self::Exists(PagestreamExistsResponse {
|
||||
req: PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
exists,
|
||||
})
|
||||
}
|
||||
Tag::Nblocks => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
Self::Nblocks(PagestreamNblocksResponse {
|
||||
req: PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
n_blocks,
|
||||
})
|
||||
}
|
||||
Tag::GetPage => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let blkno = buf.read_u32::<BigEndian>()?;
|
||||
let mut page = vec![0; 8192]; // TODO: use MaybeUninit
|
||||
buf.read_exact(&mut page)?;
|
||||
Self::GetPage(PagestreamGetPageResponse {
|
||||
req: PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
blkno,
|
||||
},
|
||||
page: page.into(),
|
||||
})
|
||||
}
|
||||
Tag::Error => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let mut msg = Vec::new();
|
||||
buf.read_until(0, &mut msg)?;
|
||||
let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
|
||||
let rust_str = cstring.to_str()?;
|
||||
Self::Error(PagestreamErrorResponse {
|
||||
req: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
message: rust_str.to_owned(),
|
||||
})
|
||||
}
|
||||
Tag::DbSize => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let dbnode = buf.read_u32::<BigEndian>()?;
|
||||
let db_size = buf.read_i64::<BigEndian>()?;
|
||||
Self::DbSize(PagestreamDbSizeResponse {
|
||||
req: PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode,
|
||||
},
|
||||
db_size,
|
||||
})
|
||||
}
|
||||
Tag::GetSlruSegment => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let kind = buf.read_u8()?;
|
||||
let segno = buf.read_u32::<BigEndian>()?;
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
|
||||
buf.read_exact(&mut segment)?;
|
||||
Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
|
||||
req: PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind,
|
||||
segno,
|
||||
},
|
||||
segment: segment.into(),
|
||||
})
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Tag::Test => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let batch_key = buf.read_u64::<BigEndian>()?;
|
||||
let len = buf.read_u64::<BigEndian>()?;
|
||||
let mut msg = vec![0; len as usize];
|
||||
buf.read_exact(&mut msg)?;
|
||||
let message = String::from_utf8(msg)?;
|
||||
Self::Test(PagestreamTestResponse {
|
||||
req: PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key,
|
||||
message,
|
||||
},
|
||||
})
|
||||
}
|
||||
};
|
||||
let remaining = buf.into_inner();
|
||||
if !remaining.is_empty() {
|
||||
anyhow::bail!(
|
||||
"remaining bytes in msg with tag={msg_tag}: {}",
|
||||
remaining.len()
|
||||
);
|
||||
}
|
||||
Ok(ok)
|
||||
}
|
||||
|
||||
pub fn kind(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Exists(_) => "Exists",
|
||||
Self::Nblocks(_) => "Nblocks",
|
||||
Self::GetPage(_) => "GetPage",
|
||||
Self::Error(_) => "Error",
|
||||
Self::DbSize(_) => "DbSize",
|
||||
Self::GetSlruSegment(_) => "GetSlruSegment",
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(_) => "Test",
|
||||
}
|
||||
}
|
||||
}
|
||||
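// Illustrative sketch, not part of this diff: the deserialize() method above
// refuses to accept trailing garbage after a fully parsed message (the
// "remaining bytes in msg" bail). Assuming the types of this file are in scope,
// appending a single stray byte makes a round trip fail.
#[cfg(test)]
mod strictness_sketch {
    use super::*;
    use bytes::Bytes;

    #[test]
    fn trailing_bytes_are_rejected() {
        let resp = PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
            req: PagestreamDbSizeRequest {
                hdr: PagestreamRequest {
                    reqid: 1,
                    request_lsn: Lsn(0x10),
                    not_modified_since: Lsn(0x10),
                },
                dbnode: 5,
            },
            db_size: 4096,
        });
        let mut raw = resp.serialize(PagestreamProtocolVersion::V3).to_vec();
        raw.push(0); // one byte too many
        assert!(PagestreamBeMessage::deserialize(Bytes::from(raw)).is_err());
    }
}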
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PageTraceEvent {
|
||||
pub key: CompactKey,
|
||||
@@ -2656,68 +1950,6 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_pagestream() {
|
||||
// Test serialization/deserialization of PagestreamFeMessage
|
||||
let messages = vec![
|
||||
PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(4),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
blkno: 7,
|
||||
}),
|
||||
PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
dbnode: 7,
|
||||
}),
|
||||
];
|
||||
for msg in messages {
|
||||
let bytes = msg.serialize();
|
||||
let reconstructed =
|
||||
PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
|
||||
.unwrap();
|
||||
assert!(msg == reconstructed);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tenantinfo_serde() {
|
||||
// Test serialization/deserialization of TenantInfo
|
||||
@@ -2791,8 +2023,7 @@ mod tests {
|
||||
let err = serde_json::from_value::<TenantConfigRequest>(config_request).unwrap_err();
|
||||
assert!(
|
||||
err.to_string().contains("unknown field `unknown_field`"),
|
||||
"expect unknown field `unknown_field` error, got: {}",
|
||||
err
|
||||
"expect unknown field `unknown_field` error, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
libs/pageserver_api/src/pagestream_api.rs (new file, 798 lines)
@@ -0,0 +1,798 @@
//! Rust definitions of the libpq-based pagestream API
//!
//! See also the C implementation of the same API in pgxn/neon/pagestore_client.h

use std::io::{BufRead, Read};

use crate::reltag::RelTag;

use byteorder::{BigEndian, ReadBytesExt};
use bytes::{Buf, BufMut, Bytes, BytesMut};
use utils::lsn::Lsn;

/// Block size.
///
/// XXX: We assume 8k block size in the SLRU fetch API. It's not great to hardcode
/// that in the protocol, because Postgres supports different block sizes as a compile
/// time option.
const BLCKSZ: usize = 8192;

// Wrapped in libpq CopyData
#[derive(PartialEq, Eq, Debug)]
pub enum PagestreamFeMessage {
    Exists(PagestreamExistsRequest),
    Nblocks(PagestreamNblocksRequest),
    GetPage(PagestreamGetPageRequest),
    DbSize(PagestreamDbSizeRequest),
    GetSlruSegment(PagestreamGetSlruSegmentRequest),
    #[cfg(feature = "testing")]
    Test(PagestreamTestRequest),
}

// Wrapped in libpq CopyData
#[derive(Debug, strum_macros::EnumProperty)]
pub enum PagestreamBeMessage {
    Exists(PagestreamExistsResponse),
    Nblocks(PagestreamNblocksResponse),
    GetPage(PagestreamGetPageResponse),
    Error(PagestreamErrorResponse),
    DbSize(PagestreamDbSizeResponse),
    GetSlruSegment(PagestreamGetSlruSegmentResponse),
    #[cfg(feature = "testing")]
    Test(PagestreamTestResponse),
}

// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamFeMessageTag {
|
||||
Exists = 0,
|
||||
Nblocks = 1,
|
||||
GetPage = 2,
|
||||
DbSize = 3,
|
||||
GetSlruSegment = 4,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 99,
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamBeMessageTag {
|
||||
Exists = 100,
|
||||
Nblocks = 101,
|
||||
GetPage = 102,
|
||||
Error = 103,
|
||||
DbSize = 104,
|
||||
GetSlruSegment = 105,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 199,
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamFeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
0 => Ok(PagestreamFeMessageTag::Exists),
|
||||
1 => Ok(PagestreamFeMessageTag::Nblocks),
|
||||
2 => Ok(PagestreamFeMessageTag::GetPage),
|
||||
3 => Ok(PagestreamFeMessageTag::DbSize),
|
||||
4 => Ok(PagestreamFeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
99 => Ok(PagestreamFeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
100 => Ok(PagestreamBeMessageTag::Exists),
|
||||
101 => Ok(PagestreamBeMessageTag::Nblocks),
|
||||
102 => Ok(PagestreamBeMessageTag::GetPage),
|
||||
103 => Ok(PagestreamBeMessageTag::Error),
|
||||
104 => Ok(PagestreamBeMessageTag::DbSize),
|
||||
105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
199 => Ok(PagestreamBeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
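// Illustrative sketch, not part of this diff: the tag bytes above are the only
// values accepted on the wire; anything else comes back as Err(value) so the
// caller can report the bad tag.
#[cfg(test)]
mod tag_sketch {
    use super::*;

    #[test]
    fn unknown_tags_are_rejected() {
        assert!(matches!(
            PagestreamFeMessageTag::try_from(2),
            Ok(PagestreamFeMessageTag::GetPage)
        ));
        assert!(matches!(
            PagestreamBeMessageTag::try_from(105),
            Ok(PagestreamBeMessageTag::GetSlruSegment)
        ));
        // 42 is not a known tag in either direction.
        assert!(matches!(PagestreamFeMessageTag::try_from(42), Err(42)));
        assert!(matches!(PagestreamBeMessageTag::try_from(42), Err(42)));
    }
}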
// A GetPage request contains two LSN values:
|
||||
//
|
||||
// request_lsn: Get the page version at this point in time. Lsn::Max is a special value that means
|
||||
// "get the latest version present". It's used by the primary server, which knows that no one else
|
||||
// is writing WAL. 'not_modified_since' must be set to a proper value even if request_lsn is
|
||||
// Lsn::Max. Standby servers use the current replay LSN as the request LSN.
|
||||
//
|
||||
// not_modified_since: Hint to the pageserver that the client knows that the page has not been
|
||||
// modified between 'not_modified_since' and the request LSN. It's always correct to set
|
||||
// 'not_modified_since equal' to 'request_lsn' (unless Lsn::Max is used as the 'request_lsn'), but
|
||||
// passing an earlier LSN can speed up the request, by allowing the pageserver to process the
|
||||
// request without waiting for 'request_lsn' to arrive.
|
||||
//
|
||||
// The now-defunct V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
|
||||
// sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and
|
||||
// 'latest' was set to true. The V2 interface was added because there was no correct way for a
|
||||
// standby to request a page at a particular non-latest LSN, and also include the
|
||||
// 'not_modified_since' hint. That led to an awkward choice of either using an old LSN in the
|
||||
// request, if the standby knows that the page hasn't been modified since, and risk getting an error
|
||||
// if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could
|
||||
// require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2
|
||||
// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
|
||||
// difference in the responses between V1 and V2.
|
||||
//
|
||||
// V3 version of protocol adds request ID to all requests. This request ID is also included in response
|
||||
// as well as other fields from requests, which allows to verify that we receive response for our request.
|
||||
// We copy fields from request to response to make checking more reliable: request ID is formed from process ID
|
||||
// and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
|
||||
//
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum PagestreamProtocolVersion {
|
||||
V2,
|
||||
V3,
|
||||
}
|
||||
|
||||
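// Illustrative sketch, not part of this diff, of how the two LSNs described in
// the comment above are typically chosen. The helper names and the use of
// Lsn(u64::MAX) as the "latest" sentinel (spelled Lsn::Max above) are
// assumptions for illustration, not APIs defined in this file.
#[allow(dead_code)]
fn request_header_for_primary(reqid: RequestId, last_written_lsn: Lsn) -> PagestreamRequest {
    PagestreamRequest {
        reqid,
        // The primary knows nobody else writes WAL: ask for the latest version...
        request_lsn: Lsn(u64::MAX),
        // ...but still tell the pageserver how far back the page is unchanged.
        not_modified_since: last_written_lsn,
    }
}

#[allow(dead_code)]
fn request_header_for_standby(
    reqid: RequestId,
    replay_lsn: Lsn,
    last_written_lsn: Lsn,
) -> PagestreamRequest {
    PagestreamRequest {
        reqid,
        // A standby must pin the page version to its current replay position.
        request_lsn: replay_lsn,
        // An earlier hint lets the pageserver answer without waiting for WAL.
        not_modified_since: last_written_lsn,
    }
}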
pub type RequestId = u64;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamRequest {
|
||||
pub reqid: RequestId,
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamExistsRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamNblocksRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetPageRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
pub blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamDbSizeRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetSlruSegmentRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub kind: u8,
|
||||
pub segno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsResponse {
|
||||
pub req: PagestreamExistsRequest,
|
||||
pub exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksResponse {
|
||||
pub req: PagestreamNblocksRequest,
|
||||
pub n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageResponse {
|
||||
pub req: PagestreamGetPageRequest,
|
||||
pub page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetSlruSegmentResponse {
|
||||
pub req: PagestreamGetSlruSegmentRequest,
|
||||
pub segment: Bytes,
|
||||
}
|
||||
|
||||
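// Illustrative sketch, not part of this diff: on the wire a GetSlruSegment
// response carries a block count followed by n_blocks * BLCKSZ bytes of payload
// (see the serialize/deserialize code below). This helper is an assumption for
// illustration only; it spells out the invariant the encoding relies on.
#[allow(dead_code)]
fn slru_segment_block_count(segment: &Bytes) -> anyhow::Result<u32> {
    if segment.len() % BLCKSZ != 0 {
        anyhow::bail!(
            "SLRU segment length {} is not a multiple of BLCKSZ",
            segment.len()
        );
    }
    Ok((segment.len() / BLCKSZ) as u32)
}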
#[derive(Debug)]
|
||||
pub struct PagestreamErrorResponse {
|
||||
pub req: PagestreamRequest,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeResponse {
|
||||
pub req: PagestreamDbSizeRequest,
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct PagestreamTestRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub batch_key: u64,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamTestResponse {
|
||||
pub req: PagestreamTestRequest,
|
||||
}
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
/// Serialize a compute -> pageserver message. This is currently only used in testing
|
||||
/// tools. Always uses protocol version 3.
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Exists as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
}
|
||||
|
||||
Self::Nblocks(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
}
|
||||
|
||||
Self::GetPage(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
bytes.put_u32(req.blkno);
|
||||
}
|
||||
|
||||
Self::DbSize(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(req.kind);
|
||||
bytes.put_u32(req.segno);
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Test as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(req.batch_key);
|
||||
let message = req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn parse<R: std::io::Read>(
|
||||
body: &mut R,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.read_u8()?;
|
||||
let (reqid, request_lsn, not_modified_since) = match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => (
|
||||
0,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
PagestreamProtocolVersion::V3 => (
|
||||
body.read_u64::<BigEndian>()?,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
};
|
||||
|
||||
match PagestreamFeMessageTag::try_from(msg_tag)
|
||||
.map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))?
|
||||
{
|
||||
PagestreamFeMessageTag::Exists => {
|
||||
Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::Nblocks => {
|
||||
Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetPage => {
|
||||
Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::DbSize => {
|
||||
Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind: body.read_u8()?,
|
||||
segno: body.read_u32::<BigEndian>()?,
|
||||
},
|
||||
)),
|
||||
#[cfg(feature = "testing")]
|
||||
PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key: body.read_u64::<BigEndian>()?,
|
||||
message: {
|
||||
let len = body.read_u64::<BigEndian>()?;
|
||||
let mut buf = vec![0; len as usize];
|
||||
body.read_exact(&mut buf)?;
|
||||
String::from_utf8(buf)?
|
||||
},
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
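// Illustrative sketch, not part of this diff: serialize() always writes the V3
// layout, so parse() must be told PagestreamProtocolVersion::V3 to read it back
// (the unit test at the bottom of this file exercises the same round trip).
#[cfg(test)]
mod fe_roundtrip_sketch {
    use super::*;
    use bytes::Buf;

    #[test]
    fn exists_request_roundtrip() {
        let msg = PagestreamFeMessage::Exists(PagestreamExistsRequest {
            hdr: PagestreamRequest {
                reqid: 42,
                request_lsn: Lsn(0x1000),
                not_modified_since: Lsn(0x0800),
            },
            rel: RelTag {
                spcnode: 1,
                dbnode: 2,
                relnode: 3,
                forknum: 0,
            },
        });
        let bytes = msg.serialize();
        let parsed =
            PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
                .unwrap();
        assert_eq!(msg, parsed);
    }
}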
impl PagestreamBeMessage {
|
||||
pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
PagestreamProtocolVersion::V3 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.req.blkno);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put_u64(resp.req.reqid);
|
||||
bytes.put_u64(resp.req.request_lsn.0);
|
||||
bytes.put_u64(resp.req.not_modified_since.0);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.dbnode);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(resp.req.kind);
|
||||
bytes.put_u32(resp.req.segno);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
|
||||
let mut buf = buf.reader();
|
||||
let msg_tag = buf.read_u8()?;
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
let ok =
|
||||
match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
|
||||
Tag::Exists => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let exists = buf.read_u8()? != 0;
|
||||
Self::Exists(PagestreamExistsResponse {
|
||||
req: PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
exists,
|
||||
})
|
||||
}
|
||||
Tag::Nblocks => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
Self::Nblocks(PagestreamNblocksResponse {
|
||||
req: PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
n_blocks,
|
||||
})
|
||||
}
|
||||
Tag::GetPage => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let blkno = buf.read_u32::<BigEndian>()?;
|
||||
let mut page = vec![0; 8192]; // TODO: use MaybeUninit
|
||||
buf.read_exact(&mut page)?;
|
||||
Self::GetPage(PagestreamGetPageResponse {
|
||||
req: PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
blkno,
|
||||
},
|
||||
page: page.into(),
|
||||
})
|
||||
}
|
||||
Tag::Error => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let mut msg = Vec::new();
|
||||
buf.read_until(0, &mut msg)?;
|
||||
let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
|
||||
let rust_str = cstring.to_str()?;
|
||||
Self::Error(PagestreamErrorResponse {
|
||||
req: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
message: rust_str.to_owned(),
|
||||
})
|
||||
}
|
||||
Tag::DbSize => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let dbnode = buf.read_u32::<BigEndian>()?;
|
||||
let db_size = buf.read_i64::<BigEndian>()?;
|
||||
Self::DbSize(PagestreamDbSizeResponse {
|
||||
req: PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode,
|
||||
},
|
||||
db_size,
|
||||
})
|
||||
}
|
||||
Tag::GetSlruSegment => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let kind = buf.read_u8()?;
|
||||
let segno = buf.read_u32::<BigEndian>()?;
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
let mut segment = vec![0; n_blocks as usize * BLCKSZ];
|
||||
buf.read_exact(&mut segment)?;
|
||||
Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
|
||||
req: PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind,
|
||||
segno,
|
||||
},
|
||||
segment: segment.into(),
|
||||
})
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Tag::Test => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let batch_key = buf.read_u64::<BigEndian>()?;
|
||||
let len = buf.read_u64::<BigEndian>()?;
|
||||
let mut msg = vec![0; len as usize];
|
||||
buf.read_exact(&mut msg)?;
|
||||
let message = String::from_utf8(msg)?;
|
||||
Self::Test(PagestreamTestResponse {
|
||||
req: PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key,
|
||||
message,
|
||||
},
|
||||
})
|
||||
}
|
||||
};
|
||||
let remaining = buf.into_inner();
|
||||
if !remaining.is_empty() {
|
||||
anyhow::bail!(
|
||||
"remaining bytes in msg with tag={msg_tag}: {}",
|
||||
remaining.len()
|
||||
);
|
||||
}
|
||||
Ok(ok)
|
||||
}
|
||||
|
||||
pub fn kind(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Exists(_) => "Exists",
|
||||
Self::Nblocks(_) => "Nblocks",
|
||||
Self::GetPage(_) => "GetPage",
|
||||
Self::Error(_) => "Error",
|
||||
Self::DbSize(_) => "DbSize",
|
||||
Self::GetSlruSegment(_) => "GetSlruSegment",
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(_) => "Test",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
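// Illustrative sketch, not part of this diff: in the V3 layout every response
// echoes the request header, and deserialize() reads that echoed header back,
// which is what lets a client match responses to its outstanding requests.
#[cfg(test)]
mod be_roundtrip_sketch {
    use super::*;

    #[test]
    fn dbsize_response_echoes_request() {
        let resp = PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
            req: PagestreamDbSizeRequest {
                hdr: PagestreamRequest {
                    reqid: 7,
                    request_lsn: Lsn(0x2000),
                    not_modified_since: Lsn(0x1000),
                },
                dbnode: 12345,
            },
            db_size: 8 * 8192,
        });
        let bytes = resp.serialize(PagestreamProtocolVersion::V3);
        match PagestreamBeMessage::deserialize(bytes).unwrap() {
            PagestreamBeMessage::DbSize(back) => {
                assert_eq!(back.req.hdr.reqid, 7);
                assert_eq!(back.req.dbnode, 12345);
                assert_eq!(back.db_size, 8 * 8192);
            }
            other => panic!("unexpected message kind: {}", other.kind()),
        }
    }
}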
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_pagestream() {
|
||||
// Test serialization/deserialization of PagestreamFeMessage
|
||||
let messages = vec![
|
||||
PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(4),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
blkno: 7,
|
||||
}),
|
||||
PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
dbnode: 7,
|
||||
}),
|
||||
];
|
||||
for msg in messages {
|
||||
let bytes = msg.serialize();
|
||||
let reconstructed =
|
||||
PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
|
||||
.unwrap();
|
||||
assert!(msg == reconstructed);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,9 @@
use std::cmp::Ordering;
use std::fmt;

use postgres_ffi::Oid;
use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
use postgres_ffi::relfile_utils::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name};
use postgres_ffi_types::Oid;
use postgres_ffi_types::constants::GLOBALTABLESPACE_OID;
use postgres_ffi_types::forknum::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name};
use serde::{Deserialize, Serialize};

///

@@ -35,8 +35,9 @@ use std::hash::{Hash, Hasher};

#[doc(inline)]
pub use ::utils::shard::*;
use postgres_ffi::relfile_utils::INIT_FORKNUM;
use postgres_ffi_types::forknum::INIT_FORKNUM;
use serde::{Deserialize, Serialize};
use utils::critical;

use crate::key::Key;
use crate::models::ShardParameters;
@@ -179,7 +180,7 @@ impl ShardIdentity {

    /// For use when creating ShardIdentity instances for new shards, where a creation request
    /// specifies the ShardParameters that apply to all shards.
    pub fn from_params(number: ShardNumber, params: &ShardParameters) -> Self {
    pub fn from_params(number: ShardNumber, params: ShardParameters) -> Self {
        Self {
            number,
            count: params.count,
@@ -188,6 +189,17 @@ impl ShardIdentity {
        }
    }

    /// Asserts that the given shard identities are equal. Changes to shard parameters will likely
    /// result in data corruption.
    pub fn assert_equal(&self, other: ShardIdentity) {
        if self != &other {
            // TODO: for now, we're conservative and just log errors in production. Turn this into a
            // real assertion when we're confident it doesn't misfire, and also reject requests that
            // attempt to change it with an error response.
            critical!("shard identity mismatch: {self:?} != {other:?}");
        }
    }

    fn is_broken(&self) -> bool {
        self.layout == LAYOUT_BROKEN
    }

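// Illustrative sketch, not part of this diff: assert_equal above deliberately
// logs via critical! instead of panicking until the check has proven quiet in
// production. A generic version of that "log loudly, don't crash yet" pattern
// (the names here are assumptions for illustration, not crate APIs):
#[allow(dead_code)]
fn soft_assert_eq<T: std::fmt::Debug + PartialEq>(context: &str, expected: &T, actual: &T) {
    if expected != actual {
        // Once this is known not to misfire it can become a real assertion,
        // and the offending request can be rejected with an error response.
        eprintln!("{context}: mismatch: {expected:?} != {actual:?}");
    }
}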
@@ -23,22 +23,12 @@ pub struct ReAttachRequest {
    pub register: Option<NodeRegisterRequest>,
}

fn default_mode() -> LocationConfigMode {
    LocationConfigMode::AttachedSingle
}

#[derive(Serialize, Deserialize, Debug)]
pub struct ReAttachResponseTenant {
    pub id: TenantShardId,
    /// Mandatory if LocationConfigMode is None or set to an Attached* mode
    pub r#gen: Option<u32>,

    /// Default value only for backward compat: this field should be set
    #[serde(default = "default_mode")]
    pub mode: LocationConfigMode,

    // Default value only for backward compat: this field should be set
    #[serde(default = "ShardStripeSize::default")]
    pub stripe_size: ShardStripeSize,
}
#[derive(Serialize, Deserialize)]

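// Illustrative sketch, not part of this diff: the #[serde(default = "...")]
// attributes above keep deserialization working when an older peer omits the
// newer fields. A self-contained version of the same pattern; the struct, the
// field names, and the serde_json dependency are assumptions for illustration.
#[cfg(test)]
mod serde_default_sketch {
    use serde::Deserialize;

    fn default_flavor() -> String {
        "attached-single".to_string()
    }

    #[derive(Deserialize, Debug)]
    struct ExampleResponse {
        id: u32,
        #[serde(default = "default_flavor")]
        flavor: String,
    }

    #[test]
    fn old_payload_still_parses() {
        // An old payload without "flavor" still deserializes, picking up the default.
        let old: ExampleResponse = serde_json::from_str(r#"{ "id": 1 }"#).unwrap();
        assert_eq!(old.id, 1);
        assert_eq!(old.flavor, "attached-single");
    }
}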
@@ -939,7 +939,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackendReader<IO> {
                FeMessage::CopyFail => Err(CopyStreamHandlerEnd::CopyFail),
                FeMessage::Terminate => Err(CopyStreamHandlerEnd::Terminate),
                _ => Err(CopyStreamHandlerEnd::from(ConnectionError::Protocol(
                    ProtocolError::Protocol(format!("unexpected message in COPY stream {:?}", msg)),
                    ProtocolError::Protocol(format!("unexpected message in COPY stream {msg:?}")),
                ))),
            },
            None => Err(CopyStreamHandlerEnd::EOF),

@@ -61,7 +61,7 @@ async fn simple_select() {
    // so spawn it off to run on its own.
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("connection error: {}", e);
            eprintln!("connection error: {e}");
        }
    });

@@ -137,7 +137,7 @@ async fn simple_select_ssl() {
    // so spawn it off to run on its own.
    tokio::spawn(async move {
        if let Err(e) = connection.await {
            eprintln!("connection error: {}", e);
            eprintln!("connection error: {e}");
        }
    });


@@ -223,7 +223,7 @@ mod tests_pg_connection_config {
        assert_eq!(cfg.port(), 123);
        assert_eq!(cfg.raw_address(), "stub.host.example:123");
        assert_eq!(
            format!("{:?}", cfg),
            format!("{cfg:?}"),
            "PgConnectionConfig { host: Domain(\"stub.host.example\"), port: 123, password: None }"
        );
    }
@@ -239,7 +239,7 @@ mod tests_pg_connection_config {
        assert_eq!(cfg.port(), 123);
        assert_eq!(cfg.raw_address(), "[::1]:123");
        assert_eq!(
            format!("{:?}", cfg),
            format!("{cfg:?}"),
            "PgConnectionConfig { host: Ipv6(::1), port: 123, password: None }"
        );
    }
@@ -252,7 +252,7 @@ mod tests_pg_connection_config {
        assert_eq!(cfg.port(), 123);
        assert_eq!(cfg.raw_address(), "stub.host.example:123");
        assert_eq!(
            format!("{:?}", cfg),
            format!("{cfg:?}"),
            "PgConnectionConfig { host: Domain(\"stub.host.example\"), port: 123, password: Some(REDACTED-STRING) }"
        );
    }

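// Illustrative sketch, not part of this diff: the format! and eprintln! changes
// in the hunks above only switch to Rust's inline format arguments; the two
// forms below produce identical output.
#[allow(dead_code)]
fn inline_format_args_demo(e: &std::io::Error) {
    let old_style = format!("connection error: {}", e);
    let new_style = format!("connection error: {e}");
    assert_eq!(old_style, new_style);
}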