mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-29 08:10:38 +00:00
Compare commits
69 Commits
diko/baseb
...
vlad/debug
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
95bb6ce2e4 | ||
|
|
858f5f2ddc | ||
|
|
a4fdef69ad | ||
|
|
dae1b58964 | ||
|
|
e91a410472 | ||
|
|
9553a2670e | ||
|
|
5e2c444525 | ||
|
|
8d711229c1 | ||
|
|
0e490f3be7 | ||
|
|
7e41ef1bec | ||
|
|
74d2d233e4 | ||
|
|
7916aa26e0 | ||
|
|
52ab8f3e65 | ||
|
|
3d822dbbde | ||
|
|
af46b5286f | ||
|
|
47f7efee06 | ||
|
|
868c38f522 | ||
|
|
c8b2ac93cf | ||
|
|
b2954d16ff | ||
|
|
79485e7c3a | ||
|
|
eaf1ab21c4 | ||
|
|
6508f4e5c1 | ||
|
|
a298d2c29b | ||
|
|
8b197de7ff | ||
|
|
57edf217b7 | ||
|
|
ab1335cba0 | ||
|
|
15d079cd41 | ||
|
|
dc1625cd8e | ||
|
|
a6d4de25cd | ||
|
|
ec1452a559 | ||
|
|
1950ccfe33 | ||
|
|
2ca6665f4a | ||
|
|
fa954671b2 | ||
|
|
6f4ffdb48b | ||
|
|
3f676df3d5 | ||
|
|
20f4febce1 | ||
|
|
762905cf8d | ||
|
|
830ef35ed3 | ||
|
|
d8d62fb7cb | ||
|
|
e6a404c66d | ||
|
|
7e711ede44 | ||
|
|
e95f2f9a67 | ||
|
|
5a045e7d52 | ||
|
|
67fbc0582e | ||
|
|
3af6b3a2bf | ||
|
|
04013929cb | ||
|
|
83069f6ca1 | ||
|
|
7d4f662fbf | ||
|
|
a5cac52e26 | ||
|
|
dfa055f4be | ||
|
|
a4c76740c0 | ||
|
|
f2e96b2323 | ||
|
|
dee73f0cb4 | ||
|
|
edf51688bc | ||
|
|
4a8f3508f9 | ||
|
|
48052477b4 | ||
|
|
d81353b2d1 | ||
|
|
143500dc4f | ||
|
|
1a5f7ce6ad | ||
|
|
01ccb34118 | ||
|
|
f669e18477 | ||
|
|
632cde7f13 | ||
|
|
118e13438d | ||
|
|
fc136eec8f | ||
|
|
818e5130f1 | ||
|
|
c243521ae5 | ||
|
|
5303c71589 | ||
|
|
d146897415 | ||
|
|
d63815fa40 |
78
.github/workflows/_build-and-test-locally.yml
vendored
78
.github/workflows/_build-and-test-locally.yml
vendored
@@ -38,6 +38,11 @@ on:
|
||||
required: false
|
||||
default: 1
|
||||
type: number
|
||||
rerun-failed:
|
||||
description: 'rerun failed tests to ignore flaky tests'
|
||||
required: false
|
||||
default: true
|
||||
type: boolean
|
||||
|
||||
defaults:
|
||||
run:
|
||||
@@ -99,11 +104,10 @@ jobs:
|
||||
|
||||
# Set some environment variables used by all the steps.
|
||||
#
|
||||
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
|
||||
# It also includes --features, if any
|
||||
# CARGO_FLAGS is extra options to pass to all "cargo" subcommands.
|
||||
#
|
||||
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
|
||||
# because "cargo metadata" doesn't accept --release or --debug options
|
||||
# CARGO_PROFILE is passed to "cargo build", "cargo test" etc, but not to
|
||||
# "cargo metadata", because it doesn't accept --release or --debug options.
|
||||
#
|
||||
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
|
||||
# corresponding Cargo.toml files for their descriptions.
|
||||
@@ -112,16 +116,16 @@ jobs:
|
||||
ARCH: ${{ inputs.arch }}
|
||||
SANITIZERS: ${{ inputs.sanitizers }}
|
||||
run: |
|
||||
CARGO_FEATURES="--features testing"
|
||||
CARGO_FLAGS="--locked --features testing"
|
||||
if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
|
||||
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
|
||||
CARGO_FLAGS="--locked"
|
||||
CARGO_PROFILE=""
|
||||
elif [[ $BUILD_TYPE == "debug" ]]; then
|
||||
cov_prefix=""
|
||||
CARGO_FLAGS="--locked"
|
||||
CARGO_PROFILE=""
|
||||
elif [[ $BUILD_TYPE == "release" ]]; then
|
||||
cov_prefix=""
|
||||
CARGO_FLAGS="--locked --release"
|
||||
CARGO_PROFILE="--release"
|
||||
fi
|
||||
if [[ $SANITIZERS == 'enabled' ]]; then
|
||||
make_vars="WITH_SANITIZERS=yes"
|
||||
@@ -131,8 +135,8 @@ jobs:
|
||||
{
|
||||
echo "cov_prefix=${cov_prefix}"
|
||||
echo "make_vars=${make_vars}"
|
||||
echo "CARGO_FEATURES=${CARGO_FEATURES}"
|
||||
echo "CARGO_FLAGS=${CARGO_FLAGS}"
|
||||
echo "CARGO_PROFILE=${CARGO_PROFILE}"
|
||||
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
|
||||
} >> $GITHUB_ENV
|
||||
|
||||
@@ -184,34 +188,18 @@ jobs:
|
||||
path: pg_install/v17
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
|
||||
|
||||
- name: Build postgres v14
|
||||
if: steps.cache_pg_14.outputs.cache-hit != 'true'
|
||||
run: mold -run make ${make_vars} postgres-v14 -j$(nproc)
|
||||
|
||||
- name: Build postgres v15
|
||||
if: steps.cache_pg_15.outputs.cache-hit != 'true'
|
||||
run: mold -run make ${make_vars} postgres-v15 -j$(nproc)
|
||||
|
||||
- name: Build postgres v16
|
||||
if: steps.cache_pg_16.outputs.cache-hit != 'true'
|
||||
run: mold -run make ${make_vars} postgres-v16 -j$(nproc)
|
||||
|
||||
- name: Build postgres v17
|
||||
if: steps.cache_pg_17.outputs.cache-hit != 'true'
|
||||
run: mold -run make ${make_vars} postgres-v17 -j$(nproc)
|
||||
|
||||
- name: Build neon extensions
|
||||
run: mold -run make ${make_vars} neon-pg-ext -j$(nproc)
|
||||
- name: Build all
|
||||
# Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables
|
||||
run: mold -run make ${make_vars} all -j$(nproc) CARGO_BUILD_FLAGS="$CARGO_FLAGS"
|
||||
|
||||
- name: Build walproposer-lib
|
||||
run: mold -run make ${make_vars} walproposer-lib -j$(nproc)
|
||||
|
||||
- name: Run cargo build
|
||||
env:
|
||||
WITH_TESTS: ${{ inputs.sanitizers != 'enabled' && '--tests' || '' }}
|
||||
- name: Build unit tests
|
||||
if: inputs.sanitizers != 'enabled'
|
||||
run: |
|
||||
export ASAN_OPTIONS=detect_leaks=0
|
||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins ${WITH_TESTS}
|
||||
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_PROFILE --tests
|
||||
|
||||
# Do install *before* running rust tests because they might recompile the
|
||||
# binaries with different features/flags.
|
||||
@@ -223,7 +211,7 @@ jobs:
|
||||
# Install target binaries
|
||||
mkdir -p /tmp/neon/bin/
|
||||
binaries=$(
|
||||
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
|
||||
${cov_prefix} cargo metadata $CARGO_FLAGS --format-version=1 --no-deps |
|
||||
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
|
||||
)
|
||||
for bin in $binaries; do
|
||||
@@ -240,7 +228,7 @@ jobs:
|
||||
mkdir -p /tmp/neon/test_bin/
|
||||
|
||||
test_exe_paths=$(
|
||||
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
|
||||
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_PROFILE --message-format=json --no-run |
|
||||
jq -r '.executable | select(. != null)'
|
||||
)
|
||||
for bin in $test_exe_paths; do
|
||||
@@ -274,10 +262,10 @@ jobs:
|
||||
export LD_LIBRARY_PATH
|
||||
|
||||
#nextest does not yet support running doctests
|
||||
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
|
||||
${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_PROFILE
|
||||
|
||||
# run all non-pageserver tests
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E '!package(pageserver)'
|
||||
|
||||
# run pageserver tests
|
||||
# (When developing new pageserver features gated by config fields, we commonly make the rust
|
||||
@@ -286,13 +274,13 @@ jobs:
|
||||
# pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)
|
||||
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring \
|
||||
${cov_prefix} \
|
||||
cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(pageserver)'
|
||||
cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(pageserver)'
|
||||
|
||||
# Run separate tests for real S3
|
||||
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
|
||||
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
|
||||
export REMOTE_STORAGE_S3_REGION=eu-central-1
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_s3)'
|
||||
|
||||
# Run separate tests for real Azure Blob Storage
|
||||
# XXX: replace region with `eu-central-1`-like region
|
||||
@@ -301,17 +289,17 @@ jobs:
|
||||
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
|
||||
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
|
||||
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_PROFILE -E 'package(remote_storage)' -E 'test(test_real_azure)'
|
||||
|
||||
- name: Install postgres binaries
|
||||
run: |
|
||||
# Use tar to copy files matching the pattern, preserving the paths in the destionation
|
||||
tar c \
|
||||
pg_install/v* \
|
||||
pg_install/build/*/src/test/regress/*.so \
|
||||
pg_install/build/*/src/test/regress/pg_regress \
|
||||
pg_install/build/*/src/test/isolation/isolationtester \
|
||||
pg_install/build/*/src/test/isolation/pg_isolation_regress \
|
||||
build/*/src/test/regress/*.so \
|
||||
build/*/src/test/regress/pg_regress \
|
||||
build/*/src/test/isolation/isolationtester \
|
||||
build/*/src/test/isolation/pg_isolation_regress \
|
||||
| tar x -C /tmp/neon
|
||||
|
||||
- name: Upload Neon artifact
|
||||
@@ -379,7 +367,7 @@ jobs:
|
||||
- name: Pytest regression tests
|
||||
continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 75 || 180 }}
|
||||
timeout-minutes: ${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 75 || 180 }}
|
||||
with:
|
||||
build_type: ${{ inputs.build-type }}
|
||||
test_selection: regress
|
||||
@@ -387,14 +375,14 @@ jobs:
|
||||
run_with_real_s3: true
|
||||
real_s3_bucket: neon-github-ci-tests
|
||||
real_s3_region: eu-central-1
|
||||
rerun_failed: ${{ inputs.test-run-count == 1 }}
|
||||
rerun_failed: ${{ inputs.rerun-failed }}
|
||||
pg_version: ${{ matrix.pg_version }}
|
||||
sanitizers: ${{ inputs.sanitizers }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
# `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
|
||||
# Attempt to stop tests gracefully to generate test reports
|
||||
# until they are forcibly stopped by the stricter `timeout-minutes` limit.
|
||||
extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
|
||||
extra_params: --session-timeout=${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
|
||||
${{ inputs.test-selection != '' && format('-k "{0}"', inputs.test-selection) || '' }}
|
||||
env:
|
||||
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
16
.github/workflows/build-macos.yml
vendored
16
.github/workflows/build-macos.yml
vendored
@@ -110,7 +110,7 @@ jobs:
|
||||
|
||||
build-walproposer-lib:
|
||||
if: |
|
||||
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
|
||||
contains(inputs.pg_versions, 'v17') || inputs.rebuild_everything ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
|
||||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
|
||||
github.ref_name == 'main'
|
||||
@@ -144,7 +144,7 @@ jobs:
|
||||
id: cache_walproposer_lib
|
||||
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
|
||||
with:
|
||||
path: pg_install/build/walproposer-lib
|
||||
path: build/walproposer-lib
|
||||
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
|
||||
|
||||
- name: Checkout submodule vendor/postgres-v17
|
||||
@@ -169,11 +169,11 @@ jobs:
|
||||
run:
|
||||
make walproposer-lib -j$(sysctl -n hw.ncpu)
|
||||
|
||||
- name: Upload "pg_install/build/walproposer-lib" artifact
|
||||
- name: Upload "build/walproposer-lib" artifact
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: pg_install--build--walproposer-lib
|
||||
path: pg_install/build/walproposer-lib
|
||||
name: build--walproposer-lib
|
||||
path: build/walproposer-lib
|
||||
# The artifact is supposed to be used by the next job in the same workflow,
|
||||
# so there’s no need to store it for too long.
|
||||
retention-days: 1
|
||||
@@ -226,11 +226,11 @@ jobs:
|
||||
name: pg_install--v17
|
||||
path: pg_install/v17
|
||||
|
||||
- name: Download "pg_install/build/walproposer-lib" artifact
|
||||
- name: Download "build/walproposer-lib" artifact
|
||||
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
|
||||
with:
|
||||
name: pg_install--build--walproposer-lib
|
||||
path: pg_install/build/walproposer-lib
|
||||
name: build--walproposer-lib
|
||||
path: build/walproposer-lib
|
||||
|
||||
# `actions/download-artifact` doesn't preserve permissions:
|
||||
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
|
||||
|
||||
@@ -58,6 +58,7 @@ jobs:
|
||||
test-cfg: ${{ inputs.pg-versions }}
|
||||
test-selection: ${{ inputs.test-selection }}
|
||||
test-run-count: ${{ fromJson(inputs.run-count) }}
|
||||
rerun-failed: false
|
||||
secrets: inherit
|
||||
|
||||
create-test-report:
|
||||
|
||||
26
.github/workflows/build_and_test.yml
vendored
26
.github/workflows/build_and_test.yml
vendored
@@ -199,6 +199,28 @@ jobs:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
secrets: inherit
|
||||
|
||||
validate-compute-manifest:
|
||||
runs-on: ubuntu-22.04
|
||||
needs: [ meta, check-permissions ]
|
||||
# We do need to run this in `.*-rc-pr` because of hotfixes.
|
||||
if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
with:
|
||||
node-version: '24'
|
||||
|
||||
- name: Validate manifest against schema
|
||||
run: |
|
||||
make -C compute manifest-schema-validation
|
||||
|
||||
build-and-test-locally:
|
||||
needs: [ meta, build-build-tools-image ]
|
||||
# We do need to run this in `.*-rc-pr` because of hotfixes.
|
||||
@@ -648,7 +670,7 @@ jobs:
|
||||
ghcr.io/neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64
|
||||
|
||||
compute-node-image-arch:
|
||||
needs: [ check-permissions, build-build-tools-image, meta ]
|
||||
needs: [ check-permissions, meta ]
|
||||
if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
@@ -721,7 +743,6 @@ jobs:
|
||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||
PG_VERSION=${{ matrix.version.pg }}
|
||||
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
|
||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
|
||||
DEBIAN_VERSION=${{ matrix.version.debian }}
|
||||
provenance: false
|
||||
push: true
|
||||
@@ -741,7 +762,6 @@ jobs:
|
||||
GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
|
||||
PG_VERSION=${{ matrix.version.pg }}
|
||||
BUILD_TAG=${{ needs.meta.outputs.release-tag || needs.meta.outputs.build-tag }}
|
||||
TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
|
||||
DEBIAN_VERSION=${{ matrix.version.debian }}
|
||||
provenance: false
|
||||
push: true
|
||||
|
||||
151
.github/workflows/build_and_test_fully.yml
vendored
Normal file
151
.github/workflows/build_and_test_fully.yml
vendored
Normal file
@@ -0,0 +1,151 @@
|
||||
name: Build and Test Fully
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 3 * * *' # run once a day, timezone is utc
|
||||
workflow_dispatch:
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow per any non-`main` branch.
|
||||
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
RUST_BACKTRACE: 1
|
||||
COPT: '-Werror'
|
||||
|
||||
jobs:
|
||||
tag:
|
||||
runs-on: [ self-hosted, small ]
|
||||
container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
|
||||
outputs:
|
||||
build-tag: ${{steps.build-tag.outputs.tag}}
|
||||
|
||||
steps:
|
||||
# Need `fetch-depth: 0` to count the number of commits in the branch
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get build tag
|
||||
run: |
|
||||
echo run:$GITHUB_RUN_ID
|
||||
echo ref:$GITHUB_REF_NAME
|
||||
echo rev:$(git rev-list --count HEAD)
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
|
||||
echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
|
||||
echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'"
|
||||
echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
shell: bash
|
||||
id: build-tag
|
||||
|
||||
build-build-tools-image:
|
||||
uses: ./.github/workflows/build-build-tools-image.yml
|
||||
secrets: inherit
|
||||
|
||||
build-and-test-locally:
|
||||
needs: [ tag, build-build-tools-image ]
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: [ x64, arm64 ]
|
||||
build-type: [ debug, release ]
|
||||
uses: ./.github/workflows/_build-and-test-locally.yml
|
||||
with:
|
||||
arch: ${{ matrix.arch }}
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
build-tag: ${{ needs.tag.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
rerun-failed: false
|
||||
test-cfg: '[{"pg_version":"v14", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v15", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v16", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "with-lfc"},
|
||||
{"pg_version":"v14", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v15", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v16", "lfc_state": "without-lfc"},
|
||||
{"pg_version":"v17", "lfc_state": "withouts-lfc"}]'
|
||||
secrets: inherit
|
||||
|
||||
|
||||
create-test-report:
|
||||
needs: [ build-and-test-locally, build-build-tools-image ]
|
||||
if: ${{ !cancelled() }}
|
||||
permissions:
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
statuses: write
|
||||
contents: write
|
||||
pull-requests: write
|
||||
outputs:
|
||||
report-url: ${{ steps.create-allure-report.outputs.report-url }}
|
||||
|
||||
runs-on: [ self-hosted, small ]
|
||||
container:
|
||||
image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Create Allure report
|
||||
if: ${{ !cancelled() }}
|
||||
id: create-allure-report
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
store-test-results-into-db: true
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
|
||||
|
||||
- uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
|
||||
if: ${{ !cancelled() }}
|
||||
with:
|
||||
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
|
||||
retries: 5
|
||||
script: |
|
||||
const report = {
|
||||
reportUrl: "${{ steps.create-allure-report.outputs.report-url }}",
|
||||
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
|
||||
}
|
||||
|
||||
const coverage = {}
|
||||
|
||||
const script = require("./scripts/comment-test-report.js")
|
||||
await script({
|
||||
github,
|
||||
context,
|
||||
fetch,
|
||||
report,
|
||||
coverage,
|
||||
})
|
||||
@@ -79,6 +79,7 @@ jobs:
|
||||
build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
|
||||
build-tag: ${{ needs.tag.outputs.build-tag }}
|
||||
build-type: ${{ matrix.build-type }}
|
||||
rerun-failed: false
|
||||
test-cfg: '[{"pg_version":"v17"}]'
|
||||
sanitizers: enabled
|
||||
secrets: inherit
|
||||
|
||||
11
.github/workflows/large_oltp_benchmark.yml
vendored
11
.github/workflows/large_oltp_benchmark.yml
vendored
@@ -33,11 +33,19 @@ jobs:
|
||||
fail-fast: false # allow other variants to continue even if one fails
|
||||
matrix:
|
||||
include:
|
||||
# test only read-only custom scripts in new branch without database maintenance
|
||||
- target: new_branch
|
||||
custom_scripts: select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3
|
||||
test_maintenance: false
|
||||
# test all custom scripts in new branch with database maintenance
|
||||
- target: new_branch
|
||||
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
||||
test_maintenance: true
|
||||
# test all custom scripts in reuse branch with database maintenance
|
||||
- target: reuse_branch
|
||||
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
|
||||
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
|
||||
test_maintenance: true
|
||||
max-parallel: 1 # we want to run each benchmark sequentially to not have noisy neighbors on shared storage (PS, SK)
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
@@ -145,6 +153,7 @@ jobs:
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Benchmark database maintenance
|
||||
if: ${{ matrix.test_maintenance == 'true' }}
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
|
||||
175
.github/workflows/large_oltp_growth.yml
vendored
Normal file
175
.github/workflows/large_oltp_growth.yml
vendored
Normal file
@@ -0,0 +1,175 @@
|
||||
name: large oltp growth
|
||||
# workflow to grow the reuse branch of large oltp benchmark continuously (about 16 GB per run)
|
||||
|
||||
on:
|
||||
# uncomment to run on push for debugging your PR
|
||||
# push:
|
||||
# branches: [ bodobolero/increase_large_oltp_workload ]
|
||||
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
# ┌───────────── minute (0 - 59)
|
||||
# │ ┌───────────── hour (0 - 23)
|
||||
# │ │ ┌───────────── day of the month (1 - 31)
|
||||
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
|
||||
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
|
||||
- cron: '0 6 * * *' # 06:00 UTC
|
||||
- cron: '0 8 * * *' # 08:00 UTC
|
||||
- cron: '0 10 * * *' # 10:00 UTC
|
||||
- cron: '0 12 * * *' # 12:00 UTC
|
||||
- cron: '0 14 * * *' # 14:00 UTC
|
||||
- cron: '0 16 * * *' # 16:00 UTC
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
|
||||
defaults:
|
||||
run:
|
||||
shell: bash -euxo pipefail {0}
|
||||
|
||||
concurrency:
|
||||
# Allow only one workflow globally because we need dedicated resources which only exist once
|
||||
group: large-oltp-growth
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
oltp:
|
||||
strategy:
|
||||
fail-fast: false # allow other variants to continue even if one fails
|
||||
matrix:
|
||||
include:
|
||||
# for now only grow the reuse branch, not the other branches.
|
||||
- target: reuse_branch
|
||||
custom_scripts:
|
||||
- grow_action_blocks.sql
|
||||
- grow_action_kwargs.sql
|
||||
- grow_device_fingerprint_event.sql
|
||||
- grow_edges.sql
|
||||
- grow_hotel_rate_mapping.sql
|
||||
- grow_ocr_pipeline_results_version.sql
|
||||
- grow_priceline_raw_response.sql
|
||||
- grow_relabled_transactions.sql
|
||||
- grow_state_values.sql
|
||||
- grow_values.sql
|
||||
- grow_vertices.sql
|
||||
- update_accounting_coding_body_tracking_category_selection.sql
|
||||
- update_action_blocks.sql
|
||||
- update_action_kwargs.sql
|
||||
- update_denormalized_approval_workflow.sql
|
||||
- update_device_fingerprint_event.sql
|
||||
- update_edges.sql
|
||||
- update_heron_transaction_enriched_log.sql
|
||||
- update_heron_transaction_enrichment_requests.sql
|
||||
- update_hotel_rate_mapping.sql
|
||||
- update_incoming_webhooks.sql
|
||||
- update_manual_transaction.sql
|
||||
- update_ml_receipt_matching_log.sql
|
||||
- update_ocr_pipeine_results_version.sql
|
||||
- update_orc_pipeline_step_results.sql
|
||||
- update_orc_pipeline_step_results_version.sql
|
||||
- update_priceline_raw_response.sql
|
||||
- update_quickbooks_transactions.sql
|
||||
- update_raw_finicity_transaction.sql
|
||||
- update_relabeled_transactions.sql
|
||||
- update_state_values.sql
|
||||
- update_stripe_authorization_event_log.sql
|
||||
- update_transaction.sql
|
||||
- update_values.sql
|
||||
- update_vertices.sql
|
||||
max-parallel: 1 # we want to run each growth workload sequentially (for now there is just one)
|
||||
permissions:
|
||||
contents: write
|
||||
statuses: write
|
||||
id-token: write # aws-actions/configure-aws-credentials
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "1h"
|
||||
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ join(matrix.custom_scripts, ' ') }}
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
PG_VERSION: 16 # pre-determined by pre-determined project
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
PLATFORM: ${{ matrix.target }}
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
|
||||
credentials:
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
options: --init
|
||||
|
||||
steps:
|
||||
- name: Harden the runner (Audit all outbound calls)
|
||||
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
|
||||
with:
|
||||
egress-policy: audit
|
||||
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Configure AWS credentials # necessary to download artefacts
|
||||
uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
|
||||
with:
|
||||
aws-region: eu-central-1
|
||||
role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${{ matrix.target }}" in
|
||||
reuse_branch)
|
||||
CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo >&2 "Unknown target=${{ matrix.target }}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}"
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: pgbench with custom-scripts
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: true
|
||||
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_growth
|
||||
pg_version: ${{ env.PG_VERSION }}
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Create Allure report
|
||||
id: create-allure-report
|
||||
if: ${{ !cancelled() }}
|
||||
uses: ./.github/actions/allure-report-generate
|
||||
with:
|
||||
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
|
||||
with:
|
||||
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
|
||||
slack-message: |
|
||||
Periodic large oltp tenant growth increase: ${{ job.status }}
|
||||
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
|
||||
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
||||
/artifact_cache
|
||||
/build
|
||||
/pg_install
|
||||
/target
|
||||
/tmp_check
|
||||
|
||||
21
Cargo.lock
generated
21
Cargo.lock
generated
@@ -4255,6 +4255,7 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tonic 0.13.1",
|
||||
"tracing",
|
||||
"url",
|
||||
"utils",
|
||||
"workspace_hack",
|
||||
]
|
||||
@@ -4334,6 +4335,7 @@ dependencies = [
|
||||
"postgres_backend",
|
||||
"postgres_connection",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"postgres_initdb",
|
||||
"posthog_client_lite",
|
||||
"pprof",
|
||||
@@ -4403,7 +4405,7 @@ dependencies = [
|
||||
"nix 0.30.1",
|
||||
"once_cell",
|
||||
"postgres_backend",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"rand 0.8.5",
|
||||
"remote_storage",
|
||||
"reqwest",
|
||||
@@ -4465,11 +4467,16 @@ dependencies = [
|
||||
name = "pageserver_page_api"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
"futures",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"prost 0.13.5",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tonic 0.13.1",
|
||||
"tonic-build",
|
||||
"utils",
|
||||
@@ -4889,6 +4896,7 @@ dependencies = [
|
||||
"memoffset 0.9.0",
|
||||
"once_cell",
|
||||
"postgres",
|
||||
"postgres_ffi_types",
|
||||
"pprof",
|
||||
"regex",
|
||||
"serde",
|
||||
@@ -4897,6 +4905,14 @@ dependencies = [
|
||||
"utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres_ffi_types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"thiserror 1.0.69",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "postgres_initdb"
|
||||
version = "0.1.0"
|
||||
@@ -7558,6 +7574,7 @@ dependencies = [
|
||||
"axum",
|
||||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"flate2",
|
||||
"h2 0.4.4",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
@@ -7577,6 +7594,7 @@ dependencies = [
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"zstd",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -8156,6 +8174,7 @@ dependencies = [
|
||||
"futures",
|
||||
"pageserver_api",
|
||||
"postgres_ffi",
|
||||
"postgres_ffi_types",
|
||||
"pprof",
|
||||
"prost 0.13.5",
|
||||
"remote_storage",
|
||||
|
||||
@@ -22,6 +22,7 @@ members = [
|
||||
"libs/http-utils",
|
||||
"libs/pageserver_api",
|
||||
"libs/postgres_ffi",
|
||||
"libs/postgres_ffi_types",
|
||||
"libs/safekeeper_api",
|
||||
"libs/desim",
|
||||
"libs/neon-shmem",
|
||||
@@ -199,7 +200,7 @@ tokio-tar = "0.3"
|
||||
tokio-util = { version = "0.7.10", features = ["io", "rt"] }
|
||||
toml = "0.8"
|
||||
toml_edit = "0.22"
|
||||
tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "prost", "router", "server", "tls-ring", "tls-native-roots"] }
|
||||
tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "gzip", "prost", "router", "server", "tls-ring", "tls-native-roots", "zstd"] }
|
||||
tonic-reflection = { version = "0.13.1", features = ["server"] }
|
||||
tower = { version = "0.5.2", default-features = false }
|
||||
tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }
|
||||
@@ -259,6 +260,7 @@ pageserver_page_api = { path = "./pageserver/page_api" }
|
||||
postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
|
||||
postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
|
||||
postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
|
||||
postgres_ffi_types = { version = "0.1", path = "./libs/postgres_ffi_types/" }
|
||||
postgres_initdb = { path = "./libs/postgres_initdb" }
|
||||
posthog_client_lite = { version = "0.1", path = "./libs/posthog_client_lite" }
|
||||
pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
|
||||
|
||||
@@ -5,8 +5,6 @@
|
||||
ARG REPOSITORY=ghcr.io/neondatabase
|
||||
ARG IMAGE=build-tools
|
||||
ARG TAG=pinned
|
||||
ARG DEFAULT_PG_VERSION=17
|
||||
ARG STABLE_PG_VERSION=16
|
||||
ARG DEBIAN_VERSION=bookworm
|
||||
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
|
||||
|
||||
@@ -47,7 +45,6 @@ COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
|
||||
ENV BUILD_TYPE=release
|
||||
RUN set -e \
|
||||
&& mold -run make -j $(nproc) -s neon-pg-ext \
|
||||
&& rm -rf pg_install/build \
|
||||
&& tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
|
||||
|
||||
# Prepare cargo-chef recipe
|
||||
@@ -63,14 +60,11 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
|
||||
WORKDIR /home/nonroot
|
||||
ARG GIT_VERSION=local
|
||||
ARG BUILD_TAG
|
||||
ARG STABLE_PG_VERSION
|
||||
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
|
||||
COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
|
||||
COPY --from=plan /home/nonroot/recipe.json recipe.json
|
||||
|
||||
ARG ADDITIONAL_RUSTFLAGS=""
|
||||
@@ -97,7 +91,6 @@ RUN set -e \
|
||||
# Build final image
|
||||
#
|
||||
FROM $BASE_IMAGE_SHA
|
||||
ARG DEFAULT_PG_VERSION
|
||||
WORKDIR /data
|
||||
|
||||
RUN set -e \
|
||||
@@ -107,8 +100,6 @@ RUN set -e \
|
||||
libreadline-dev \
|
||||
libseccomp-dev \
|
||||
ca-certificates \
|
||||
# System postgres for use with client libraries (e.g. in storage controller)
|
||||
postgresql-15 \
|
||||
openssl \
|
||||
unzip \
|
||||
curl \
|
||||
|
||||
157
Makefile
157
Makefile
@@ -1,8 +1,18 @@
|
||||
ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
|
||||
|
||||
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
|
||||
# Where to install Postgres, default is ./pg_install, maybe useful for package
|
||||
# managers.
|
||||
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
|
||||
|
||||
# CARGO_BUILD_FLAGS: Extra flags to pass to `cargo build`. `--locked`
|
||||
# and `--features testing` are popular examples.
|
||||
#
|
||||
# CARGO_PROFILE: You can also set to override the cargo profile to
|
||||
# use. By default, it is derived from BUILD_TYPE.
|
||||
|
||||
# All intermediate build artifacts are stored here.
|
||||
BUILD_DIR := build
|
||||
|
||||
ICU_PREFIX_DIR := /usr/local/icu
|
||||
|
||||
#
|
||||
@@ -16,12 +26,12 @@ ifeq ($(BUILD_TYPE),release)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl
|
||||
PG_CFLAGS += -O2 -g3 $(CFLAGS)
|
||||
PG_LDFLAGS = $(LDFLAGS)
|
||||
# Unfortunately, `--profile=...` is a nightly feature
|
||||
CARGO_BUILD_FLAGS += --release
|
||||
CARGO_PROFILE ?= --profile=release
|
||||
else ifeq ($(BUILD_TYPE),debug)
|
||||
PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
|
||||
PG_CFLAGS += -O0 -g3 $(CFLAGS)
|
||||
PG_LDFLAGS = $(LDFLAGS)
|
||||
CARGO_PROFILE ?= --profile=dev
|
||||
else
|
||||
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
|
||||
endif
|
||||
@@ -93,7 +103,7 @@ all: neon postgres neon-pg-ext
|
||||
.PHONY: neon
|
||||
neon: postgres-headers walproposer-lib cargo-target-dir
|
||||
+@echo "Compiling Neon"
|
||||
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
|
||||
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE)
|
||||
.PHONY: cargo-target-dir
|
||||
cargo-target-dir:
|
||||
# https://github.com/rust-lang/cargo/issues/14281
|
||||
@@ -104,21 +114,20 @@ cargo-target-dir:
|
||||
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
|
||||
# to avoid the duplication in the future, but it's tolerable for now.
|
||||
#
|
||||
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
||||
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)
|
||||
test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
|
||||
$(BUILD_DIR)/%/config.status:
|
||||
mkdir -p $(BUILD_DIR)
|
||||
test -e $(BUILD_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(BUILD_DIR)/CACHEDIR.TAG
|
||||
|
||||
+@echo "Configuring Postgres $* build"
|
||||
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
|
||||
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
|
||||
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
|
||||
exit 1; }
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
|
||||
mkdir -p $(BUILD_DIR)/$*
|
||||
|
||||
VERSION=$*; \
|
||||
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
|
||||
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
|
||||
(cd $(BUILD_DIR)/$$VERSION && \
|
||||
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
|
||||
CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
|
||||
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
|
||||
@@ -130,96 +139,54 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
|
||||
# the "build-all-versions" entry points) where direct mention of PostgreSQL
|
||||
# versions is used.
|
||||
.PHONY: postgres-configure-v17
|
||||
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
|
||||
postgres-configure-v17: $(BUILD_DIR)/v17/config.status
|
||||
.PHONY: postgres-configure-v16
|
||||
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
|
||||
postgres-configure-v16: $(BUILD_DIR)/v16/config.status
|
||||
.PHONY: postgres-configure-v15
|
||||
postgres-configure-v15: $(POSTGRES_INSTALL_DIR)/build/v15/config.status
|
||||
postgres-configure-v15: $(BUILD_DIR)/v15/config.status
|
||||
.PHONY: postgres-configure-v14
|
||||
postgres-configure-v14: $(POSTGRES_INSTALL_DIR)/build/v14/config.status
|
||||
postgres-configure-v14: $(BUILD_DIR)/v14/config.status
|
||||
|
||||
# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
|
||||
.PHONY: postgres-headers-%
|
||||
postgres-headers-%: postgres-configure-%
|
||||
+@echo "Installing PostgreSQL $* headers"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/include MAKELEVEL=0 install
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/src/include MAKELEVEL=0 install
|
||||
|
||||
# Compile and install PostgreSQL
|
||||
.PHONY: postgres-%
|
||||
postgres-%: postgres-configure-% \
|
||||
postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
|
||||
+@echo "Compiling PostgreSQL $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 install
|
||||
$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 install
|
||||
+@echo "Compiling libpq $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq install
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/src/interfaces/libpq install
|
||||
+@echo "Compiling pg_prewarm $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_prewarm install
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_prewarm install
|
||||
+@echo "Compiling pg_buffercache $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache install
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_buffercache install
|
||||
+@echo "Compiling pg_visibility $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_visibility install
|
||||
+@echo "Compiling pageinspect $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pageinspect install
|
||||
+@echo "Compiling pg_trgm $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_trgm install
|
||||
+@echo "Compiling amcheck $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/contrib/amcheck install
|
||||
+@echo "Compiling test_decoding $*"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
|
||||
|
||||
.PHONY: postgres-clean-%
|
||||
postgres-clean-%:
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 clean
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_buffercache clean
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect clean
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/interfaces/libpq clean
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/contrib/test_decoding install
|
||||
|
||||
.PHONY: postgres-check-%
|
||||
postgres-check-%: postgres-%
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$* MAKELEVEL=0 check
|
||||
$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 check
|
||||
|
||||
.PHONY: neon-pg-ext-%
|
||||
neon-pg-ext-%: postgres-%
|
||||
+@echo "Compiling neon $*"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
|
||||
+@echo "Compiling neon-specific Postgres extensions for $*"
|
||||
mkdir -p $(BUILD_DIR)/pgxn-$*
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
|
||||
+@echo "Compiling neon_walredo $*"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
|
||||
+@echo "Compiling neon_rmgr $*"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
|
||||
+@echo "Compiling neon_test_utils $*"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
|
||||
+@echo "Compiling neon_utils $*"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
|
||||
|
||||
.PHONY: neon-pg-clean-ext-%
|
||||
neon-pg-clean-ext-%:
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile clean
|
||||
-C $(BUILD_DIR)/pgxn-$*\
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/Makefile install
|
||||
|
||||
# Build walproposer as a static library. walproposer source code is located
|
||||
# in the pgxn/neon directory.
|
||||
@@ -233,15 +200,15 @@ neon-pg-clean-ext-%:
|
||||
.PHONY: walproposer-lib
|
||||
walproposer-lib: neon-pg-ext-v17
|
||||
+@echo "Compiling walproposer-lib"
|
||||
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
mkdir -p $(BUILD_DIR)/walproposer-lib
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
|
||||
-C $(BUILD_DIR)/walproposer-lib \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(BUILD_DIR)/walproposer-lib
|
||||
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(BUILD_DIR)/walproposer-lib
|
||||
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgport.a \
|
||||
pg_strong_random.o
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
||||
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
|
||||
checksum_helper.o \
|
||||
cryptohash_openssl.o \
|
||||
hmac_openssl.o \
|
||||
@@ -249,16 +216,10 @@ walproposer-lib: neon-pg-ext-v17
|
||||
parse_manifest.o \
|
||||
scram-common.o
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
|
||||
$(AR) d $(BUILD_DIR)/walproposer-lib/libpgcommon.a \
|
||||
pg_crc32c.o
|
||||
endif
|
||||
|
||||
.PHONY: walproposer-lib-clean
|
||||
walproposer-lib-clean:
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
|
||||
|
||||
.PHONY: neon-pg-ext
|
||||
neon-pg-ext: \
|
||||
neon-pg-ext-v14 \
|
||||
@@ -266,13 +227,6 @@ neon-pg-ext: \
|
||||
neon-pg-ext-v16 \
|
||||
neon-pg-ext-v17
|
||||
|
||||
.PHONY: neon-pg-clean-ext
|
||||
neon-pg-clean-ext: \
|
||||
neon-pg-clean-ext-v14 \
|
||||
neon-pg-clean-ext-v15 \
|
||||
neon-pg-clean-ext-v16 \
|
||||
neon-pg-clean-ext-v17
|
||||
|
||||
# shorthand to build all Postgres versions
|
||||
.PHONY: postgres
|
||||
postgres: \
|
||||
@@ -288,13 +242,6 @@ postgres-headers: \
|
||||
postgres-headers-v16 \
|
||||
postgres-headers-v17
|
||||
|
||||
.PHONY: postgres-clean
|
||||
postgres-clean: \
|
||||
postgres-clean-v14 \
|
||||
postgres-clean-v15 \
|
||||
postgres-clean-v16 \
|
||||
postgres-clean-v17
|
||||
|
||||
.PHONY: postgres-check
|
||||
postgres-check: \
|
||||
postgres-check-v14 \
|
||||
@@ -302,12 +249,6 @@ postgres-check: \
|
||||
postgres-check-v16 \
|
||||
postgres-check-v17
|
||||
|
||||
# This doesn't remove the effects of 'configure'.
|
||||
.PHONY: clean
|
||||
clean: postgres-clean neon-pg-clean-ext
|
||||
$(MAKE) -C compute clean
|
||||
$(CARGO_CMD_PREFIX) cargo clean
|
||||
|
||||
# This removes everything
|
||||
.PHONY: distclean
|
||||
distclean:
|
||||
@@ -320,7 +261,7 @@ fmt:
|
||||
|
||||
postgres-%-pg-bsd-indent: postgres-%
|
||||
+@echo "Compiling pg_bsd_indent"
|
||||
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/
|
||||
$(MAKE) -C $(BUILD_DIR)/$*/src/tools/pg_bsd_indent/
|
||||
|
||||
# Create typedef list for the core. Note that generally it should be combined with
|
||||
# buildfarm one to cover platform specific stuff.
|
||||
@@ -339,7 +280,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
||||
cat $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/typedefs.list |\
|
||||
cat - postgres-$*-typedefs.list | sort | uniq > postgres-$*-typedefs-full.list
|
||||
+@echo note: you might want to run it on selected files/dirs instead.
|
||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
INDENT=$(BUILD_DIR)/$*/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
|
||||
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
|
||||
--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
|
||||
@@ -350,9 +291,9 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
|
||||
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
|
||||
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
|
||||
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
|
||||
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
|
||||
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
|
||||
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
|
||||
-C $(BUILD_DIR)/neon-v17 \
|
||||
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent
|
||||
|
||||
|
||||
|
||||
3
compute/.gitignore
vendored
3
compute/.gitignore
vendored
@@ -3,3 +3,6 @@ etc/neon_collector.yml
|
||||
etc/neon_collector_autoscaling.yml
|
||||
etc/sql_exporter.yml
|
||||
etc/sql_exporter_autoscaling.yml
|
||||
|
||||
# Node.js dependencies
|
||||
node_modules/
|
||||
|
||||
@@ -48,3 +48,11 @@ jsonnetfmt-test:
|
||||
.PHONY: jsonnetfmt-format
|
||||
jsonnetfmt-format:
|
||||
jsonnetfmt --in-place $(jsonnet_files)
|
||||
|
||||
.PHONY: manifest-schema-validation
|
||||
manifest-schema-validation: node_modules
|
||||
node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
|
||||
|
||||
node_modules: package.json
|
||||
npm install
|
||||
touch node_modules
|
||||
|
||||
@@ -77,9 +77,6 @@
|
||||
# build_and_test.yml github workflow for how that's done.
|
||||
|
||||
ARG PG_VERSION
|
||||
ARG REPOSITORY=ghcr.io/neondatabase
|
||||
ARG IMAGE=build-tools
|
||||
ARG TAG=pinned
|
||||
ARG BUILD_TAG
|
||||
ARG DEBIAN_VERSION=bookworm
|
||||
ARG DEBIAN_FLAVOR=${DEBIAN_VERSION}-slim
|
||||
@@ -149,8 +146,11 @@ RUN case $DEBIAN_VERSION in \
|
||||
ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \
|
||||
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
|
||||
libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd curl unzip g++ \
|
||||
libclang-dev \
|
||||
jsonnet \
|
||||
$VERSION_INSTALLS \
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/*
|
||||
&& apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1057,17 +1057,10 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg build with nonroot user and cargo installed"
|
||||
# This layer is base and common for layers with `pgrx`
|
||||
# Layer "build-deps with Rust toolchain installed"
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build AS pg-build-nonroot-with-cargo
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN apt update && \
|
||||
apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
|
||||
apt clean && rm -rf /var/lib/apt/lists/* && \
|
||||
useradd -ms /bin/bash nonroot -b /home
|
||||
FROM build-deps AS build-deps-with-cargo
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:$PATH"
|
||||
@@ -1082,13 +1075,29 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
|
||||
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
|
||||
rm rustup-init
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "pg-build with Rust toolchain installed"
|
||||
# This layer is base and common for layers with `pgrx`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build AS pg-build-with-cargo
|
||||
ARG PG_VERSION
|
||||
|
||||
ENV HOME=/home/nonroot
|
||||
ENV PATH="/home/nonroot/.cargo/bin:$PATH"
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
COPY --from=build-deps-with-cargo /home/nonroot /home/nonroot
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
# Layer "rust extensions"
|
||||
# This layer is used to build `pgrx` deps
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build
|
||||
FROM pg-build-with-cargo AS rust-extensions-build
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN case "${PG_VERSION:?}" in \
|
||||
@@ -1110,7 +1119,7 @@ USER root
|
||||
# and eventually get merged with `rust-extensions-build`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx12
|
||||
FROM pg-build-with-cargo AS rust-extensions-build-pgrx12
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN cargo install --locked --version 0.12.9 cargo-pgrx && \
|
||||
@@ -1127,7 +1136,7 @@ USER root
|
||||
# and eventually get merged with `rust-extensions-build`
|
||||
#
|
||||
#########################################################################################
|
||||
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx14
|
||||
FROM pg-build-with-cargo AS rust-extensions-build-pgrx14
|
||||
ARG PG_VERSION
|
||||
|
||||
RUN cargo install --locked --version 0.14.1 cargo-pgrx && \
|
||||
@@ -1144,10 +1153,12 @@ USER root
|
||||
|
||||
FROM build-deps AS pgrag-src
|
||||
ARG PG_VERSION
|
||||
|
||||
WORKDIR /ext-src
|
||||
COPY compute/patches/onnxruntime.patch .
|
||||
|
||||
RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \
|
||||
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
|
||||
patch -p1 < /ext-src/onnxruntime.patch && \
|
||||
echo "#nothing to test here" > neon-test.sh
|
||||
|
||||
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.2.tar.gz -O pgrag.tar.gz && \
|
||||
@@ -1621,18 +1632,7 @@ FROM pg-build AS neon-ext-build
|
||||
ARG PG_VERSION
|
||||
|
||||
COPY pgxn/ pgxn/
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon \
|
||||
-s install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon_utils \
|
||||
-s install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon_test_utils \
|
||||
-s install && \
|
||||
make -j $(getconf _NPROCESSORS_ONLN) \
|
||||
-C pgxn/neon_rmgr \
|
||||
-s install
|
||||
RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute
|
||||
|
||||
#########################################################################################
|
||||
#
|
||||
@@ -1722,7 +1722,7 @@ FROM extensions-${EXTENSIONS} AS neon-pg-ext-build
|
||||
# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries
|
||||
#
|
||||
#########################################################################################
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
|
||||
FROM build-deps-with-cargo AS compute-tools
|
||||
ARG BUILD_TAG
|
||||
ENV BUILD_TAG=$BUILD_TAG
|
||||
|
||||
@@ -1732,7 +1732,7 @@ COPY --chown=nonroot . .
|
||||
RUN --mount=type=cache,uid=1000,target=/home/nonroot/.cargo/registry \
|
||||
--mount=type=cache,uid=1000,target=/home/nonroot/.cargo/git \
|
||||
--mount=type=cache,uid=1000,target=/home/nonroot/target \
|
||||
mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \
|
||||
cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy && \
|
||||
mkdir target-bin && \
|
||||
cp target/release-line-debug-size-lto/compute_ctl \
|
||||
target/release-line-debug-size-lto/fast_import \
|
||||
@@ -1826,10 +1826,11 @@ RUN rm /usr/local/pgsql/lib/lib*.a
|
||||
# Preprocess the sql_exporter configuration files
|
||||
#
|
||||
#########################################################################################
|
||||
FROM $REPOSITORY/$IMAGE:$TAG AS sql_exporter_preprocessor
|
||||
FROM build-deps AS sql_exporter_preprocessor
|
||||
ARG PG_VERSION
|
||||
|
||||
USER nonroot
|
||||
WORKDIR /home/nonroot
|
||||
|
||||
COPY --chown=nonroot compute compute
|
||||
|
||||
|
||||
@@ -21,6 +21,8 @@ unix_socket_dir=/tmp/
|
||||
unix_socket_mode=0777
|
||||
; required for pgbouncer_exporter
|
||||
ignore_startup_parameters=extra_float_digits
|
||||
; pidfile for graceful termination
|
||||
pidfile=/tmp/pgbouncer.pid
|
||||
|
||||
;; Disable connection logging. It produces a lot of logs that no one looks at,
|
||||
;; and we can get similar log entries from the proxy too. We had incidents in
|
||||
|
||||
209
compute/manifest.schema.json
Normal file
209
compute/manifest.schema.json
Normal file
@@ -0,0 +1,209 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "Neon Compute Manifest Schema",
|
||||
"description": "Schema for Neon compute node configuration manifest",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"pg_settings": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"common": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"client_connection_check_interval": {
|
||||
"type": "string",
|
||||
"description": "Check for client disconnection interval in milliseconds"
|
||||
},
|
||||
"effective_io_concurrency": {
|
||||
"type": "string",
|
||||
"description": "Effective IO concurrency setting"
|
||||
},
|
||||
"fsync": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to force fsync to disk"
|
||||
},
|
||||
"hot_standby": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether hot standby is enabled"
|
||||
},
|
||||
"idle_in_transaction_session_timeout": {
|
||||
"type": "string",
|
||||
"description": "Timeout for idle transactions in milliseconds"
|
||||
},
|
||||
"listen_addresses": {
|
||||
"type": "string",
|
||||
"description": "Addresses to listen on"
|
||||
},
|
||||
"log_connections": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to log connections"
|
||||
},
|
||||
"log_disconnections": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to log disconnections"
|
||||
},
|
||||
"log_temp_files": {
|
||||
"type": "string",
|
||||
"description": "Size threshold for logging temporary files in KB"
|
||||
},
|
||||
"log_error_verbosity": {
|
||||
"type": "string",
|
||||
"enum": ["terse", "verbose", "default"],
|
||||
"description": "Error logging verbosity level"
|
||||
},
|
||||
"log_min_error_statement": {
|
||||
"type": "string",
|
||||
"description": "Minimum error level for statement logging"
|
||||
},
|
||||
"maintenance_io_concurrency": {
|
||||
"type": "string",
|
||||
"description": "Maintenance IO concurrency setting"
|
||||
},
|
||||
"max_connections": {
|
||||
"type": "string",
|
||||
"description": "Maximum number of connections"
|
||||
},
|
||||
"max_replication_flush_lag": {
|
||||
"type": "string",
|
||||
"description": "Maximum replication flush lag"
|
||||
},
|
||||
"max_replication_slots": {
|
||||
"type": "string",
|
||||
"description": "Maximum number of replication slots"
|
||||
},
|
||||
"max_replication_write_lag": {
|
||||
"type": "string",
|
||||
"description": "Maximum replication write lag"
|
||||
},
|
||||
"max_wal_senders": {
|
||||
"type": "string",
|
||||
"description": "Maximum number of WAL senders"
|
||||
},
|
||||
"max_wal_size": {
|
||||
"type": "string",
|
||||
"description": "Maximum WAL size"
|
||||
},
|
||||
"neon.unstable_extensions": {
|
||||
"type": "string",
|
||||
"description": "List of unstable extensions"
|
||||
},
|
||||
"neon.protocol_version": {
|
||||
"type": "string",
|
||||
"description": "Neon protocol version"
|
||||
},
|
||||
"password_encryption": {
|
||||
"type": "string",
|
||||
"description": "Password encryption method"
|
||||
},
|
||||
"restart_after_crash": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to restart after crash"
|
||||
},
|
||||
"superuser_reserved_connections": {
|
||||
"type": "string",
|
||||
"description": "Number of reserved connections for superuser"
|
||||
},
|
||||
"synchronous_standby_names": {
|
||||
"type": "string",
|
||||
"description": "Names of synchronous standby servers"
|
||||
},
|
||||
"wal_keep_size": {
|
||||
"type": "string",
|
||||
"description": "WAL keep size"
|
||||
},
|
||||
"wal_level": {
|
||||
"type": "string",
|
||||
"description": "WAL level"
|
||||
},
|
||||
"wal_log_hints": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to log hints in WAL"
|
||||
},
|
||||
"wal_sender_timeout": {
|
||||
"type": "string",
|
||||
"description": "WAL sender timeout in milliseconds"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"client_connection_check_interval",
|
||||
"effective_io_concurrency",
|
||||
"fsync",
|
||||
"hot_standby",
|
||||
"idle_in_transaction_session_timeout",
|
||||
"listen_addresses",
|
||||
"log_connections",
|
||||
"log_disconnections",
|
||||
"log_temp_files",
|
||||
"log_error_verbosity",
|
||||
"log_min_error_statement",
|
||||
"maintenance_io_concurrency",
|
||||
"max_connections",
|
||||
"max_replication_flush_lag",
|
||||
"max_replication_slots",
|
||||
"max_replication_write_lag",
|
||||
"max_wal_senders",
|
||||
"max_wal_size",
|
||||
"neon.unstable_extensions",
|
||||
"neon.protocol_version",
|
||||
"password_encryption",
|
||||
"restart_after_crash",
|
||||
"superuser_reserved_connections",
|
||||
"synchronous_standby_names",
|
||||
"wal_keep_size",
|
||||
"wal_level",
|
||||
"wal_log_hints",
|
||||
"wal_sender_timeout"
|
||||
]
|
||||
},
|
||||
"replica": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"hot_standby": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether hot standby is enabled for replicas"
|
||||
}
|
||||
},
|
||||
"required": ["hot_standby"]
|
||||
},
|
||||
"per_version": {
|
||||
"type": "object",
|
||||
"patternProperties": {
|
||||
"^1[4-7]$": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"common": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"io_combine_limit": {
|
||||
"type": "string",
|
||||
"description": "IO combine limit"
|
||||
}
|
||||
}
|
||||
},
|
||||
"replica": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"recovery_prefetch": {
|
||||
"type": "string",
|
||||
"enum": ["on", "off"],
|
||||
"description": "Whether to enable recovery prefetch for PostgreSQL replicas"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["common", "replica", "per_version"]
|
||||
}
|
||||
},
|
||||
"required": ["pg_settings"]
|
||||
}
|
||||
@@ -105,17 +105,17 @@ pg_settings:
|
||||
# Neon hot standby ignores pages that are not in the shared_buffers
|
||||
recovery_prefetch: "off"
|
||||
16:
|
||||
common:
|
||||
common: {}
|
||||
replica:
|
||||
# prefetching of blocks referenced in WAL doesn't make sense for us
|
||||
# Neon hot standby ignores pages that are not in the shared_buffers
|
||||
recovery_prefetch: "off"
|
||||
15:
|
||||
common:
|
||||
common: {}
|
||||
replica:
|
||||
# prefetching of blocks referenced in WAL doesn't make sense for us
|
||||
# Neon hot standby ignores pages that are not in the shared_buffers
|
||||
recovery_prefetch: "off"
|
||||
14:
|
||||
common:
|
||||
replica:
|
||||
common: {}
|
||||
replica: {}
|
||||
|
||||
37
compute/package-lock.json
generated
Normal file
37
compute/package-lock.json
generated
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"name": "neon-compute",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "neon-compute",
|
||||
"dependencies": {
|
||||
"@sourcemeta/jsonschema": "9.3.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@sourcemeta/jsonschema": {
|
||||
"version": "9.3.4",
|
||||
"resolved": "https://registry.npmjs.org/@sourcemeta/jsonschema/-/jsonschema-9.3.4.tgz",
|
||||
"integrity": "sha512-hkujfkZAIGXUs4U//We9faZW8LZ4/H9LqagRYsFSulH/VLcKPNhZyCTGg7AhORuzm27zqENvKpnX4g2FzudYFw==",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
],
|
||||
"license": "AGPL-3.0",
|
||||
"os": [
|
||||
"darwin",
|
||||
"linux",
|
||||
"win32"
|
||||
],
|
||||
"bin": {
|
||||
"jsonschema": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sourcemeta"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
7
compute/package.json
Normal file
7
compute/package.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"name": "neon-compute",
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@sourcemeta/jsonschema": "9.3.4"
|
||||
}
|
||||
}
|
||||
15
compute/patches/onnxruntime.patch
Normal file
15
compute/patches/onnxruntime.patch
Normal file
@@ -0,0 +1,15 @@
|
||||
diff --git a/cmake/deps.txt b/cmake/deps.txt
|
||||
index d213b09034..229de2ebf0 100644
|
||||
--- a/cmake/deps.txt
|
||||
+++ b/cmake/deps.txt
|
||||
@@ -22,7 +22,9 @@ dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b3132
|
||||
# it contains changes on top of 3.4.0 which are required to fix build issues.
|
||||
# Until the 3.4.1 release this is the best option we have.
|
||||
# Issue link: https://gitlab.com/libeigen/eigen/-/issues/2744
|
||||
-eigen;https://gitlab.com/libeigen/eigen/-/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;be8be39fdbc6e60e94fa7870b280707069b5b81a
|
||||
+# Moved to github mirror to avoid gitlab issues.Add commentMore actions
|
||||
+# Issue link: https://github.com/bazelbuild/bazel-central-registry/issues/4355
|
||||
+eigen;https://github.com/eigen-mirror/eigen/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;61418a349000ba7744a3ad03cf5071f22ebf860a
|
||||
flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v23.5.26.zip;59422c3b5e573dd192fead2834d25951f1c1670c
|
||||
fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
|
||||
fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
|
||||
@@ -124,6 +124,10 @@ struct Cli {
|
||||
/// Interval in seconds for collecting installed extensions statistics
|
||||
#[arg(long, default_value = "3600")]
|
||||
pub installed_extensions_collection_interval: u64,
|
||||
|
||||
/// Run in development mode, skipping VM-specific operations like process termination
|
||||
#[arg(long, action = clap::ArgAction::SetTrue)]
|
||||
pub dev: bool,
|
||||
}
|
||||
|
||||
impl Cli {
|
||||
@@ -159,7 +163,7 @@ fn main() -> Result<()> {
|
||||
.build()?;
|
||||
let _rt_guard = runtime.enter();
|
||||
|
||||
runtime.block_on(init())?;
|
||||
runtime.block_on(init(cli.dev))?;
|
||||
|
||||
// enable core dumping for all child processes
|
||||
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
|
||||
@@ -198,13 +202,13 @@ fn main() -> Result<()> {
|
||||
deinit_and_exit(exit_code);
|
||||
}
|
||||
|
||||
async fn init() -> Result<()> {
|
||||
async fn init(dev_mode: bool) -> Result<()> {
|
||||
init_tracing_and_logging(DEFAULT_LOG_LEVEL).await?;
|
||||
|
||||
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
|
||||
thread::spawn(move || {
|
||||
for sig in signals.forever() {
|
||||
handle_exit_signal(sig);
|
||||
handle_exit_signal(sig, dev_mode);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -263,9 +267,9 @@ fn deinit_and_exit(exit_code: Option<i32>) -> ! {
|
||||
/// When compute_ctl is killed, send also termination signal to sync-safekeepers
|
||||
/// to prevent leakage. TODO: it is better to convert compute_ctl to async and
|
||||
/// wait for termination which would be easy then.
|
||||
fn handle_exit_signal(sig: i32) {
|
||||
fn handle_exit_signal(sig: i32, dev_mode: bool) {
|
||||
info!("received {sig} termination signal");
|
||||
forward_termination_signal();
|
||||
forward_termination_signal(dev_mode);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ use url::Url;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::measured_stream::MeasuredReader;
|
||||
use utils::pid_file;
|
||||
|
||||
use crate::configurator::launch_configurator;
|
||||
use crate::disk_quota::set_disk_quota;
|
||||
@@ -44,6 +45,7 @@ use crate::lsn_lease::launch_lsn_lease_bg_task_for_static;
|
||||
use crate::metrics::COMPUTE_CTL_UP;
|
||||
use crate::monitor::launch_monitor;
|
||||
use crate::pg_helpers::*;
|
||||
use crate::pgbouncer::*;
|
||||
use crate::rsyslog::{
|
||||
PostgresLogsRsyslogConfig, configure_audit_rsyslog, configure_postgres_logs_export,
|
||||
launch_pgaudit_gc,
|
||||
@@ -161,6 +163,10 @@ pub struct ComputeState {
|
||||
pub lfc_prewarm_state: LfcPrewarmState,
|
||||
pub lfc_offload_state: LfcOffloadState,
|
||||
|
||||
/// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if
|
||||
/// mode == ComputeMode::Primary. None otherwise
|
||||
pub terminate_flush_lsn: Option<Lsn>,
|
||||
|
||||
pub metrics: ComputeMetrics,
|
||||
}
|
||||
|
||||
@@ -176,6 +182,7 @@ impl ComputeState {
|
||||
metrics: ComputeMetrics::default(),
|
||||
lfc_prewarm_state: LfcPrewarmState::default(),
|
||||
lfc_offload_state: LfcOffloadState::default(),
|
||||
terminate_flush_lsn: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,6 +222,46 @@ pub struct ParsedSpec {
|
||||
pub endpoint_storage_token: Option<String>,
|
||||
}
|
||||
|
||||
impl ParsedSpec {
|
||||
pub fn validate(&self) -> Result<(), String> {
|
||||
// Only Primary nodes are using safekeeper_connstrings, and at the moment
|
||||
// this method only validates that part of the specs.
|
||||
if self.spec.mode != ComputeMode::Primary {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// While it seems like a good idea to check for an odd number of entries in
|
||||
// the safekeepers connection string, changes to the list of safekeepers might
|
||||
// incur appending a new server to a list of 3, in which case a list of 4
|
||||
// entries is okay in production.
|
||||
//
|
||||
// Still we want unique entries, and at least one entry in the vector
|
||||
if self.safekeeper_connstrings.is_empty() {
|
||||
return Err(String::from("safekeeper_connstrings is empty"));
|
||||
}
|
||||
|
||||
// check for uniqueness of the connection strings in the set
|
||||
let mut connstrings = self.safekeeper_connstrings.clone();
|
||||
|
||||
connstrings.sort();
|
||||
let mut previous = &connstrings[0];
|
||||
|
||||
for current in connstrings.iter().skip(1) {
|
||||
// duplicate entry?
|
||||
if current == previous {
|
||||
return Err(format!(
|
||||
"duplicate entry in safekeeper_connstrings: {}!",
|
||||
current,
|
||||
));
|
||||
}
|
||||
|
||||
previous = current;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<ComputeSpec> for ParsedSpec {
|
||||
type Error = String;
|
||||
fn try_from(spec: ComputeSpec) -> Result<Self, String> {
|
||||
@@ -244,6 +291,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
|
||||
} else {
|
||||
spec.safekeeper_connstrings.clone()
|
||||
};
|
||||
|
||||
let storage_auth_token = spec.storage_auth_token.clone();
|
||||
let tenant_id: TenantId = if let Some(tenant_id) = spec.tenant_id {
|
||||
tenant_id
|
||||
@@ -278,7 +326,7 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
|
||||
.clone()
|
||||
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_token"));
|
||||
|
||||
Ok(ParsedSpec {
|
||||
let res = ParsedSpec {
|
||||
spec,
|
||||
pageserver_connstr,
|
||||
safekeeper_connstrings,
|
||||
@@ -287,7 +335,11 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
|
||||
timeline_id,
|
||||
endpoint_storage_addr,
|
||||
endpoint_storage_token,
|
||||
})
|
||||
};
|
||||
|
||||
// Now check validity of the parsed specification
|
||||
res.validate()?;
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -354,14 +406,11 @@ impl ComputeNode {
|
||||
// that can affect `compute_ctl` and prevent it from properly configuring the database schema.
|
||||
// Unset them via connection string options before connecting to the database.
|
||||
// N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
|
||||
//
|
||||
// TODO(ololobus): we currently pass `-c default_transaction_read_only=off` from control plane
|
||||
// as well. After rolling out this code, we can remove this parameter from control plane.
|
||||
// In the meantime, double-passing is fine, the last value is applied.
|
||||
// See: <https://github.com/neondatabase/cloud/blob/133dd8c4dbbba40edfbad475bf6a45073ca63faf/goapp/controlplane/internal/pkg/compute/provisioner/provisioner_common.go#L70>
|
||||
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
|
||||
let options = match conn_conf.get_options() {
|
||||
Some(options) => format!("{} {}", options, EXTRA_OPTIONS),
|
||||
// Allow the control plane to override any options set by the
|
||||
// compute
|
||||
Some(options) => format!("{} {}", EXTRA_OPTIONS, options),
|
||||
None => EXTRA_OPTIONS.to_string(),
|
||||
};
|
||||
conn_conf.options(&options);
|
||||
@@ -489,12 +538,21 @@ impl ComputeNode {
|
||||
// Reap the postgres process
|
||||
delay_exit |= this.cleanup_after_postgres_exit()?;
|
||||
|
||||
// /terminate returns LSN. If we don't sleep at all, connection will break and we
|
||||
// won't get result. If we sleep too much, tests will take significantly longer
|
||||
// and Github Action run will error out
|
||||
let sleep_duration = if delay_exit {
|
||||
Duration::from_secs(30)
|
||||
} else {
|
||||
Duration::from_millis(300)
|
||||
};
|
||||
|
||||
// If launch failed, keep serving HTTP requests for a while, so the cloud
|
||||
// control plane can get the actual error.
|
||||
if delay_exit {
|
||||
info!("giving control plane 30s to collect the error before shutdown");
|
||||
std::thread::sleep(Duration::from_secs(30));
|
||||
}
|
||||
std::thread::sleep(sleep_duration);
|
||||
Ok(exit_code)
|
||||
}
|
||||
|
||||
@@ -866,20 +924,25 @@ impl ComputeNode {
|
||||
// Maybe sync safekeepers again, to speed up next startup
|
||||
let compute_state = self.state.lock().unwrap().clone();
|
||||
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||
if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
|
||||
let lsn = if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
|
||||
info!("syncing safekeepers on shutdown");
|
||||
let storage_auth_token = pspec.storage_auth_token.clone();
|
||||
let lsn = self.sync_safekeepers(storage_auth_token)?;
|
||||
info!("synced safekeepers at lsn {lsn}");
|
||||
}
|
||||
info!(%lsn, "synced safekeepers");
|
||||
Some(lsn)
|
||||
} else {
|
||||
info!("not primary, not syncing safekeepers");
|
||||
None
|
||||
};
|
||||
|
||||
let mut delay_exit = false;
|
||||
let mut state = self.state.lock().unwrap();
|
||||
if state.status == ComputeStatus::TerminationPending {
|
||||
state.terminate_flush_lsn = lsn;
|
||||
if let ComputeStatus::TerminationPending { mode } = state.status {
|
||||
state.status = ComputeStatus::Terminated;
|
||||
self.state_changed.notify_all();
|
||||
// we were asked to terminate gracefully, don't exit to avoid restart
|
||||
delay_exit = true
|
||||
delay_exit = mode == compute_api::responses::TerminateMode::Fast
|
||||
}
|
||||
drop(state);
|
||||
|
||||
@@ -1750,7 +1813,7 @@ impl ComputeNode {
|
||||
|
||||
// exit loop
|
||||
ComputeStatus::Failed
|
||||
| ComputeStatus::TerminationPending
|
||||
| ComputeStatus::TerminationPending { .. }
|
||||
| ComputeStatus::Terminated => break 'cert_update,
|
||||
|
||||
// wait
|
||||
@@ -2251,12 +2314,68 @@ pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn forward_termination_signal() {
|
||||
pub fn forward_termination_signal(dev_mode: bool) {
|
||||
let ss_pid = SYNC_SAFEKEEPERS_PID.load(Ordering::SeqCst);
|
||||
if ss_pid != 0 {
|
||||
let ss_pid = nix::unistd::Pid::from_raw(ss_pid as i32);
|
||||
kill(ss_pid, Signal::SIGTERM).ok();
|
||||
}
|
||||
|
||||
if !dev_mode {
|
||||
// Terminate pgbouncer with SIGKILL
|
||||
match pid_file::read(PGBOUNCER_PIDFILE.into()) {
|
||||
Ok(pid_file::PidFileRead::LockedByOtherProcess(pid)) => {
|
||||
info!("sending SIGKILL to pgbouncer process pid: {}", pid);
|
||||
if let Err(e) = kill(pid, Signal::SIGKILL) {
|
||||
error!("failed to terminate pgbouncer: {}", e);
|
||||
}
|
||||
}
|
||||
// pgbouncer does not lock the pid file, so we read and kill the process directly
|
||||
Ok(pid_file::PidFileRead::NotHeldByAnyProcess(_)) => {
|
||||
if let Ok(pid_str) = std::fs::read_to_string(PGBOUNCER_PIDFILE) {
|
||||
if let Ok(pid) = pid_str.trim().parse::<i32>() {
|
||||
info!(
|
||||
"sending SIGKILL to pgbouncer process pid: {} (from unlocked pid file)",
|
||||
pid
|
||||
);
|
||||
if let Err(e) = kill(Pid::from_raw(pid), Signal::SIGKILL) {
|
||||
error!("failed to terminate pgbouncer: {}", e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
info!("pgbouncer pid file exists but process not running");
|
||||
}
|
||||
}
|
||||
Ok(pid_file::PidFileRead::NotExist) => {
|
||||
info!("pgbouncer pid file not found, process may not be running");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("error reading pgbouncer pid file: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Terminate local_proxy
|
||||
match pid_file::read("/etc/local_proxy/pid".into()) {
|
||||
Ok(pid_file::PidFileRead::LockedByOtherProcess(pid)) => {
|
||||
info!("sending SIGTERM to local_proxy process pid: {}", pid);
|
||||
if let Err(e) = kill(pid, Signal::SIGTERM) {
|
||||
error!("failed to terminate local_proxy: {}", e);
|
||||
}
|
||||
}
|
||||
Ok(pid_file::PidFileRead::NotHeldByAnyProcess(_)) => {
|
||||
info!("local_proxy PID file exists but process not running");
|
||||
}
|
||||
Ok(pid_file::PidFileRead::NotExist) => {
|
||||
info!("local_proxy PID file not found, process may not be running");
|
||||
}
|
||||
Err(e) => {
|
||||
error!("error reading local_proxy PID file: {}", e);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
info!("Skipping pgbouncer and local_proxy termination because in dev mode");
|
||||
}
|
||||
|
||||
let pg_pid = PG_PID.load(Ordering::SeqCst);
|
||||
if pg_pid != 0 {
|
||||
let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
|
||||
@@ -2289,3 +2408,21 @@ impl<T: 'static> JoinSetExt<T> for tokio::task::JoinSet<T> {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fs::File;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn duplicate_safekeeper_connstring() {
|
||||
let file = File::open("tests/cluster_spec.json").unwrap();
|
||||
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
|
||||
|
||||
match ParsedSpec::try_from(spec.clone()) {
|
||||
Ok(_p) => panic!("Failed to detect duplicate entry"),
|
||||
Err(e) => assert!(e.starts_with("duplicate entry in safekeeper_connstrings:")),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,32 +1,42 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::compute::{ComputeNode, forward_termination_signal};
|
||||
use crate::http::JsonResponse;
|
||||
use axum::extract::State;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use compute_api::responses::ComputeStatus;
|
||||
use axum::response::Response;
|
||||
use axum_extra::extract::OptionalQuery;
|
||||
use compute_api::responses::{ComputeStatus, TerminateResponse};
|
||||
use http::StatusCode;
|
||||
use serde::Deserialize;
|
||||
use std::sync::Arc;
|
||||
use tokio::task;
|
||||
use tracing::info;
|
||||
|
||||
use crate::compute::{ComputeNode, forward_termination_signal};
|
||||
use crate::http::JsonResponse;
|
||||
#[derive(Deserialize, Default)]
|
||||
pub struct TerminateQuery {
|
||||
mode: compute_api::responses::TerminateMode,
|
||||
}
|
||||
|
||||
/// Terminate the compute.
|
||||
pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
|
||||
pub(in crate::http) async fn terminate(
|
||||
State(compute): State<Arc<ComputeNode>>,
|
||||
OptionalQuery(terminate): OptionalQuery<TerminateQuery>,
|
||||
) -> Response {
|
||||
let mode = terminate.unwrap_or_default().mode;
|
||||
{
|
||||
let mut state = compute.state.lock().unwrap();
|
||||
if state.status == ComputeStatus::Terminated {
|
||||
return StatusCode::CREATED.into_response();
|
||||
return JsonResponse::success(StatusCode::CREATED, state.terminate_flush_lsn);
|
||||
}
|
||||
|
||||
if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
|
||||
return JsonResponse::invalid_status(state.status);
|
||||
}
|
||||
|
||||
state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
|
||||
drop(state);
|
||||
state.set_status(
|
||||
ComputeStatus::TerminationPending { mode },
|
||||
&compute.state_changed,
|
||||
);
|
||||
}
|
||||
|
||||
forward_termination_signal();
|
||||
forward_termination_signal(false);
|
||||
info!("sent signal and notified waiters");
|
||||
|
||||
// Spawn a blocking thread to wait for compute to become Terminated.
|
||||
@@ -34,7 +44,7 @@ pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>)
|
||||
// be able to serve other requests while some particular request
|
||||
// is waiting for compute to finish configuration.
|
||||
let c = compute.clone();
|
||||
task::spawn_blocking(move || {
|
||||
let lsn = task::spawn_blocking(move || {
|
||||
let mut state = c.state.lock().unwrap();
|
||||
while state.status != ComputeStatus::Terminated {
|
||||
state = c.state_changed.wait(state).unwrap();
|
||||
@@ -44,11 +54,10 @@ pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>)
|
||||
state.status
|
||||
);
|
||||
}
|
||||
state.terminate_flush_lsn
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
info!("terminated Postgres");
|
||||
|
||||
StatusCode::OK.into_response()
|
||||
JsonResponse::success(StatusCode::OK, TerminateResponse { lsn })
|
||||
}
|
||||
|
||||
@@ -22,6 +22,7 @@ mod migration;
|
||||
pub mod monitor;
|
||||
pub mod params;
|
||||
pub mod pg_helpers;
|
||||
pub mod pgbouncer;
|
||||
pub mod rsyslog;
|
||||
pub mod spec;
|
||||
mod spec_apply;
|
||||
|
||||
@@ -83,7 +83,9 @@ impl ComputeMonitor {
|
||||
let compute_status = self.compute.get_status();
|
||||
if matches!(
|
||||
compute_status,
|
||||
ComputeStatus::Terminated | ComputeStatus::TerminationPending | ComputeStatus::Failed
|
||||
ComputeStatus::Terminated
|
||||
| ComputeStatus::TerminationPending { .. }
|
||||
| ComputeStatus::Failed
|
||||
) {
|
||||
info!(
|
||||
"compute is in {} status, stopping compute monitor",
|
||||
|
||||
1
compute_tools/src/pgbouncer.rs
Normal file
1
compute_tools/src/pgbouncer.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub const PGBOUNCER_PIDFILE: &str = "/tmp/pgbouncer.pid";
|
||||
6
compute_tools/tests/README.md
Normal file
6
compute_tools/tests/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
### Test files
|
||||
|
||||
The file `cluster_spec.json` has been copied over from libs/compute_api
|
||||
tests, with some edits:
|
||||
|
||||
- the neon.safekeepers setting contains a duplicate value
|
||||
245
compute_tools/tests/cluster_spec.json
Normal file
245
compute_tools/tests/cluster_spec.json
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"format_version": 1.0,
|
||||
|
||||
"timestamp": "2021-05-23T18:25:43.511Z",
|
||||
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
|
||||
|
||||
"cluster": {
|
||||
"cluster_id": "test-cluster-42",
|
||||
"name": "Zenith Test",
|
||||
"state": "restarted",
|
||||
"roles": [
|
||||
{
|
||||
"name": "postgres",
|
||||
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "alexk",
|
||||
"encrypted_password": null,
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "zenith \"new\"",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
|
||||
"options": null
|
||||
},
|
||||
{
|
||||
"name": "zen",
|
||||
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
|
||||
},
|
||||
{
|
||||
"name": "\"name\";\\n select 1;",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
|
||||
},
|
||||
{
|
||||
"name": "MyRole",
|
||||
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
|
||||
}
|
||||
],
|
||||
"databases": [
|
||||
{
|
||||
"name": "DB2",
|
||||
"owner": "alexk",
|
||||
"options": [
|
||||
{
|
||||
"name": "LC_COLLATE",
|
||||
"value": "C",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "LC_CTYPE",
|
||||
"value": "C",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "TEMPLATE",
|
||||
"value": "template0",
|
||||
"vartype": "enum"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "zenith",
|
||||
"owner": "MyRole"
|
||||
},
|
||||
{
|
||||
"name": "zen",
|
||||
"owner": "zen"
|
||||
}
|
||||
],
|
||||
"settings": [
|
||||
{
|
||||
"name": "fsync",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "wal_level",
|
||||
"value": "logical",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "hot_standby",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "prewarm_lfc_on_startup",
|
||||
"value": "off",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "neon.safekeepers",
|
||||
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501,127.0.0.1:6502",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_log_hints",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "log_connections",
|
||||
"value": "on",
|
||||
"vartype": "bool"
|
||||
},
|
||||
{
|
||||
"name": "shared_buffers",
|
||||
"value": "32768",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "port",
|
||||
"value": "55432",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_connections",
|
||||
"value": "100",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_wal_senders",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "listen_addresses",
|
||||
"value": "0.0.0.0",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "wal_sender_timeout",
|
||||
"value": "0",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "password_encryption",
|
||||
"value": "md5",
|
||||
"vartype": "enum"
|
||||
},
|
||||
{
|
||||
"name": "maintenance_work_mem",
|
||||
"value": "65536",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_parallel_workers",
|
||||
"value": "8",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "max_worker_processes",
|
||||
"value": "8",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "neon.tenant_id",
|
||||
"value": "b0554b632bd4d547a63b86c3630317e8",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "max_replication_slots",
|
||||
"value": "10",
|
||||
"vartype": "integer"
|
||||
},
|
||||
{
|
||||
"name": "neon.timeline_id",
|
||||
"value": "2414a61ffc94e428f14b5758fe308e13",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "shared_preload_libraries",
|
||||
"value": "neon",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "synchronous_standby_names",
|
||||
"value": "walproposer",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "neon.pageserver_connstring",
|
||||
"value": "host=127.0.0.1 port=6400",
|
||||
"vartype": "string"
|
||||
},
|
||||
{
|
||||
"name": "test.escaping",
|
||||
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
|
||||
"vartype": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"delta_operations": [
|
||||
{
|
||||
"action": "delete_db",
|
||||
"name": "zenith_test"
|
||||
},
|
||||
{
|
||||
"action": "rename_db",
|
||||
"name": "DB",
|
||||
"new_name": "DB2"
|
||||
},
|
||||
{
|
||||
"action": "delete_role",
|
||||
"name": "zenith2"
|
||||
},
|
||||
{
|
||||
"action": "rename_role",
|
||||
"name": "zenith new",
|
||||
"new_name": "zenith \"new\""
|
||||
}
|
||||
],
|
||||
"remote_extensions": {
|
||||
"library_index": {
|
||||
"postgis-3": "postgis",
|
||||
"libpgrouting-3.4": "postgis",
|
||||
"postgis_raster-3": "postgis",
|
||||
"postgis_sfcgal-3": "postgis",
|
||||
"postgis_topology-3": "postgis",
|
||||
"address_standardizer-3": "postgis"
|
||||
},
|
||||
"extension_data": {
|
||||
"postgis": {
|
||||
"archive_path": "5834329303/v15/extensions/postgis.tar.zst",
|
||||
"control_data": {
|
||||
"postgis.control": "# postgis extension\ncomment = ''PostGIS geometry and geography spatial types and functions''\ndefault_version = ''3.3.2''\nmodule_pathname = ''$libdir/postgis-3''\nrelocatable = false\ntrusted = true\n",
|
||||
"pgrouting.control": "# pgRouting Extension\ncomment = ''pgRouting Extension''\ndefault_version = ''3.4.2''\nmodule_pathname = ''$libdir/libpgrouting-3.4''\nrelocatable = true\nrequires = ''plpgsql''\nrequires = ''postgis''\ntrusted = true\n",
|
||||
"postgis_raster.control": "# postgis_raster extension\ncomment = ''PostGIS raster types and functions''\ndefault_version = ''3.3.2''\nmodule_pathname = ''$libdir/postgis_raster-3''\nrelocatable = false\nrequires = postgis\ntrusted = true\n",
|
||||
"postgis_sfcgal.control": "# postgis topology extension\ncomment = ''PostGIS SFCGAL functions''\ndefault_version = ''3.3.2''\nrelocatable = true\nrequires = postgis\ntrusted = true\n",
|
||||
"postgis_topology.control": "# postgis topology extension\ncomment = ''PostGIS topology spatial types and functions''\ndefault_version = ''3.3.2''\nrelocatable = false\nschema = topology\nrequires = postgis\ntrusted = true\n",
|
||||
"address_standardizer.control": "# address_standardizer extension\ncomment = ''Used to parse an address into constituent elements. Generally used to support geocoding address normalization step.''\ndefault_version = ''3.3.2''\nrelocatable = true\ntrusted = true\n",
|
||||
"postgis_tiger_geocoder.control": "# postgis tiger geocoder extension\ncomment = ''PostGIS tiger geocoder and reverse geocoder''\ndefault_version = ''3.3.2''\nrelocatable = false\nschema = tiger\nrequires = ''postgis,fuzzystrmatch''\nsuperuser= false\ntrusted = true\n",
|
||||
"address_standardizer_data_us.control": "# address standardizer us dataset\ncomment = ''Address Standardizer US dataset example''\ndefault_version = ''3.3.2''\nrelocatable = true\ntrusted = true\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
"custom_extensions": [],
|
||||
"public_extensions": ["postgis"]
|
||||
},
|
||||
"pgbouncer_settings": {
|
||||
"default_pool_size": "42",
|
||||
"pool_mode": "session"
|
||||
}
|
||||
}
|
||||
@@ -18,7 +18,7 @@ use clap::Parser;
|
||||
use compute_api::requests::ComputeClaimsScope;
|
||||
use compute_api::spec::ComputeMode;
|
||||
use control_plane::broker::StorageBroker;
|
||||
use control_plane::endpoint::ComputeControlPlane;
|
||||
use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode, PageserverProtocol};
|
||||
use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
|
||||
use control_plane::local_env;
|
||||
use control_plane::local_env::{
|
||||
@@ -605,6 +605,14 @@ struct EndpointCreateCmdArgs {
|
||||
#[clap(long, help = "Postgres version")]
|
||||
pg_version: u32,
|
||||
|
||||
/// Use gRPC to communicate with Pageservers, by generating grpc:// connstrings.
|
||||
///
|
||||
/// Specified on creation such that it's retained across reconfiguration and restarts.
|
||||
///
|
||||
/// NB: not yet supported by computes.
|
||||
#[clap(long)]
|
||||
grpc: bool,
|
||||
|
||||
#[clap(
|
||||
long,
|
||||
help = "If set, the node will be a hot replica on the specified timeline",
|
||||
@@ -664,6 +672,13 @@ struct EndpointStartCmdArgs {
|
||||
#[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
|
||||
#[arg(default_value = "90s")]
|
||||
start_timeout: Duration,
|
||||
|
||||
#[clap(
|
||||
long,
|
||||
help = "Run in development mode, skipping VM-specific operations like process termination",
|
||||
action = clap::ArgAction::SetTrue
|
||||
)]
|
||||
dev: bool,
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
@@ -696,10 +711,9 @@ struct EndpointStopCmdArgs {
|
||||
)]
|
||||
destroy: bool,
|
||||
|
||||
#[clap(long, help = "Postgres shutdown mode, passed to \"pg_ctl -m <mode>\"")]
|
||||
#[arg(value_parser(["smart", "fast", "immediate"]))]
|
||||
#[arg(default_value = "fast")]
|
||||
mode: String,
|
||||
#[clap(long, help = "Postgres shutdown mode")]
|
||||
#[clap(default_value = "fast")]
|
||||
mode: EndpointTerminateMode,
|
||||
}
|
||||
|
||||
#[derive(clap::Args)]
|
||||
@@ -1451,6 +1465,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
args.internal_http_port,
|
||||
args.pg_version,
|
||||
mode,
|
||||
args.grpc,
|
||||
!args.update_catalog,
|
||||
false,
|
||||
)?;
|
||||
@@ -1491,13 +1506,20 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
|
||||
let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
|
||||
let conf = env.get_pageserver_conf(pageserver_id).unwrap();
|
||||
let parsed = parse_host_port(&conf.listen_pg_addr).expect("Bad config");
|
||||
(
|
||||
vec![(parsed.0, parsed.1.unwrap_or(5432))],
|
||||
// If caller is telling us what pageserver to use, this is not a tenant which is
|
||||
// full managed by storage controller, therefore not sharded.
|
||||
DEFAULT_STRIPE_SIZE,
|
||||
)
|
||||
// Use gRPC if requested.
|
||||
let pageserver = if endpoint.grpc {
|
||||
let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
|
||||
let (host, port) = parse_host_port(grpc_addr)?;
|
||||
let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
|
||||
(PageserverProtocol::Grpc, host, port)
|
||||
} else {
|
||||
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
|
||||
let port = port.unwrap_or(5432);
|
||||
(PageserverProtocol::Libpq, host, port)
|
||||
};
|
||||
// If caller is telling us what pageserver to use, this is not a tenant which is
|
||||
// fully managed by storage controller, therefore not sharded.
|
||||
(vec![pageserver], DEFAULT_STRIPE_SIZE)
|
||||
} else {
|
||||
// Look up the currently attached location of the tenant, and its striping metadata,
|
||||
// to pass these on to postgres.
|
||||
@@ -1516,11 +1538,20 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.await?;
|
||||
}
|
||||
|
||||
anyhow::Ok((
|
||||
Host::parse(&shard.listen_pg_addr)
|
||||
.expect("Storage controller reported bad hostname"),
|
||||
shard.listen_pg_port,
|
||||
))
|
||||
let pageserver = if endpoint.grpc {
|
||||
(
|
||||
PageserverProtocol::Grpc,
|
||||
Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))?,
|
||||
shard.listen_grpc_port.expect("no gRPC port"),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
PageserverProtocol::Libpq,
|
||||
Host::parse(&shard.listen_pg_addr)?,
|
||||
shard.listen_pg_port,
|
||||
)
|
||||
};
|
||||
anyhow::Ok(pageserver)
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
@@ -1565,6 +1596,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
stripe_size.0 as usize,
|
||||
args.create_test_user,
|
||||
args.start_timeout,
|
||||
args.dev,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
@@ -1575,11 +1607,19 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.get(endpoint_id.as_str())
|
||||
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
||||
let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
|
||||
let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
|
||||
vec![(
|
||||
pageserver.pg_connection_config.host().clone(),
|
||||
pageserver.pg_connection_config.port(),
|
||||
)]
|
||||
let conf = env.get_pageserver_conf(ps_id)?;
|
||||
// Use gRPC if requested.
|
||||
let pageserver = if endpoint.grpc {
|
||||
let grpc_addr = conf.listen_grpc_addr.as_ref().expect("bad config");
|
||||
let (host, port) = parse_host_port(grpc_addr)?;
|
||||
let port = port.unwrap_or(DEFAULT_PAGESERVER_GRPC_PORT);
|
||||
(PageserverProtocol::Grpc, host, port)
|
||||
} else {
|
||||
let (host, port) = parse_host_port(&conf.listen_pg_addr)?;
|
||||
let port = port.unwrap_or(5432);
|
||||
(PageserverProtocol::Libpq, host, port)
|
||||
};
|
||||
vec![pageserver]
|
||||
} else {
|
||||
let storage_controller = StorageController::from_env(env);
|
||||
storage_controller
|
||||
@@ -1588,11 +1628,21 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.shards
|
||||
.into_iter()
|
||||
.map(|shard| {
|
||||
(
|
||||
Host::parse(&shard.listen_pg_addr)
|
||||
.expect("Storage controller reported malformed host"),
|
||||
shard.listen_pg_port,
|
||||
)
|
||||
// Use gRPC if requested.
|
||||
if endpoint.grpc {
|
||||
(
|
||||
PageserverProtocol::Grpc,
|
||||
Host::parse(&shard.listen_grpc_addr.expect("no gRPC address"))
|
||||
.expect("bad hostname"),
|
||||
shard.listen_grpc_port.expect("no gRPC port"),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
PageserverProtocol::Libpq,
|
||||
Host::parse(&shard.listen_pg_addr).expect("bad hostname"),
|
||||
shard.listen_pg_port,
|
||||
)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
@@ -1607,7 +1657,10 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
|
||||
.endpoints
|
||||
.get(endpoint_id)
|
||||
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
|
||||
endpoint.stop(&args.mode, args.destroy)?;
|
||||
match endpoint.stop(args.mode, args.destroy).await?.lsn {
|
||||
Some(lsn) => println!("{lsn}"),
|
||||
None => println!("null"),
|
||||
}
|
||||
}
|
||||
EndpointCmd::GenerateJwt(args) => {
|
||||
let endpoint = {
|
||||
@@ -2039,11 +2092,16 @@ async fn handle_stop_all(args: &StopCmdArgs, env: &local_env::LocalEnv) -> Resul
|
||||
}
|
||||
|
||||
async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
|
||||
let mode = if immediate {
|
||||
EndpointTerminateMode::Immediate
|
||||
} else {
|
||||
EndpointTerminateMode::Fast
|
||||
};
|
||||
// Stop all endpoints
|
||||
match ComputeControlPlane::load(env.clone()) {
|
||||
Ok(cplane) => {
|
||||
for (_k, node) in cplane.endpoints {
|
||||
if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) {
|
||||
if let Err(e) = node.stop(mode, false).await {
|
||||
eprintln!("postgres stop failed: {e:#}");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
//! ```
|
||||
//!
|
||||
use std::collections::BTreeMap;
|
||||
use std::fmt::Display;
|
||||
use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
@@ -51,7 +52,8 @@ use compute_api::requests::{
|
||||
COMPUTE_AUDIENCE, ComputeClaims, ComputeClaimsScope, ConfigurationRequest,
|
||||
};
|
||||
use compute_api::responses::{
|
||||
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TlsConfig,
|
||||
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TerminateResponse,
|
||||
TlsConfig,
|
||||
};
|
||||
use compute_api::spec::{
|
||||
Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
|
||||
@@ -76,7 +78,6 @@ use utils::id::{NodeId, TenantId, TimelineId};
|
||||
|
||||
use crate::local_env::LocalEnv;
|
||||
use crate::postgresql_conf::PostgresConf;
|
||||
use crate::storage_controller::StorageController;
|
||||
|
||||
// contents of a endpoint.json file
|
||||
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
|
||||
@@ -89,6 +90,7 @@ pub struct EndpointConf {
|
||||
external_http_port: u16,
|
||||
internal_http_port: u16,
|
||||
pg_version: u32,
|
||||
grpc: bool,
|
||||
skip_pg_catalog_updates: bool,
|
||||
reconfigure_concurrency: usize,
|
||||
drop_subscriptions_before_start: bool,
|
||||
@@ -192,6 +194,7 @@ impl ComputeControlPlane {
|
||||
internal_http_port: Option<u16>,
|
||||
pg_version: u32,
|
||||
mode: ComputeMode,
|
||||
grpc: bool,
|
||||
skip_pg_catalog_updates: bool,
|
||||
drop_subscriptions_before_start: bool,
|
||||
) -> Result<Arc<Endpoint>> {
|
||||
@@ -226,6 +229,7 @@ impl ComputeControlPlane {
|
||||
// we also skip catalog updates in the cloud.
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
grpc,
|
||||
reconfigure_concurrency: 1,
|
||||
features: vec![],
|
||||
cluster: None,
|
||||
@@ -244,6 +248,7 @@ impl ComputeControlPlane {
|
||||
internal_http_port,
|
||||
pg_port,
|
||||
pg_version,
|
||||
grpc,
|
||||
skip_pg_catalog_updates,
|
||||
drop_subscriptions_before_start,
|
||||
reconfigure_concurrency: 1,
|
||||
@@ -298,6 +303,8 @@ pub struct Endpoint {
|
||||
pub tenant_id: TenantId,
|
||||
pub timeline_id: TimelineId,
|
||||
pub mode: ComputeMode,
|
||||
/// If true, the endpoint should use gRPC to communicate with Pageservers.
|
||||
pub grpc: bool,
|
||||
|
||||
// port and address of the Postgres server and `compute_ctl`'s HTTP APIs
|
||||
pub pg_address: SocketAddr,
|
||||
@@ -333,15 +340,58 @@ pub enum EndpointStatus {
|
||||
RunningNoPidfile,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for EndpointStatus {
|
||||
impl Display for EndpointStatus {
|
||||
fn fmt(&self, writer: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
let s = match self {
|
||||
writer.write_str(match self {
|
||||
Self::Running => "running",
|
||||
Self::Stopped => "stopped",
|
||||
Self::Crashed => "crashed",
|
||||
Self::RunningNoPidfile => "running, no pidfile",
|
||||
};
|
||||
write!(writer, "{}", s)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Copy, clap::ValueEnum)]
|
||||
pub enum EndpointTerminateMode {
|
||||
#[default]
|
||||
/// Use pg_ctl stop -m fast
|
||||
Fast,
|
||||
/// Use pg_ctl stop -m immediate
|
||||
Immediate,
|
||||
/// Use /terminate?mode=immediate
|
||||
ImmediateTerminate,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for EndpointTerminateMode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(match &self {
|
||||
EndpointTerminateMode::Fast => "fast",
|
||||
EndpointTerminateMode::Immediate => "immediate",
|
||||
EndpointTerminateMode::ImmediateTerminate => "immediate-terminate",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Protocol used to connect to a Pageserver.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum PageserverProtocol {
|
||||
Libpq,
|
||||
Grpc,
|
||||
}
|
||||
|
||||
impl PageserverProtocol {
|
||||
/// Returns the URL scheme for the protocol, used in connstrings.
|
||||
pub fn scheme(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Libpq => "postgresql",
|
||||
Self::Grpc => "grpc",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for PageserverProtocol {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.write_str(self.scheme())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -380,6 +430,7 @@ impl Endpoint {
|
||||
mode: conf.mode,
|
||||
tenant_id: conf.tenant_id,
|
||||
pg_version: conf.pg_version,
|
||||
grpc: conf.grpc,
|
||||
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
|
||||
reconfigure_concurrency: conf.reconfigure_concurrency,
|
||||
drop_subscriptions_before_start: conf.drop_subscriptions_before_start,
|
||||
@@ -608,10 +659,10 @@ impl Endpoint {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_pageserver_connstr(pageservers: &[(Host, u16)]) -> String {
|
||||
fn build_pageserver_connstr(pageservers: &[(PageserverProtocol, Host, u16)]) -> String {
|
||||
pageservers
|
||||
.iter()
|
||||
.map(|(host, port)| format!("postgresql://no_user@{host}:{port}"))
|
||||
.map(|(scheme, host, port)| format!("{scheme}://no_user@{host}:{port}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
}
|
||||
@@ -656,11 +707,12 @@ impl Endpoint {
|
||||
endpoint_storage_addr: String,
|
||||
safekeepers_generation: Option<SafekeeperGeneration>,
|
||||
safekeepers: Vec<NodeId>,
|
||||
pageservers: Vec<(Host, u16)>,
|
||||
pageservers: Vec<(PageserverProtocol, Host, u16)>,
|
||||
remote_ext_base_url: Option<&String>,
|
||||
shard_stripe_size: usize,
|
||||
create_test_user: bool,
|
||||
start_timeout: Duration,
|
||||
dev: bool,
|
||||
) -> Result<()> {
|
||||
if self.status() == EndpointStatus::Running {
|
||||
anyhow::bail!("The endpoint is already running");
|
||||
@@ -831,6 +883,10 @@ impl Endpoint {
|
||||
cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
|
||||
}
|
||||
|
||||
if dev {
|
||||
cmd.arg("--dev");
|
||||
}
|
||||
|
||||
let child = cmd.spawn()?;
|
||||
// set up a scopeguard to kill & wait for the child in case we panic or bail below
|
||||
let child = scopeguard::guard(child, |mut child| {
|
||||
@@ -883,7 +939,7 @@ impl Endpoint {
|
||||
ComputeStatus::Empty
|
||||
| ComputeStatus::ConfigurationPending
|
||||
| ComputeStatus::Configuration
|
||||
| ComputeStatus::TerminationPending
|
||||
| ComputeStatus::TerminationPending { .. }
|
||||
| ComputeStatus::Terminated => {
|
||||
bail!("unexpected compute status: {:?}", state.status)
|
||||
}
|
||||
@@ -941,10 +997,12 @@ impl Endpoint {
|
||||
|
||||
pub async fn reconfigure(
|
||||
&self,
|
||||
mut pageservers: Vec<(Host, u16)>,
|
||||
pageservers: Vec<(PageserverProtocol, Host, u16)>,
|
||||
stripe_size: Option<ShardStripeSize>,
|
||||
safekeepers: Option<Vec<NodeId>>,
|
||||
) -> Result<()> {
|
||||
anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
|
||||
|
||||
let (mut spec, compute_ctl_config) = {
|
||||
let config_path = self.endpoint_path().join("config.json");
|
||||
let file = std::fs::File::open(config_path)?;
|
||||
@@ -956,25 +1014,7 @@ impl Endpoint {
|
||||
let postgresql_conf = self.read_postgresql_conf()?;
|
||||
spec.cluster.postgresql_conf = Some(postgresql_conf);
|
||||
|
||||
// If we weren't given explicit pageservers, query the storage controller
|
||||
if pageservers.is_empty() {
|
||||
let storage_controller = StorageController::from_env(&self.env);
|
||||
let locate_result = storage_controller.tenant_locate(self.tenant_id).await?;
|
||||
pageservers = locate_result
|
||||
.shards
|
||||
.into_iter()
|
||||
.map(|shard| {
|
||||
(
|
||||
Host::parse(&shard.listen_pg_addr)
|
||||
.expect("Storage controller reported bad hostname"),
|
||||
shard.listen_pg_port,
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
}
|
||||
|
||||
let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
|
||||
assert!(!pageserver_connstr.is_empty());
|
||||
spec.pageserver_connstring = Some(pageserver_connstr);
|
||||
if stripe_size.is_some() {
|
||||
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
|
||||
@@ -1021,8 +1061,27 @@ impl Endpoint {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stop(&self, mode: &str, destroy: bool) -> Result<()> {
|
||||
self.pg_ctl(&["-m", mode, "stop"], &None)?;
|
||||
pub async fn stop(
|
||||
&self,
|
||||
mode: EndpointTerminateMode,
|
||||
destroy: bool,
|
||||
) -> Result<TerminateResponse> {
|
||||
// pg_ctl stop is fast but doesn't allow us to collect LSN. /terminate is
|
||||
// slow, and test runs time out. Solution: special mode "immediate-terminate"
|
||||
// which uses /terminate
|
||||
let response = if let EndpointTerminateMode::ImmediateTerminate = mode {
|
||||
let ip = self.external_http_address.ip();
|
||||
let port = self.external_http_address.port();
|
||||
let url = format!("http://{ip}:{port}/terminate?mode=immediate");
|
||||
let token = self.generate_jwt(Some(ComputeClaimsScope::Admin))?;
|
||||
let request = reqwest::Client::new().post(url).bearer_auth(token);
|
||||
let response = request.send().await.context("/terminate")?;
|
||||
let text = response.text().await.context("/terminate result")?;
|
||||
serde_json::from_str(&text).with_context(|| format!("deserializing {text}"))?
|
||||
} else {
|
||||
self.pg_ctl(&["-m", &mode.to_string(), "stop"], &None)?;
|
||||
TerminateResponse { lsn: None }
|
||||
};
|
||||
|
||||
// Also wait for the compute_ctl process to die. It might have some
|
||||
// cleanup work to do after postgres stops, like syncing safekeepers,
|
||||
@@ -1032,7 +1091,7 @@ impl Endpoint {
|
||||
// waiting. Sometimes we do *not* want this cleanup: tests intentionally
|
||||
// do stop when majority of safekeepers is down, so sync-safekeepers
|
||||
// would hang otherwise. This could be a separate flag though.
|
||||
let send_sigterm = destroy || mode == "immediate";
|
||||
let send_sigterm = destroy || !matches!(mode, EndpointTerminateMode::Fast);
|
||||
self.wait_for_compute_ctl_to_exit(send_sigterm)?;
|
||||
if destroy {
|
||||
println!(
|
||||
@@ -1041,7 +1100,7 @@ impl Endpoint {
|
||||
);
|
||||
std::fs::remove_dir_all(self.endpoint_path())?;
|
||||
}
|
||||
Ok(())
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub fn connstr(&self, user: &str, db_name: &str) -> String {
|
||||
|
||||
@@ -209,6 +209,8 @@ pub struct NeonStorageControllerConf {
|
||||
pub use_https_safekeeper_api: bool,
|
||||
|
||||
pub use_local_compute_notifications: bool,
|
||||
|
||||
pub timeline_safekeeper_count: Option<i64>,
|
||||
}
|
||||
|
||||
impl NeonStorageControllerConf {
|
||||
@@ -236,9 +238,10 @@ impl Default for NeonStorageControllerConf {
|
||||
heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
|
||||
long_reconcile_threshold: None,
|
||||
use_https_pageserver_api: false,
|
||||
timelines_onto_safekeepers: false,
|
||||
timelines_onto_safekeepers: true,
|
||||
use_https_safekeeper_api: false,
|
||||
use_local_compute_notifications: true,
|
||||
timeline_safekeeper_count: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use camino::Utf8PathBuf;
|
||||
use pageserver_api::config::{DEFAULT_GRPC_LISTEN_PORT, DEFAULT_HTTP_LISTEN_PORT};
|
||||
use pageserver_api::models::{self, TenantInfo, TimelineInfo};
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api;
|
||||
@@ -252,9 +253,10 @@ impl PageServerNode {
|
||||
// the storage controller
|
||||
let metadata_path = datadir.join("metadata.json");
|
||||
|
||||
let (_http_host, http_port) =
|
||||
let http_host = "localhost".to_string();
|
||||
let (_, http_port) =
|
||||
parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr");
|
||||
let http_port = http_port.unwrap_or(9898);
|
||||
let http_port = http_port.unwrap_or(DEFAULT_HTTP_LISTEN_PORT);
|
||||
|
||||
let https_port = match self.conf.listen_https_addr.as_ref() {
|
||||
Some(https_addr) => {
|
||||
@@ -265,6 +267,13 @@ impl PageServerNode {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let (mut grpc_host, mut grpc_port) = (None, None);
|
||||
if let Some(grpc_addr) = &self.conf.listen_grpc_addr {
|
||||
let (_, port) = parse_host_port(grpc_addr).expect("Unable to parse listen_grpc_addr");
|
||||
grpc_host = Some("localhost".to_string());
|
||||
grpc_port = Some(port.unwrap_or(DEFAULT_GRPC_LISTEN_PORT));
|
||||
}
|
||||
|
||||
// Intentionally hand-craft JSON: this acts as an implicit format compat test
|
||||
// in case the pageserver-side structure is edited, and reflects the real life
|
||||
// situation: the metadata is written by some other script.
|
||||
@@ -273,7 +282,9 @@ impl PageServerNode {
|
||||
serde_json::to_vec(&pageserver_api::config::NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: self.pg_connection_config.port(),
|
||||
http_host: "localhost".to_string(),
|
||||
grpc_host,
|
||||
grpc_port,
|
||||
http_host,
|
||||
http_port,
|
||||
https_port,
|
||||
other: HashMap::from([(
|
||||
|
||||
@@ -628,6 +628,10 @@ impl StorageController {
|
||||
args.push("--timelines-onto-safekeepers".to_string());
|
||||
}
|
||||
|
||||
if let Some(sk_cnt) = self.config.timeline_safekeeper_count {
|
||||
args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
|
||||
}
|
||||
|
||||
println!("Starting storage controller");
|
||||
|
||||
background_process::start_process(
|
||||
|
||||
@@ -36,6 +36,10 @@ enum Command {
|
||||
listen_pg_addr: String,
|
||||
#[arg(long)]
|
||||
listen_pg_port: u16,
|
||||
#[arg(long)]
|
||||
listen_grpc_addr: Option<String>,
|
||||
#[arg(long)]
|
||||
listen_grpc_port: Option<u16>,
|
||||
|
||||
#[arg(long)]
|
||||
listen_http_addr: String,
|
||||
@@ -418,6 +422,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
node_id,
|
||||
listen_pg_addr,
|
||||
listen_pg_port,
|
||||
listen_grpc_addr,
|
||||
listen_grpc_port,
|
||||
listen_http_addr,
|
||||
listen_http_port,
|
||||
listen_https_port,
|
||||
@@ -431,6 +437,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
node_id,
|
||||
listen_pg_addr,
|
||||
listen_pg_port,
|
||||
listen_grpc_addr,
|
||||
listen_grpc_port,
|
||||
listen_http_addr,
|
||||
listen_http_port,
|
||||
listen_https_port,
|
||||
|
||||
@@ -95,3 +95,4 @@ echo "Start compute node"
|
||||
-b /usr/local/bin/postgres \
|
||||
--compute-id "compute-${RANDOM}" \
|
||||
--config "${CONFIG_FILE}"
|
||||
--dev
|
||||
|
||||
@@ -31,13 +31,12 @@ struct Args {
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
struct Config {
|
||||
#[serde(default = "listen")]
|
||||
listen: std::net::SocketAddr,
|
||||
pemfile: camino::Utf8PathBuf,
|
||||
#[serde(flatten)]
|
||||
storage_config: remote_storage::RemoteStorageConfig,
|
||||
storage_kind: remote_storage::TypedRemoteStorageKind,
|
||||
#[serde(default = "max_upload_file_limit")]
|
||||
max_upload_file_limit: usize,
|
||||
}
|
||||
@@ -70,7 +69,8 @@ async fn main() -> anyhow::Result<()> {
|
||||
let listener = tokio::net::TcpListener::bind(config.listen).await.unwrap();
|
||||
info!("listening on {}", listener.local_addr().unwrap());
|
||||
|
||||
let storage = remote_storage::GenericRemoteStorage::from_config(&config.storage_config).await?;
|
||||
let storage =
|
||||
remote_storage::GenericRemoteStorage::from_storage_kind(config.storage_kind).await?;
|
||||
let cancel = tokio_util::sync::CancellationToken::new();
|
||||
if !args.no_s3_check_on_startup {
|
||||
app::check_storage_permissions(&storage, cancel.clone()).await?;
|
||||
|
||||
@@ -16,6 +16,7 @@ pub static COMPUTE_AUDIENCE: &str = "compute";
|
||||
pub enum ComputeClaimsScope {
|
||||
/// An admin-scoped token allows access to all of `compute_ctl`'s authorized
|
||||
/// facilities.
|
||||
#[serde(rename = "compute_ctl:admin")]
|
||||
Admin,
|
||||
}
|
||||
|
||||
@@ -24,7 +25,7 @@ impl FromStr for ComputeClaimsScope {
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"admin" => Ok(ComputeClaimsScope::Admin),
|
||||
"compute_ctl:admin" => Ok(ComputeClaimsScope::Admin),
|
||||
_ => Err(anyhow::anyhow!("invalid compute claims scope \"{s}\"")),
|
||||
}
|
||||
}
|
||||
@@ -80,3 +81,23 @@ pub struct SetRoleGrantsRequest {
|
||||
pub privileges: Vec<Privilege>,
|
||||
pub role: PgIdent,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::str::FromStr;
|
||||
|
||||
use crate::requests::ComputeClaimsScope;
|
||||
|
||||
/// Confirm that whether we parse the scope by string or through serde, the
|
||||
/// same values parse to the same enum variant.
|
||||
#[test]
|
||||
fn compute_request_scopes() {
|
||||
const ADMIN_SCOPE: &str = "compute_ctl:admin";
|
||||
|
||||
let from_serde: ComputeClaimsScope =
|
||||
serde_json::from_str(&format!("\"{ADMIN_SCOPE}\"")).unwrap();
|
||||
let from_str = ComputeClaimsScope::from_str(ADMIN_SCOPE).unwrap();
|
||||
|
||||
assert_eq!(from_serde, from_str);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,6 +83,16 @@ pub struct ComputeStatusResponse {
|
||||
pub error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum TerminateMode {
|
||||
#[default]
|
||||
/// wait 30s till returning from /terminate to allow control plane to get the error
|
||||
Fast,
|
||||
/// return from /terminate immediately as soon as all components are terminated
|
||||
Immediate,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ComputeStatus {
|
||||
@@ -103,11 +113,16 @@ pub enum ComputeStatus {
|
||||
// control-plane to terminate it.
|
||||
Failed,
|
||||
// Termination requested
|
||||
TerminationPending,
|
||||
TerminationPending { mode: TerminateMode },
|
||||
// Terminated Postgres
|
||||
Terminated,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
pub struct TerminateResponse {
|
||||
pub lsn: Option<utils::lsn::Lsn>,
|
||||
}
|
||||
|
||||
impl Display for ComputeStatus {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
@@ -117,7 +132,7 @@ impl Display for ComputeStatus {
|
||||
ComputeStatus::Running => f.write_str("running"),
|
||||
ComputeStatus::Configuration => f.write_str("configuration"),
|
||||
ComputeStatus::Failed => f.write_str("failed"),
|
||||
ComputeStatus::TerminationPending => f.write_str("termination-pending"),
|
||||
ComputeStatus::TerminationPending { .. } => f.write_str("termination-pending"),
|
||||
ComputeStatus::Terminated => f.write_str("terminated"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -419,13 +419,13 @@ pub fn now() -> u64 {
|
||||
with_thread_context(|ctx| ctx.clock.get().unwrap().now())
|
||||
}
|
||||
|
||||
pub fn exit(code: i32, msg: String) {
|
||||
pub fn exit(code: i32, msg: String) -> ! {
|
||||
with_thread_context(|ctx| {
|
||||
ctx.allow_panic.store(true, Ordering::SeqCst);
|
||||
let mut result = ctx.result.lock();
|
||||
*result = (code, msg);
|
||||
panic!("exit");
|
||||
});
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn get_thread_ctx() -> Arc<ThreadContext> {
|
||||
|
||||
@@ -17,7 +17,7 @@ anyhow.workspace = true
|
||||
bytes.workspace = true
|
||||
byteorder.workspace = true
|
||||
utils.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
postgres_ffi_types.workspace = true
|
||||
enum-map.workspace = true
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
|
||||
@@ -12,6 +12,7 @@ pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LI
|
||||
pub const DEFAULT_GRPC_LISTEN_PORT: u16 = 51051; // storage-broker already uses 50051
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::num::{NonZeroU64, NonZeroUsize};
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
@@ -24,16 +25,17 @@ use utils::logging::LogFormat;
|
||||
use crate::models::{ImageCompressionAlgorithm, LsnLease};
|
||||
|
||||
// Certain metadata (e.g. externally-addressable name, AZ) is delivered
|
||||
// as a separate structure. This information is not neeed by the pageserver
|
||||
// as a separate structure. This information is not needed by the pageserver
|
||||
// itself, it is only used for registering the pageserver with the control
|
||||
// plane and/or storage controller.
|
||||
//
|
||||
#[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
|
||||
pub struct NodeMetadata {
|
||||
#[serde(rename = "host")]
|
||||
pub postgres_host: String,
|
||||
#[serde(rename = "port")]
|
||||
pub postgres_port: u16,
|
||||
pub grpc_host: Option<String>,
|
||||
pub grpc_port: Option<u16>,
|
||||
pub http_host: String,
|
||||
pub http_port: u16,
|
||||
pub https_port: Option<u16>,
|
||||
@@ -44,6 +46,23 @@ pub struct NodeMetadata {
|
||||
pub other: HashMap<String, serde_json::Value>,
|
||||
}
|
||||
|
||||
impl Display for NodeMetadata {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"postgresql://{}:{} ",
|
||||
self.postgres_host, self.postgres_port
|
||||
)?;
|
||||
if let Some(grpc_host) = &self.grpc_host {
|
||||
let grpc_port = self.grpc_port.unwrap_or_default();
|
||||
write!(f, "grpc://{grpc_host}:{grpc_port} ")?;
|
||||
}
|
||||
write!(f, "http://{}:{} ", self.http_host, self.http_port)?;
|
||||
write!(f, "other:{:?}", self.other)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// PostHog integration config.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct PostHogConfig {
|
||||
@@ -57,6 +76,10 @@ pub struct PostHogConfig {
|
||||
pub private_api_url: String,
|
||||
/// Public API URL
|
||||
pub public_api_url: String,
|
||||
/// Refresh interval for the feature flag spec
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub refresh_interval: Option<Duration>,
|
||||
}
|
||||
|
||||
/// `pageserver.toml`
|
||||
@@ -337,16 +360,21 @@ pub struct TimelineImportConfig {
|
||||
pub struct BasebackupCacheConfig {
|
||||
#[serde(with = "humantime_serde")]
|
||||
pub cleanup_period: Duration,
|
||||
// FIXME: Support max_size_bytes.
|
||||
// pub max_size_bytes: usize,
|
||||
pub max_size_entries: i64,
|
||||
/// Maximum total size of basebackup cache entries on disk in bytes.
|
||||
/// The cache may slightly exceed this limit because we do not know
|
||||
/// the exact size of the cache entry untill it's written to disk.
|
||||
pub max_total_size_bytes: u64,
|
||||
// TODO(diko): support max_entry_size_bytes.
|
||||
// pub max_entry_size_bytes: u64,
|
||||
pub max_size_entries: usize,
|
||||
}
|
||||
|
||||
impl Default for BasebackupCacheConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
cleanup_period: Duration::from_secs(60),
|
||||
// max_size_bytes: 1024 * 1024 * 1024, // 1 GiB
|
||||
max_total_size_bytes: 1024 * 1024 * 1024, // 1 GiB
|
||||
// max_entry_size_bytes: 16 * 1024 * 1024, // 16 MiB
|
||||
max_size_entries: 1000,
|
||||
}
|
||||
}
|
||||
@@ -792,7 +820,7 @@ pub mod tenant_conf_defaults {
|
||||
// By default ingest enough WAL for two new L0 layers before checking if new image
|
||||
// image layers should be created.
|
||||
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
|
||||
pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false;
|
||||
pub const DEFAULT_GC_COMPACTION_ENABLED: bool = true;
|
||||
pub const DEFAULT_GC_COMPACTION_VERIFICATION: bool = true;
|
||||
pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB
|
||||
pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100;
|
||||
|
||||
@@ -14,6 +14,8 @@ fn test_node_metadata_v1_backward_compatibilty() {
|
||||
NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: 23,
|
||||
grpc_host: None,
|
||||
grpc_port: None,
|
||||
http_host: "localhost".to_string(),
|
||||
http_port: 42,
|
||||
https_port: None,
|
||||
@@ -37,6 +39,35 @@ fn test_node_metadata_v2_backward_compatibilty() {
|
||||
NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: 23,
|
||||
grpc_host: None,
|
||||
grpc_port: None,
|
||||
http_host: "localhost".to_string(),
|
||||
http_port: 42,
|
||||
https_port: Some(123),
|
||||
other: HashMap::new(),
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_metadata_v3_backward_compatibilty() {
|
||||
let v3 = serde_json::to_vec(&serde_json::json!({
|
||||
"host": "localhost",
|
||||
"port": 23,
|
||||
"grpc_host": "localhost",
|
||||
"grpc_port": 51,
|
||||
"http_host": "localhost",
|
||||
"http_port": 42,
|
||||
"https_port": 123,
|
||||
}));
|
||||
|
||||
assert_eq!(
|
||||
serde_json::from_slice::<NodeMetadata>(&v3.unwrap()).unwrap(),
|
||||
NodeMetadata {
|
||||
postgres_host: "localhost".to_string(),
|
||||
postgres_port: 23,
|
||||
grpc_host: Some("localhost".to_string()),
|
||||
grpc_port: Some(51),
|
||||
http_host: "localhost".to_string(),
|
||||
http_port: 42,
|
||||
https_port: Some(123),
|
||||
|
||||
@@ -52,6 +52,8 @@ pub struct NodeRegisterRequest {
|
||||
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_pg_port: u16,
|
||||
pub listen_grpc_addr: Option<String>,
|
||||
pub listen_grpc_port: Option<u16>,
|
||||
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
@@ -101,6 +103,8 @@ pub struct TenantLocateResponseShard {
|
||||
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_pg_port: u16,
|
||||
pub listen_grpc_addr: Option<String>,
|
||||
pub listen_grpc_port: Option<u16>,
|
||||
|
||||
pub listen_http_addr: String,
|
||||
pub listen_http_port: u16,
|
||||
@@ -152,6 +156,8 @@ pub struct NodeDescribeResponse {
|
||||
|
||||
pub listen_pg_addr: String,
|
||||
pub listen_pg_port: u16,
|
||||
pub listen_grpc_addr: Option<String>,
|
||||
pub listen_grpc_port: Option<u16>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
|
||||
@@ -4,8 +4,8 @@ use std::ops::Range;
|
||||
use anyhow::{Result, bail};
|
||||
use byteorder::{BE, ByteOrder};
|
||||
use bytes::Bytes;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::{Oid, RepOriginId};
|
||||
use postgres_ffi_types::forknum::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi_types::{Oid, RepOriginId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use utils::const_assert;
|
||||
|
||||
@@ -194,7 +194,7 @@ impl Key {
|
||||
/// will be rejected on the write path.
|
||||
#[allow(dead_code)]
|
||||
pub fn is_valid_key_on_write_path_strong(&self) -> bool {
|
||||
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
||||
use postgres_ffi_types::constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
||||
if !self.is_i128_representable() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::ops::Range;
|
||||
|
||||
use itertools::Itertools;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
|
||||
use crate::key::Key;
|
||||
use crate::shard::{ShardCount, ShardIdentity};
|
||||
@@ -269,9 +268,13 @@ impl KeySpace {
|
||||
/// Partition a key space into roughly chunks of roughly 'target_size' bytes
|
||||
/// in each partition.
|
||||
///
|
||||
pub fn partition(&self, shard_identity: &ShardIdentity, target_size: u64) -> KeyPartitioning {
|
||||
// Assume that each value is 8k in size.
|
||||
let target_nblocks = (target_size / BLCKSZ as u64) as u32;
|
||||
pub fn partition(
|
||||
&self,
|
||||
shard_identity: &ShardIdentity,
|
||||
target_size: u64,
|
||||
block_size: u64,
|
||||
) -> KeyPartitioning {
|
||||
let target_nblocks = (target_size / block_size) as u32;
|
||||
|
||||
let mut parts = Vec::new();
|
||||
let mut current_part = Vec::new();
|
||||
|
||||
@@ -5,11 +5,10 @@ pub mod controller_api;
|
||||
pub mod key;
|
||||
pub mod keyspace;
|
||||
pub mod models;
|
||||
pub mod record;
|
||||
pub mod pagestream_api;
|
||||
pub mod reltag;
|
||||
pub mod shard;
|
||||
/// Public API types
|
||||
pub mod upcall_api;
|
||||
pub mod value;
|
||||
|
||||
pub mod config;
|
||||
|
||||
@@ -5,16 +5,12 @@ pub mod utilization;
|
||||
use core::ops::Range;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Display;
|
||||
use std::io::{BufRead, Read};
|
||||
use std::num::{NonZeroU32, NonZeroU64, NonZeroUsize};
|
||||
use std::str::FromStr;
|
||||
use std::time::{Duration, SystemTime};
|
||||
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
#[cfg(feature = "testing")]
|
||||
use camino::Utf8PathBuf;
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use serde_with::serde_as;
|
||||
pub use utilization::PageserverUtilization;
|
||||
@@ -24,7 +20,6 @@ use utils::{completion, serde_system_time};
|
||||
|
||||
use crate::config::Ratio;
|
||||
use crate::key::{CompactKey, Key};
|
||||
use crate::reltag::RelTag;
|
||||
use crate::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize, TenantShardId};
|
||||
|
||||
/// The state of a tenant in this pageserver.
|
||||
@@ -1907,219 +1902,6 @@ pub struct ScanDisposableKeysResponse {
|
||||
pub not_disposable_count: usize,
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(PartialEq, Eq, Debug)]
|
||||
pub enum PagestreamFeMessage {
|
||||
Exists(PagestreamExistsRequest),
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentRequest),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestRequest),
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(Debug, strum_macros::EnumProperty)]
|
||||
pub enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentResponse),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestResponse),
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamFeMessageTag {
|
||||
Exists = 0,
|
||||
Nblocks = 1,
|
||||
GetPage = 2,
|
||||
DbSize = 3,
|
||||
GetSlruSegment = 4,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 99,
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamBeMessageTag {
|
||||
Exists = 100,
|
||||
Nblocks = 101,
|
||||
GetPage = 102,
|
||||
Error = 103,
|
||||
DbSize = 104,
|
||||
GetSlruSegment = 105,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 199,
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamFeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
0 => Ok(PagestreamFeMessageTag::Exists),
|
||||
1 => Ok(PagestreamFeMessageTag::Nblocks),
|
||||
2 => Ok(PagestreamFeMessageTag::GetPage),
|
||||
3 => Ok(PagestreamFeMessageTag::DbSize),
|
||||
4 => Ok(PagestreamFeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
99 => Ok(PagestreamFeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
100 => Ok(PagestreamBeMessageTag::Exists),
|
||||
101 => Ok(PagestreamBeMessageTag::Nblocks),
|
||||
102 => Ok(PagestreamBeMessageTag::GetPage),
|
||||
103 => Ok(PagestreamBeMessageTag::Error),
|
||||
104 => Ok(PagestreamBeMessageTag::DbSize),
|
||||
105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
199 => Ok(PagestreamBeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A GetPage request contains two LSN values:
|
||||
//
|
||||
// request_lsn: Get the page version at this point in time. Lsn::Max is a special value that means
|
||||
// "get the latest version present". It's used by the primary server, which knows that no one else
|
||||
// is writing WAL. 'not_modified_since' must be set to a proper value even if request_lsn is
|
||||
// Lsn::Max. Standby servers use the current replay LSN as the request LSN.
|
||||
//
|
||||
// not_modified_since: Hint to the pageserver that the client knows that the page has not been
|
||||
// modified between 'not_modified_since' and the request LSN. It's always correct to set
|
||||
// 'not_modified_since equal' to 'request_lsn' (unless Lsn::Max is used as the 'request_lsn'), but
|
||||
// passing an earlier LSN can speed up the request, by allowing the pageserver to process the
|
||||
// request without waiting for 'request_lsn' to arrive.
|
||||
//
|
||||
// The now-defunct V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
|
||||
// sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and
|
||||
// 'latest' was set to true. The V2 interface was added because there was no correct way for a
|
||||
// standby to request a page at a particular non-latest LSN, and also include the
|
||||
// 'not_modified_since' hint. That led to an awkward choice of either using an old LSN in the
|
||||
// request, if the standby knows that the page hasn't been modified since, and risk getting an error
|
||||
// if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could
|
||||
// require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2
|
||||
// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
|
||||
// difference in the responses between V1 and V2.
|
||||
//
|
||||
// V3 version of protocol adds request ID to all requests. This request ID is also included in response
|
||||
// as well as other fields from requests, which allows to verify that we receive response for our request.
|
||||
// We copy fields from request to response to make checking more reliable: request ID is formed from process ID
|
||||
// and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
|
||||
//
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum PagestreamProtocolVersion {
|
||||
V2,
|
||||
V3,
|
||||
}
|
||||
|
||||
pub type RequestId = u64;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamRequest {
|
||||
pub reqid: RequestId,
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamExistsRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamNblocksRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetPageRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
pub blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamDbSizeRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetSlruSegmentRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub kind: u8,
|
||||
pub segno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsResponse {
|
||||
pub req: PagestreamExistsRequest,
|
||||
pub exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksResponse {
|
||||
pub req: PagestreamNblocksRequest,
|
||||
pub n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageResponse {
|
||||
pub req: PagestreamGetPageRequest,
|
||||
pub page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetSlruSegmentResponse {
|
||||
pub req: PagestreamGetSlruSegmentRequest,
|
||||
pub segment: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamErrorResponse {
|
||||
pub req: PagestreamRequest,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeResponse {
|
||||
pub req: PagestreamDbSizeRequest,
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct PagestreamTestRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub batch_key: u64,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamTestResponse {
|
||||
pub req: PagestreamTestRequest,
|
||||
}
|
||||
|
||||
// This is a cut-down version of TenantHistorySize from the pageserver crate, omitting fields
|
||||
// that require pageserver-internal types. It is sufficient to get the total size.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
@@ -2131,506 +1913,6 @@ pub struct TenantHistorySize {
|
||||
pub size: Option<u64>,
|
||||
}
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
/// Serialize a compute -> pageserver message. This is currently only used in testing
|
||||
/// tools. Always uses protocol version 3.
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Exists as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
}
|
||||
|
||||
Self::Nblocks(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
}
|
||||
|
||||
Self::GetPage(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
bytes.put_u32(req.blkno);
|
||||
}
|
||||
|
||||
Self::DbSize(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(req.kind);
|
||||
bytes.put_u32(req.segno);
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Test as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(req.batch_key);
|
||||
let message = req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn parse<R: std::io::Read>(
|
||||
body: &mut R,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.read_u8()?;
|
||||
let (reqid, request_lsn, not_modified_since) = match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => (
|
||||
0,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
PagestreamProtocolVersion::V3 => (
|
||||
body.read_u64::<BigEndian>()?,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
};
|
||||
|
||||
match PagestreamFeMessageTag::try_from(msg_tag)
|
||||
.map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))?
|
||||
{
|
||||
PagestreamFeMessageTag::Exists => {
|
||||
Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::Nblocks => {
|
||||
Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetPage => {
|
||||
Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::DbSize => {
|
||||
Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind: body.read_u8()?,
|
||||
segno: body.read_u32::<BigEndian>()?,
|
||||
},
|
||||
)),
|
||||
#[cfg(feature = "testing")]
|
||||
PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key: body.read_u64::<BigEndian>()?,
|
||||
message: {
|
||||
let len = body.read_u64::<BigEndian>()?;
|
||||
let mut buf = vec![0; len as usize];
|
||||
body.read_exact(&mut buf)?;
|
||||
String::from_utf8(buf)?
|
||||
},
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamBeMessage {
|
||||
pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
PagestreamProtocolVersion::V3 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.req.blkno);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put_u64(resp.req.reqid);
|
||||
bytes.put_u64(resp.req.request_lsn.0);
|
||||
bytes.put_u64(resp.req.not_modified_since.0);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.dbnode);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(resp.req.kind);
|
||||
bytes.put_u32(resp.req.segno);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ as usize) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
|
||||
let mut buf = buf.reader();
|
||||
let msg_tag = buf.read_u8()?;
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
let ok =
|
||||
match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
|
||||
Tag::Exists => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let exists = buf.read_u8()? != 0;
|
||||
Self::Exists(PagestreamExistsResponse {
|
||||
req: PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
exists,
|
||||
})
|
||||
}
|
||||
Tag::Nblocks => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
Self::Nblocks(PagestreamNblocksResponse {
|
||||
req: PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
n_blocks,
|
||||
})
|
||||
}
|
||||
Tag::GetPage => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let blkno = buf.read_u32::<BigEndian>()?;
|
||||
let mut page = vec![0; 8192]; // TODO: use MaybeUninit
|
||||
buf.read_exact(&mut page)?;
|
||||
Self::GetPage(PagestreamGetPageResponse {
|
||||
req: PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
blkno,
|
||||
},
|
||||
page: page.into(),
|
||||
})
|
||||
}
|
||||
Tag::Error => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let mut msg = Vec::new();
|
||||
buf.read_until(0, &mut msg)?;
|
||||
let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
|
||||
let rust_str = cstring.to_str()?;
|
||||
Self::Error(PagestreamErrorResponse {
|
||||
req: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
message: rust_str.to_owned(),
|
||||
})
|
||||
}
|
||||
Tag::DbSize => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let dbnode = buf.read_u32::<BigEndian>()?;
|
||||
let db_size = buf.read_i64::<BigEndian>()?;
|
||||
Self::DbSize(PagestreamDbSizeResponse {
|
||||
req: PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode,
|
||||
},
|
||||
db_size,
|
||||
})
|
||||
}
|
||||
Tag::GetSlruSegment => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let kind = buf.read_u8()?;
|
||||
let segno = buf.read_u32::<BigEndian>()?;
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
let mut segment = vec![0; n_blocks as usize * BLCKSZ as usize];
|
||||
buf.read_exact(&mut segment)?;
|
||||
Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
|
||||
req: PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind,
|
||||
segno,
|
||||
},
|
||||
segment: segment.into(),
|
||||
})
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Tag::Test => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let batch_key = buf.read_u64::<BigEndian>()?;
|
||||
let len = buf.read_u64::<BigEndian>()?;
|
||||
let mut msg = vec![0; len as usize];
|
||||
buf.read_exact(&mut msg)?;
|
||||
let message = String::from_utf8(msg)?;
|
||||
Self::Test(PagestreamTestResponse {
|
||||
req: PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key,
|
||||
message,
|
||||
},
|
||||
})
|
||||
}
|
||||
};
|
||||
let remaining = buf.into_inner();
|
||||
if !remaining.is_empty() {
|
||||
anyhow::bail!(
|
||||
"remaining bytes in msg with tag={msg_tag}: {}",
|
||||
remaining.len()
|
||||
);
|
||||
}
|
||||
Ok(ok)
|
||||
}
|
||||
|
||||
pub fn kind(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Exists(_) => "Exists",
|
||||
Self::Nblocks(_) => "Nblocks",
|
||||
Self::GetPage(_) => "GetPage",
|
||||
Self::Error(_) => "Error",
|
||||
Self::DbSize(_) => "DbSize",
|
||||
Self::GetSlruSegment(_) => "GetSlruSegment",
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(_) => "Test",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct PageTraceEvent {
|
||||
pub key: CompactKey,
|
||||
@@ -2656,68 +1938,6 @@ mod tests {
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_pagestream() {
|
||||
// Test serialization/deserialization of PagestreamFeMessage
|
||||
let messages = vec![
|
||||
PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(4),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
blkno: 7,
|
||||
}),
|
||||
PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
dbnode: 7,
|
||||
}),
|
||||
];
|
||||
for msg in messages {
|
||||
let bytes = msg.serialize();
|
||||
let reconstructed =
|
||||
PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
|
||||
.unwrap();
|
||||
assert!(msg == reconstructed);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tenantinfo_serde() {
|
||||
// Test serialization/deserialization of TenantInfo
|
||||
|
||||
798
libs/pageserver_api/src/pagestream_api.rs
Normal file
798
libs/pageserver_api/src/pagestream_api.rs
Normal file
@@ -0,0 +1,798 @@
|
||||
//! Rust definitions of the libpq-based pagestream API
|
||||
//!
|
||||
//! See also the C implementation of the same API in pgxn/neon/pagestore_client.h
|
||||
|
||||
use std::io::{BufRead, Read};
|
||||
|
||||
use crate::reltag::RelTag;
|
||||
|
||||
use byteorder::{BigEndian, ReadBytesExt};
|
||||
use bytes::{Buf, BufMut, Bytes, BytesMut};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
/// Block size.
|
||||
///
|
||||
/// XXX: We assume 8k block size in the SLRU fetch API. It's not great to hardcode
|
||||
/// that in the protocol, because Postgres supports different block sizes as a compile
|
||||
/// time option.
|
||||
const BLCKSZ: usize = 8192;
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(PartialEq, Eq, Debug)]
|
||||
pub enum PagestreamFeMessage {
|
||||
Exists(PagestreamExistsRequest),
|
||||
Nblocks(PagestreamNblocksRequest),
|
||||
GetPage(PagestreamGetPageRequest),
|
||||
DbSize(PagestreamDbSizeRequest),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentRequest),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestRequest),
|
||||
}
|
||||
|
||||
// Wrapped in libpq CopyData
|
||||
#[derive(Debug, strum_macros::EnumProperty)]
|
||||
pub enum PagestreamBeMessage {
|
||||
Exists(PagestreamExistsResponse),
|
||||
Nblocks(PagestreamNblocksResponse),
|
||||
GetPage(PagestreamGetPageResponse),
|
||||
Error(PagestreamErrorResponse),
|
||||
DbSize(PagestreamDbSizeResponse),
|
||||
GetSlruSegment(PagestreamGetSlruSegmentResponse),
|
||||
#[cfg(feature = "testing")]
|
||||
Test(PagestreamTestResponse),
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamFeMessageTag {
|
||||
Exists = 0,
|
||||
Nblocks = 1,
|
||||
GetPage = 2,
|
||||
DbSize = 3,
|
||||
GetSlruSegment = 4,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 99,
|
||||
}
|
||||
|
||||
// Keep in sync with `pagestore_client.h`
|
||||
#[repr(u8)]
|
||||
enum PagestreamBeMessageTag {
|
||||
Exists = 100,
|
||||
Nblocks = 101,
|
||||
GetPage = 102,
|
||||
Error = 103,
|
||||
DbSize = 104,
|
||||
GetSlruSegment = 105,
|
||||
/* future tags above this line */
|
||||
/// For testing purposes, not available in production.
|
||||
#[cfg(feature = "testing")]
|
||||
Test = 199,
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamFeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
0 => Ok(PagestreamFeMessageTag::Exists),
|
||||
1 => Ok(PagestreamFeMessageTag::Nblocks),
|
||||
2 => Ok(PagestreamFeMessageTag::GetPage),
|
||||
3 => Ok(PagestreamFeMessageTag::DbSize),
|
||||
4 => Ok(PagestreamFeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
99 => Ok(PagestreamFeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<u8> for PagestreamBeMessageTag {
|
||||
type Error = u8;
|
||||
fn try_from(value: u8) -> Result<Self, u8> {
|
||||
match value {
|
||||
100 => Ok(PagestreamBeMessageTag::Exists),
|
||||
101 => Ok(PagestreamBeMessageTag::Nblocks),
|
||||
102 => Ok(PagestreamBeMessageTag::GetPage),
|
||||
103 => Ok(PagestreamBeMessageTag::Error),
|
||||
104 => Ok(PagestreamBeMessageTag::DbSize),
|
||||
105 => Ok(PagestreamBeMessageTag::GetSlruSegment),
|
||||
#[cfg(feature = "testing")]
|
||||
199 => Ok(PagestreamBeMessageTag::Test),
|
||||
_ => Err(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A GetPage request contains two LSN values:
|
||||
//
|
||||
// request_lsn: Get the page version at this point in time. Lsn::Max is a special value that means
|
||||
// "get the latest version present". It's used by the primary server, which knows that no one else
|
||||
// is writing WAL. 'not_modified_since' must be set to a proper value even if request_lsn is
|
||||
// Lsn::Max. Standby servers use the current replay LSN as the request LSN.
|
||||
//
|
||||
// not_modified_since: Hint to the pageserver that the client knows that the page has not been
|
||||
// modified between 'not_modified_since' and the request LSN. It's always correct to set
|
||||
// 'not_modified_since equal' to 'request_lsn' (unless Lsn::Max is used as the 'request_lsn'), but
|
||||
// passing an earlier LSN can speed up the request, by allowing the pageserver to process the
|
||||
// request without waiting for 'request_lsn' to arrive.
|
||||
//
|
||||
// The now-defunct V1 interface contained only one LSN, and a boolean 'latest' flag. The V1 interface was
|
||||
// sufficient for the primary; the 'lsn' was equivalent to the 'not_modified_since' value, and
|
||||
// 'latest' was set to true. The V2 interface was added because there was no correct way for a
|
||||
// standby to request a page at a particular non-latest LSN, and also include the
|
||||
// 'not_modified_since' hint. That led to an awkward choice of either using an old LSN in the
|
||||
// request, if the standby knows that the page hasn't been modified since, and risk getting an error
|
||||
// if that LSN has fallen behind the GC horizon, or requesting the current replay LSN, which could
|
||||
// require the pageserver unnecessarily to wait for the WAL to arrive up to that point. The new V2
|
||||
// interface allows sending both LSNs, and let the pageserver do the right thing. There was no
|
||||
// difference in the responses between V1 and V2.
|
||||
//
|
||||
// V3 version of protocol adds request ID to all requests. This request ID is also included in response
|
||||
// as well as other fields from requests, which allows to verify that we receive response for our request.
|
||||
// We copy fields from request to response to make checking more reliable: request ID is formed from process ID
|
||||
// and local counter, so in principle there can be duplicated requests IDs if process PID is reused.
|
||||
//
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub enum PagestreamProtocolVersion {
|
||||
V2,
|
||||
V3,
|
||||
}
|
||||
|
||||
pub type RequestId = u64;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamRequest {
|
||||
pub reqid: RequestId,
|
||||
pub request_lsn: Lsn,
|
||||
pub not_modified_since: Lsn,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamExistsRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamNblocksRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetPageRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub rel: RelTag,
|
||||
pub blkno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamDbSizeRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub dbnode: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
pub struct PagestreamGetSlruSegmentRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub kind: u8,
|
||||
pub segno: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamExistsResponse {
|
||||
pub req: PagestreamExistsRequest,
|
||||
pub exists: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamNblocksResponse {
|
||||
pub req: PagestreamNblocksRequest,
|
||||
pub n_blocks: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetPageResponse {
|
||||
pub req: PagestreamGetPageRequest,
|
||||
pub page: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamGetSlruSegmentResponse {
|
||||
pub req: PagestreamGetSlruSegmentRequest,
|
||||
pub segment: Bytes,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamErrorResponse {
|
||||
pub req: PagestreamRequest,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamDbSizeResponse {
|
||||
pub req: PagestreamDbSizeRequest,
|
||||
pub db_size: i64,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
pub struct PagestreamTestRequest {
|
||||
pub hdr: PagestreamRequest,
|
||||
pub batch_key: u64,
|
||||
pub message: String,
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
#[derive(Debug)]
|
||||
pub struct PagestreamTestResponse {
|
||||
pub req: PagestreamTestRequest,
|
||||
}
|
||||
|
||||
impl PagestreamFeMessage {
|
||||
/// Serialize a compute -> pageserver message. This is currently only used in testing
|
||||
/// tools. Always uses protocol version 3.
|
||||
pub fn serialize(&self) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
match self {
|
||||
Self::Exists(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Exists as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
}
|
||||
|
||||
Self::Nblocks(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Nblocks as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
}
|
||||
|
||||
Self::GetPage(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetPage as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.rel.spcnode);
|
||||
bytes.put_u32(req.rel.dbnode);
|
||||
bytes.put_u32(req.rel.relnode);
|
||||
bytes.put_u8(req.rel.forknum);
|
||||
bytes.put_u32(req.blkno);
|
||||
}
|
||||
|
||||
Self::DbSize(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::DbSize as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(req.dbnode);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::GetSlruSegment as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(req.kind);
|
||||
bytes.put_u32(req.segno);
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(req) => {
|
||||
bytes.put_u8(PagestreamFeMessageTag::Test as u8);
|
||||
bytes.put_u64(req.hdr.reqid);
|
||||
bytes.put_u64(req.hdr.request_lsn.0);
|
||||
bytes.put_u64(req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(req.batch_key);
|
||||
let message = req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn parse<R: std::io::Read>(
|
||||
body: &mut R,
|
||||
protocol_version: PagestreamProtocolVersion,
|
||||
) -> anyhow::Result<PagestreamFeMessage> {
|
||||
// these correspond to the NeonMessageTag enum in pagestore_client.h
|
||||
//
|
||||
// TODO: consider using protobuf or serde bincode for less error prone
|
||||
// serialization.
|
||||
let msg_tag = body.read_u8()?;
|
||||
let (reqid, request_lsn, not_modified_since) = match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => (
|
||||
0,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
PagestreamProtocolVersion::V3 => (
|
||||
body.read_u64::<BigEndian>()?,
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
Lsn::from(body.read_u64::<BigEndian>()?),
|
||||
),
|
||||
};
|
||||
|
||||
match PagestreamFeMessageTag::try_from(msg_tag)
|
||||
.map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))?
|
||||
{
|
||||
PagestreamFeMessageTag::Exists => {
|
||||
Ok(PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::Nblocks => {
|
||||
Ok(PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetPage => {
|
||||
Ok(PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel: RelTag {
|
||||
spcnode: body.read_u32::<BigEndian>()?,
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
relnode: body.read_u32::<BigEndian>()?,
|
||||
forknum: body.read_u8()?,
|
||||
},
|
||||
blkno: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::DbSize => {
|
||||
Ok(PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode: body.read_u32::<BigEndian>()?,
|
||||
}))
|
||||
}
|
||||
PagestreamFeMessageTag::GetSlruSegment => Ok(PagestreamFeMessage::GetSlruSegment(
|
||||
PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind: body.read_u8()?,
|
||||
segno: body.read_u32::<BigEndian>()?,
|
||||
},
|
||||
)),
|
||||
#[cfg(feature = "testing")]
|
||||
PagestreamFeMessageTag::Test => Ok(PagestreamFeMessage::Test(PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key: body.read_u64::<BigEndian>()?,
|
||||
message: {
|
||||
let len = body.read_u64::<BigEndian>()?;
|
||||
let mut buf = vec![0; len as usize];
|
||||
body.read_exact(&mut buf)?;
|
||||
String::from_utf8(buf)?
|
||||
},
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PagestreamBeMessage {
|
||||
pub fn serialize(&self, protocol_version: PagestreamProtocolVersion) -> Bytes {
|
||||
let mut bytes = BytesMut::new();
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
match protocol_version {
|
||||
PagestreamProtocolVersion::V2 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
PagestreamProtocolVersion::V3 => {
|
||||
match self {
|
||||
Self::Exists(resp) => {
|
||||
bytes.put_u8(Tag::Exists as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u8(resp.exists as u8);
|
||||
}
|
||||
|
||||
Self::Nblocks(resp) => {
|
||||
bytes.put_u8(Tag::Nblocks as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.n_blocks);
|
||||
}
|
||||
|
||||
Self::GetPage(resp) => {
|
||||
bytes.put_u8(Tag::GetPage as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.rel.spcnode);
|
||||
bytes.put_u32(resp.req.rel.dbnode);
|
||||
bytes.put_u32(resp.req.rel.relnode);
|
||||
bytes.put_u8(resp.req.rel.forknum);
|
||||
bytes.put_u32(resp.req.blkno);
|
||||
bytes.put(&resp.page[..])
|
||||
}
|
||||
|
||||
Self::Error(resp) => {
|
||||
bytes.put_u8(Tag::Error as u8);
|
||||
bytes.put_u64(resp.req.reqid);
|
||||
bytes.put_u64(resp.req.request_lsn.0);
|
||||
bytes.put_u64(resp.req.not_modified_since.0);
|
||||
bytes.put(resp.message.as_bytes());
|
||||
bytes.put_u8(0); // null terminator
|
||||
}
|
||||
Self::DbSize(resp) => {
|
||||
bytes.put_u8(Tag::DbSize as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u32(resp.req.dbnode);
|
||||
bytes.put_i64(resp.db_size);
|
||||
}
|
||||
|
||||
Self::GetSlruSegment(resp) => {
|
||||
bytes.put_u8(Tag::GetSlruSegment as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u8(resp.req.kind);
|
||||
bytes.put_u32(resp.req.segno);
|
||||
bytes.put_u32((resp.segment.len() / BLCKSZ) as u32);
|
||||
bytes.put(&resp.segment[..]);
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(resp) => {
|
||||
bytes.put_u8(Tag::Test as u8);
|
||||
bytes.put_u64(resp.req.hdr.reqid);
|
||||
bytes.put_u64(resp.req.hdr.request_lsn.0);
|
||||
bytes.put_u64(resp.req.hdr.not_modified_since.0);
|
||||
bytes.put_u64(resp.req.batch_key);
|
||||
let message = resp.req.message.as_bytes();
|
||||
bytes.put_u64(message.len() as u64);
|
||||
bytes.put_slice(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
bytes.into()
|
||||
}
|
||||
|
||||
pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
|
||||
let mut buf = buf.reader();
|
||||
let msg_tag = buf.read_u8()?;
|
||||
|
||||
use PagestreamBeMessageTag as Tag;
|
||||
let ok =
|
||||
match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
|
||||
Tag::Exists => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let exists = buf.read_u8()? != 0;
|
||||
Self::Exists(PagestreamExistsResponse {
|
||||
req: PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
exists,
|
||||
})
|
||||
}
|
||||
Tag::Nblocks => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
Self::Nblocks(PagestreamNblocksResponse {
|
||||
req: PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
},
|
||||
n_blocks,
|
||||
})
|
||||
}
|
||||
Tag::GetPage => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let rel = RelTag {
|
||||
spcnode: buf.read_u32::<BigEndian>()?,
|
||||
dbnode: buf.read_u32::<BigEndian>()?,
|
||||
relnode: buf.read_u32::<BigEndian>()?,
|
||||
forknum: buf.read_u8()?,
|
||||
};
|
||||
let blkno = buf.read_u32::<BigEndian>()?;
|
||||
let mut page = vec![0; 8192]; // TODO: use MaybeUninit
|
||||
buf.read_exact(&mut page)?;
|
||||
Self::GetPage(PagestreamGetPageResponse {
|
||||
req: PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
rel,
|
||||
blkno,
|
||||
},
|
||||
page: page.into(),
|
||||
})
|
||||
}
|
||||
Tag::Error => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let mut msg = Vec::new();
|
||||
buf.read_until(0, &mut msg)?;
|
||||
let cstring = std::ffi::CString::from_vec_with_nul(msg)?;
|
||||
let rust_str = cstring.to_str()?;
|
||||
Self::Error(PagestreamErrorResponse {
|
||||
req: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
message: rust_str.to_owned(),
|
||||
})
|
||||
}
|
||||
Tag::DbSize => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let dbnode = buf.read_u32::<BigEndian>()?;
|
||||
let db_size = buf.read_i64::<BigEndian>()?;
|
||||
Self::DbSize(PagestreamDbSizeResponse {
|
||||
req: PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
dbnode,
|
||||
},
|
||||
db_size,
|
||||
})
|
||||
}
|
||||
Tag::GetSlruSegment => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let kind = buf.read_u8()?;
|
||||
let segno = buf.read_u32::<BigEndian>()?;
|
||||
let n_blocks = buf.read_u32::<BigEndian>()?;
|
||||
let mut segment = vec![0; n_blocks as usize * BLCKSZ];
|
||||
buf.read_exact(&mut segment)?;
|
||||
Self::GetSlruSegment(PagestreamGetSlruSegmentResponse {
|
||||
req: PagestreamGetSlruSegmentRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
kind,
|
||||
segno,
|
||||
},
|
||||
segment: segment.into(),
|
||||
})
|
||||
}
|
||||
#[cfg(feature = "testing")]
|
||||
Tag::Test => {
|
||||
let reqid = buf.read_u64::<BigEndian>()?;
|
||||
let request_lsn = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let not_modified_since = Lsn(buf.read_u64::<BigEndian>()?);
|
||||
let batch_key = buf.read_u64::<BigEndian>()?;
|
||||
let len = buf.read_u64::<BigEndian>()?;
|
||||
let mut msg = vec![0; len as usize];
|
||||
buf.read_exact(&mut msg)?;
|
||||
let message = String::from_utf8(msg)?;
|
||||
Self::Test(PagestreamTestResponse {
|
||||
req: PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid,
|
||||
request_lsn,
|
||||
not_modified_since,
|
||||
},
|
||||
batch_key,
|
||||
message,
|
||||
},
|
||||
})
|
||||
}
|
||||
};
|
||||
let remaining = buf.into_inner();
|
||||
if !remaining.is_empty() {
|
||||
anyhow::bail!(
|
||||
"remaining bytes in msg with tag={msg_tag}: {}",
|
||||
remaining.len()
|
||||
);
|
||||
}
|
||||
Ok(ok)
|
||||
}
|
||||
|
||||
pub fn kind(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Exists(_) => "Exists",
|
||||
Self::Nblocks(_) => "Nblocks",
|
||||
Self::GetPage(_) => "GetPage",
|
||||
Self::Error(_) => "Error",
|
||||
Self::DbSize(_) => "DbSize",
|
||||
Self::GetSlruSegment(_) => "GetSlruSegment",
|
||||
#[cfg(feature = "testing")]
|
||||
Self::Test(_) => "Test",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_pagestream() {
|
||||
// Test serialization/deserialization of PagestreamFeMessage
|
||||
let messages = vec![
|
||||
PagestreamFeMessage::Exists(PagestreamExistsRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::Nblocks(PagestreamNblocksRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(4),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
}),
|
||||
PagestreamFeMessage::GetPage(PagestreamGetPageRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
rel: RelTag {
|
||||
forknum: 1,
|
||||
spcnode: 2,
|
||||
dbnode: 3,
|
||||
relnode: 4,
|
||||
},
|
||||
blkno: 7,
|
||||
}),
|
||||
PagestreamFeMessage::DbSize(PagestreamDbSizeRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(4),
|
||||
not_modified_since: Lsn(3),
|
||||
},
|
||||
dbnode: 7,
|
||||
}),
|
||||
];
|
||||
for msg in messages {
|
||||
let bytes = msg.serialize();
|
||||
let reconstructed =
|
||||
PagestreamFeMessage::parse(&mut bytes.reader(), PagestreamProtocolVersion::V3)
|
||||
.unwrap();
|
||||
assert!(msg == reconstructed);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,9 +1,9 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt;
|
||||
|
||||
use postgres_ffi::Oid;
|
||||
use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
|
||||
use postgres_ffi::relfile_utils::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name};
|
||||
use postgres_ffi_types::Oid;
|
||||
use postgres_ffi_types::constants::GLOBALTABLESPACE_OID;
|
||||
use postgres_ffi_types::forknum::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
///
|
||||
|
||||
@@ -35,7 +35,7 @@ use std::hash::{Hash, Hasher};
|
||||
|
||||
#[doc(inline)]
|
||||
pub use ::utils::shard::*;
|
||||
use postgres_ffi::relfile_utils::INIT_FORKNUM;
|
||||
use postgres_ffi_types::forknum::INIT_FORKNUM;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::key::Key;
|
||||
|
||||
@@ -23,22 +23,12 @@ pub struct ReAttachRequest {
|
||||
pub register: Option<NodeRegisterRequest>,
|
||||
}
|
||||
|
||||
fn default_mode() -> LocationConfigMode {
|
||||
LocationConfigMode::AttachedSingle
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct ReAttachResponseTenant {
|
||||
pub id: TenantShardId,
|
||||
/// Mandatory if LocationConfigMode is None or set to an Attached* mode
|
||||
pub r#gen: Option<u32>,
|
||||
|
||||
/// Default value only for backward compat: this field should be set
|
||||
#[serde(default = "default_mode")]
|
||||
pub mode: LocationConfigMode,
|
||||
|
||||
// Default value only for backward compat: this field should be set
|
||||
#[serde(default = "ShardStripeSize::default")]
|
||||
pub stripe_size: ShardStripeSize,
|
||||
}
|
||||
#[derive(Serialize, Deserialize)]
|
||||
|
||||
@@ -16,6 +16,7 @@ memoffset.workspace = true
|
||||
pprof.workspace = true
|
||||
thiserror.workspace = true
|
||||
serde.workspace = true
|
||||
postgres_ffi_types.workspace = true
|
||||
utils.workspace = true
|
||||
tracing.workspace = true
|
||||
|
||||
|
||||
@@ -11,11 +11,7 @@
|
||||
|
||||
use crate::{BLCKSZ, PageHeaderData};
|
||||
|
||||
//
|
||||
// From pg_tablespace_d.h
|
||||
//
|
||||
pub const DEFAULTTABLESPACE_OID: u32 = 1663;
|
||||
pub const GLOBALTABLESPACE_OID: u32 = 1664;
|
||||
// Note: There are a few more widely-used constants in the postgres_ffi_types::constants crate.
|
||||
|
||||
// From storage_xlog.h
|
||||
pub const XLOG_SMGR_CREATE: u8 = 0x10;
|
||||
|
||||
@@ -4,50 +4,7 @@
|
||||
use once_cell::sync::OnceCell;
|
||||
use regex::Regex;
|
||||
|
||||
//
|
||||
// Fork numbers, from relpath.h
|
||||
//
|
||||
pub const MAIN_FORKNUM: u8 = 0;
|
||||
pub const FSM_FORKNUM: u8 = 1;
|
||||
pub const VISIBILITYMAP_FORKNUM: u8 = 2;
|
||||
pub const INIT_FORKNUM: u8 = 3;
|
||||
|
||||
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
|
||||
pub enum FilePathError {
|
||||
#[error("invalid relation fork name")]
|
||||
InvalidForkName,
|
||||
#[error("invalid relation data file name")]
|
||||
InvalidFileName,
|
||||
}
|
||||
|
||||
impl From<core::num::ParseIntError> for FilePathError {
|
||||
fn from(_e: core::num::ParseIntError) -> Self {
|
||||
FilePathError::InvalidFileName
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert Postgres relation file's fork suffix to fork number.
|
||||
pub fn forkname_to_number(forkname: Option<&str>) -> Result<u8, FilePathError> {
|
||||
match forkname {
|
||||
// "main" is not in filenames, it's implicit if the fork name is not present
|
||||
None => Ok(MAIN_FORKNUM),
|
||||
Some("fsm") => Ok(FSM_FORKNUM),
|
||||
Some("vm") => Ok(VISIBILITYMAP_FORKNUM),
|
||||
Some("init") => Ok(INIT_FORKNUM),
|
||||
Some(_) => Err(FilePathError::InvalidForkName),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert Postgres fork number to the right suffix of the relation data file.
|
||||
pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
|
||||
match forknum {
|
||||
MAIN_FORKNUM => None,
|
||||
FSM_FORKNUM => Some("fsm"),
|
||||
VISIBILITYMAP_FORKNUM => Some("vm"),
|
||||
INIT_FORKNUM => Some("init"),
|
||||
_ => Some("UNKNOWN FORKNUM"),
|
||||
}
|
||||
}
|
||||
use postgres_ffi_types::forknum::*;
|
||||
|
||||
/// Parse a filename of a relation file. Returns (relfilenode, forknum, segno) tuple.
|
||||
///
|
||||
@@ -75,7 +32,9 @@ pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
|
||||
.ok_or(FilePathError::InvalidFileName)?;
|
||||
|
||||
let relnode_str = caps.name("relnode").unwrap().as_str();
|
||||
let relnode = relnode_str.parse::<u32>()?;
|
||||
let relnode = relnode_str
|
||||
.parse::<u32>()
|
||||
.map_err(|_e| FilePathError::InvalidFileName)?;
|
||||
|
||||
let forkname = caps.name("forkname").map(|f| f.as_str());
|
||||
let forknum = forkname_to_number(forkname)?;
|
||||
@@ -84,7 +43,11 @@ pub fn parse_relfilename(fname: &str) -> Result<(u32, u8, u32), FilePathError> {
|
||||
let segno = if segno_match.is_none() {
|
||||
0
|
||||
} else {
|
||||
segno_match.unwrap().as_str().parse::<u32>()?
|
||||
segno_match
|
||||
.unwrap()
|
||||
.as_str()
|
||||
.parse::<u32>()
|
||||
.map_err(|_e| FilePathError::InvalidFileName)?
|
||||
};
|
||||
|
||||
Ok((relnode, forknum, segno))
|
||||
|
||||
11
libs/postgres_ffi_types/Cargo.toml
Normal file
11
libs/postgres_ffi_types/Cargo.toml
Normal file
@@ -0,0 +1,11 @@
|
||||
[package]
|
||||
name = "postgres_ffi_types"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
thiserror.workspace = true
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
8
libs/postgres_ffi_types/src/constants.rs
Normal file
8
libs/postgres_ffi_types/src/constants.rs
Normal file
@@ -0,0 +1,8 @@
|
||||
//! Misc constants, copied from PostgreSQL headers.
|
||||
//!
|
||||
//! Any constants included here must be the same in all PostgreSQL versions and unlikely to change
|
||||
//! in the future either!
|
||||
|
||||
// From pg_tablespace_d.h
|
||||
pub const DEFAULTTABLESPACE_OID: u32 = 1663;
|
||||
pub const GLOBALTABLESPACE_OID: u32 = 1664;
|
||||
36
libs/postgres_ffi_types/src/forknum.rs
Normal file
36
libs/postgres_ffi_types/src/forknum.rs
Normal file
@@ -0,0 +1,36 @@
|
||||
// Fork numbers, from relpath.h
|
||||
pub const MAIN_FORKNUM: u8 = 0;
|
||||
pub const FSM_FORKNUM: u8 = 1;
|
||||
pub const VISIBILITYMAP_FORKNUM: u8 = 2;
|
||||
pub const INIT_FORKNUM: u8 = 3;
|
||||
|
||||
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
|
||||
pub enum FilePathError {
|
||||
#[error("invalid relation fork name")]
|
||||
InvalidForkName,
|
||||
#[error("invalid relation data file name")]
|
||||
InvalidFileName,
|
||||
}
|
||||
|
||||
/// Convert Postgres relation file's fork suffix to fork number.
|
||||
pub fn forkname_to_number(forkname: Option<&str>) -> Result<u8, FilePathError> {
|
||||
match forkname {
|
||||
// "main" is not in filenames, it's implicit if the fork name is not present
|
||||
None => Ok(MAIN_FORKNUM),
|
||||
Some("fsm") => Ok(FSM_FORKNUM),
|
||||
Some("vm") => Ok(VISIBILITYMAP_FORKNUM),
|
||||
Some("init") => Ok(INIT_FORKNUM),
|
||||
Some(_) => Err(FilePathError::InvalidForkName),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert Postgres fork number to the right suffix of the relation data file.
|
||||
pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
|
||||
match forknum {
|
||||
MAIN_FORKNUM => None,
|
||||
FSM_FORKNUM => Some("fsm"),
|
||||
VISIBILITYMAP_FORKNUM => Some("vm"),
|
||||
INIT_FORKNUM => Some("init"),
|
||||
_ => Some("UNKNOWN FORKNUM"),
|
||||
}
|
||||
}
|
||||
13
libs/postgres_ffi_types/src/lib.rs
Normal file
13
libs/postgres_ffi_types/src/lib.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
//! This package contains some PostgreSQL constants and datatypes that are the same in all versions
|
||||
//! of PostgreSQL and unlikely to change in the future either. These could be derived from the
|
||||
//! PostgreSQL headers with 'bindgen', but in order to avoid proliferating the dependency to bindgen
|
||||
//! and the PostgreSQL C headers to all services, we prefer to have this small stand-alone crate for
|
||||
//! them instead.
|
||||
//!
|
||||
//! Be mindful in what you add here, as these types are deeply ingrained in the APIs.
|
||||
|
||||
pub mod constants;
|
||||
pub mod forknum;
|
||||
|
||||
pub type Oid = u32;
|
||||
pub type RepOriginId = u16;
|
||||
@@ -36,7 +36,10 @@ impl FeatureResolverBackgroundLoop {
|
||||
// Main loop of updating the feature flags.
|
||||
handle.spawn(
|
||||
async move {
|
||||
tracing::info!("Starting PostHog feature resolver");
|
||||
tracing::info!(
|
||||
"Starting PostHog feature resolver with refresh period: {:?}",
|
||||
refresh_period
|
||||
);
|
||||
let mut ticker = tokio::time::interval(refresh_period);
|
||||
ticker.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
|
||||
loop {
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use std::io;
|
||||
|
||||
use tokio::net::TcpStream;
|
||||
|
||||
use crate::client::SocketConfig;
|
||||
@@ -8,7 +6,7 @@ use crate::tls::MakeTlsConnect;
|
||||
use crate::{Error, cancel_query_raw, connect_socket};
|
||||
|
||||
pub(crate) async fn cancel_query<T>(
|
||||
config: Option<SocketConfig>,
|
||||
config: SocketConfig,
|
||||
ssl_mode: SslMode,
|
||||
tls: T,
|
||||
process_id: i32,
|
||||
@@ -17,16 +15,6 @@ pub(crate) async fn cancel_query<T>(
|
||||
where
|
||||
T: MakeTlsConnect<TcpStream>,
|
||||
{
|
||||
let config = match config {
|
||||
Some(config) => config,
|
||||
None => {
|
||||
return Err(Error::connect(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"unknown host",
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
let hostname = match &config.host {
|
||||
Host::Tcp(host) => &**host,
|
||||
};
|
||||
|
||||
@@ -7,11 +7,16 @@ use crate::config::SslMode;
|
||||
use crate::tls::{MakeTlsConnect, TlsConnect};
|
||||
use crate::{Error, cancel_query, cancel_query_raw};
|
||||
|
||||
/// The capability to request cancellation of in-progress queries on a
|
||||
/// connection.
|
||||
#[derive(Clone, Serialize, Deserialize)]
|
||||
/// A cancellation token that allows easy cancellation of a query.
|
||||
#[derive(Clone)]
|
||||
pub struct CancelToken {
|
||||
pub socket_config: Option<SocketConfig>,
|
||||
pub socket_config: SocketConfig,
|
||||
pub raw: RawCancelToken,
|
||||
}
|
||||
|
||||
/// A raw cancellation token that allows cancellation of a query, given a fresh connection to postgres.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RawCancelToken {
|
||||
pub ssl_mode: SslMode,
|
||||
pub process_id: i32,
|
||||
pub secret_key: i32,
|
||||
@@ -36,14 +41,16 @@ impl CancelToken {
|
||||
{
|
||||
cancel_query::cancel_query(
|
||||
self.socket_config.clone(),
|
||||
self.ssl_mode,
|
||||
self.raw.ssl_mode,
|
||||
tls,
|
||||
self.process_id,
|
||||
self.secret_key,
|
||||
self.raw.process_id,
|
||||
self.raw.secret_key,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl RawCancelToken {
|
||||
/// Like `cancel_query`, but uses a stream which is already connected to the server rather than opening a new
|
||||
/// connection itself.
|
||||
pub async fn cancel_query_raw<S, T>(&self, stream: S, tls: T) -> Result<(), Error>
|
||||
|
||||
@@ -12,6 +12,7 @@ use postgres_protocol2::message::frontend;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::cancel_token::RawCancelToken;
|
||||
use crate::codec::{BackendMessages, FrontendMessage};
|
||||
use crate::config::{Host, SslMode};
|
||||
use crate::query::RowStream;
|
||||
@@ -331,10 +332,12 @@ impl Client {
|
||||
/// connection associated with this client.
|
||||
pub fn cancel_token(&self) -> CancelToken {
|
||||
CancelToken {
|
||||
socket_config: Some(self.socket_config.clone()),
|
||||
ssl_mode: self.ssl_mode,
|
||||
process_id: self.process_id,
|
||||
secret_key: self.secret_key,
|
||||
socket_config: self.socket_config.clone(),
|
||||
raw: RawCancelToken {
|
||||
ssl_mode: self.ssl_mode,
|
||||
process_id: self.process_id,
|
||||
secret_key: self.secret_key,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
use postgres_protocol2::message::backend::ReadyForQueryBody;
|
||||
|
||||
pub use crate::cancel_token::CancelToken;
|
||||
pub use crate::cancel_token::{CancelToken, RawCancelToken};
|
||||
pub use crate::client::{Client, SocketConfig};
|
||||
pub use crate::config::Config;
|
||||
pub use crate::connect_raw::RawConnection;
|
||||
|
||||
@@ -87,6 +87,28 @@ pub enum RemoteStorageKind {
|
||||
AzureContainer(AzureConfig),
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(tag = "type")]
|
||||
/// Version of RemoteStorageKind which deserializes with type: LocalFs | AwsS3 | AzureContainer
|
||||
/// Needed for endpoint storage service
|
||||
pub enum TypedRemoteStorageKind {
|
||||
LocalFs { local_path: Utf8PathBuf },
|
||||
AwsS3(S3Config),
|
||||
AzureContainer(AzureConfig),
|
||||
}
|
||||
|
||||
impl From<TypedRemoteStorageKind> for RemoteStorageKind {
|
||||
fn from(value: TypedRemoteStorageKind) -> Self {
|
||||
match value {
|
||||
TypedRemoteStorageKind::LocalFs { local_path } => {
|
||||
RemoteStorageKind::LocalFs { local_path }
|
||||
}
|
||||
TypedRemoteStorageKind::AwsS3(v) => RemoteStorageKind::AwsS3(v),
|
||||
TypedRemoteStorageKind::AzureContainer(v) => RemoteStorageKind::AzureContainer(v),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// AWS S3 bucket coordinates and access credentials to manage the bucket contents (read and write).
|
||||
#[derive(Clone, PartialEq, Eq, Deserialize, Serialize)]
|
||||
pub struct S3Config {
|
||||
|
||||
@@ -31,6 +31,7 @@ use anyhow::Context;
|
||||
pub use azure_core::Etag;
|
||||
use bytes::Bytes;
|
||||
use camino::{Utf8Path, Utf8PathBuf};
|
||||
pub use config::TypedRemoteStorageKind;
|
||||
pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
|
||||
use futures::StreamExt;
|
||||
use futures::stream::Stream;
|
||||
@@ -676,6 +677,15 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
|
||||
}
|
||||
|
||||
impl GenericRemoteStorage {
|
||||
pub async fn from_storage_kind(kind: TypedRemoteStorageKind) -> anyhow::Result<Self> {
|
||||
Self::from_config(&RemoteStorageConfig {
|
||||
storage: kind.into(),
|
||||
timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
|
||||
small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn from_config(storage_config: &RemoteStorageConfig) -> anyhow::Result<Self> {
|
||||
let timeout = storage_config.timeout;
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ bytes.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
prost.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
postgres_ffi_types.workspace = true
|
||||
serde.workspace = true
|
||||
thiserror.workspace = true
|
||||
tokio = { workspace = true, features = ["io-util"] }
|
||||
|
||||
@@ -8,8 +8,8 @@ use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use postgres_ffi::pg_constants;
|
||||
use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM;
|
||||
use postgres_ffi::walrecord::*;
|
||||
use postgres_ffi_types::forknum::VISIBILITYMAP_FORKNUM;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::models::*;
|
||||
|
||||
@@ -25,6 +25,9 @@
|
||||
//! |
|
||||
//! |--> write to KV store within the pageserver
|
||||
|
||||
pub mod record;
|
||||
pub mod value;
|
||||
|
||||
use bytes::Bytes;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use postgres_ffi::walrecord::{
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
use bytes::Bytes;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::record::NeonWalRecord;
|
||||
use crate::models::record::NeonWalRecord;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub enum Value {
|
||||
@@ -1,4 +1,4 @@
|
||||
//! This module implements batch type for serialized [`pageserver_api::value::Value`]
|
||||
//! This module implements batch type for serialized [`crate::models::value::Value`]
|
||||
//! instances. Each batch contains a raw buffer (serialized values)
|
||||
//! and a list of metadata for each (key, LSN) tuple present in the batch.
|
||||
//!
|
||||
@@ -10,10 +10,8 @@ use std::collections::{BTreeSet, HashMap};
|
||||
use bytes::{Bytes, BytesMut};
|
||||
use pageserver_api::key::{CompactKey, Key, rel_block_to_key};
|
||||
use pageserver_api::keyspace::KeySpace;
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::reltag::RelTag;
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use pageserver_api::value::Value;
|
||||
use postgres_ffi::walrecord::{DecodedBkpBlock, DecodedWALRecord};
|
||||
use postgres_ffi::{BLCKSZ, page_is_new, page_set_lsn, pg_constants};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -21,6 +19,8 @@ use utils::bin_ser::BeSer;
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
use crate::models::InterpretedWalRecord;
|
||||
use crate::models::record::NeonWalRecord;
|
||||
use crate::models::value::Value;
|
||||
|
||||
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
|
||||
|
||||
|
||||
@@ -13,22 +13,24 @@ fn main() -> anyhow::Result<()> {
|
||||
// Tell cargo to invalidate the built crate whenever the wrapper changes
|
||||
println!("cargo:rerun-if-changed=bindgen_deps.h");
|
||||
|
||||
let root_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
|
||||
|
||||
// Finding the location of built libraries and Postgres C headers:
|
||||
// - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/pg_install`
|
||||
// - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/pg_install/{PG_MAJORVERSION}/include/postgresql/server`
|
||||
let pg_install_dir = if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
|
||||
postgres_install_dir.into()
|
||||
} else {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../pg_install")
|
||||
root_path.join("pg_install")
|
||||
};
|
||||
|
||||
let pg_install_abs = std::fs::canonicalize(pg_install_dir)?;
|
||||
let walproposer_lib_dir = pg_install_abs.join("build/walproposer-lib");
|
||||
let walproposer_lib_dir = root_path.join("build/walproposer-lib");
|
||||
let walproposer_lib_search_str = walproposer_lib_dir
|
||||
.to_str()
|
||||
.ok_or(anyhow!("Bad non-UTF path"))?;
|
||||
|
||||
let pgxn_neon = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../../pgxn/neon");
|
||||
let pgxn_neon = root_path.join("pgxn/neon");
|
||||
let pgxn_neon = std::fs::canonicalize(pgxn_neon)?;
|
||||
let pgxn_neon = pgxn_neon.to_str().ok_or(anyhow!("Bad non-UTF path"))?;
|
||||
|
||||
|
||||
@@ -311,7 +311,7 @@ extern "C" fn get_redo_start_lsn(wp: *mut WalProposer) -> XLogRecPtr {
|
||||
}
|
||||
}
|
||||
|
||||
extern "C-unwind" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) {
|
||||
unsafe extern "C-unwind" fn finish_sync_safekeepers(wp: *mut WalProposer, lsn: XLogRecPtr) -> ! {
|
||||
unsafe {
|
||||
let callback_data = (*(*wp).config).callback_data;
|
||||
let api = callback_data as *mut Box<dyn ApiImpl>;
|
||||
|
||||
@@ -144,7 +144,7 @@ pub trait ApiImpl {
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn finish_sync_safekeepers(&self, _lsn: u64) {
|
||||
fn finish_sync_safekeepers(&self, _lsn: u64) -> ! {
|
||||
todo!()
|
||||
}
|
||||
|
||||
@@ -469,7 +469,7 @@ mod tests {
|
||||
true
|
||||
}
|
||||
|
||||
fn finish_sync_safekeepers(&self, lsn: u64) {
|
||||
fn finish_sync_safekeepers(&self, lsn: u64) -> ! {
|
||||
self.sync_channel.send(lsn).unwrap();
|
||||
panic!("sync safekeepers finished at lsn={}", lsn);
|
||||
}
|
||||
|
||||
@@ -12,6 +12,9 @@ testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing", "
|
||||
|
||||
fuzz-read-path = ["testing"]
|
||||
|
||||
# Enables benchmarking only APIs
|
||||
benchmarking = []
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
arc-swap.workspace = true
|
||||
@@ -56,6 +59,7 @@ pin-project-lite.workspace = true
|
||||
postgres_backend.workspace = true
|
||||
postgres_connection.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
postgres_ffi_types.workspace = true
|
||||
postgres_initdb.workspace = true
|
||||
postgres-protocol.workspace = true
|
||||
postgres-types.workspace = true
|
||||
@@ -126,6 +130,7 @@ harness = false
|
||||
[[bench]]
|
||||
name = "bench_ingest"
|
||||
harness = false
|
||||
required-features = ["benchmarking"]
|
||||
|
||||
[[bench]]
|
||||
name = "upload_queue"
|
||||
|
||||
@@ -1,23 +1,30 @@
|
||||
use std::env;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::Arc;
|
||||
|
||||
use bytes::Bytes;
|
||||
use camino::Utf8PathBuf;
|
||||
use criterion::{Criterion, criterion_group, criterion_main};
|
||||
use futures::stream::FuturesUnordered;
|
||||
use pageserver::config::PageServerConf;
|
||||
use pageserver::context::{DownloadBehavior, RequestContext};
|
||||
use pageserver::keyspace::KeySpace;
|
||||
use pageserver::l0_flush::{L0FlushConfig, L0FlushGlobalState};
|
||||
use pageserver::task_mgr::TaskKind;
|
||||
use pageserver::tenant::storage_layer::InMemoryLayer;
|
||||
use pageserver::tenant::storage_layer::IoConcurrency;
|
||||
use pageserver::tenant::storage_layer::{InMemoryLayer, ValuesReconstructState};
|
||||
use pageserver::{page_cache, virtual_file};
|
||||
use pageserver_api::config::GetVectoredConcurrentIo;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::models::virtual_file::IoMode;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_api::value::Value;
|
||||
use strum::IntoEnumIterator;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use utils::bin_ser::BeSer;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::sync::gate::Gate;
|
||||
use wal_decoder::models::value::Value;
|
||||
use wal_decoder::serialized_batch::SerializedValueBatch;
|
||||
|
||||
// A very cheap hash for generating non-sequential keys.
|
||||
@@ -30,7 +37,7 @@ fn murmurhash32(mut h: u32) -> u32 {
|
||||
h
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug)]
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]
|
||||
enum KeyLayout {
|
||||
/// Sequential unique keys
|
||||
Sequential,
|
||||
@@ -40,19 +47,30 @@ enum KeyLayout {
|
||||
RandomReuse(u32),
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug)]
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]
|
||||
enum WriteDelta {
|
||||
Yes,
|
||||
No,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Copy, Debug, PartialEq)]
|
||||
enum ConcurrentReads {
|
||||
Yes,
|
||||
No,
|
||||
}
|
||||
|
||||
async fn ingest(
|
||||
conf: &'static PageServerConf,
|
||||
put_size: usize,
|
||||
put_count: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
concurrent_reads: ConcurrentReads,
|
||||
) -> anyhow::Result<()> {
|
||||
if concurrent_reads == ConcurrentReads::Yes {
|
||||
assert_eq!(key_layout, KeyLayout::Sequential);
|
||||
}
|
||||
|
||||
let mut lsn = utils::lsn::Lsn(1000);
|
||||
let mut key = Key::from_i128(0x0);
|
||||
|
||||
@@ -68,16 +86,18 @@ async fn ingest(
|
||||
let gate = utils::sync::gate::Gate::default();
|
||||
let cancel = CancellationToken::new();
|
||||
|
||||
let layer = InMemoryLayer::create(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
lsn,
|
||||
&gate,
|
||||
&cancel,
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
let layer = Arc::new(
|
||||
InMemoryLayer::create(
|
||||
conf,
|
||||
timeline_id,
|
||||
tenant_shard_id,
|
||||
lsn,
|
||||
&gate,
|
||||
&cancel,
|
||||
&ctx,
|
||||
)
|
||||
.await?,
|
||||
);
|
||||
|
||||
let data = Value::Image(Bytes::from(vec![0u8; put_size]));
|
||||
let data_ser_size = data.serialized_size().unwrap() as usize;
|
||||
@@ -86,6 +106,61 @@ async fn ingest(
|
||||
pageserver::context::DownloadBehavior::Download,
|
||||
);
|
||||
|
||||
const READ_BATCH_SIZE: u32 = 32;
|
||||
let (tx, mut rx) = tokio::sync::watch::channel::<Option<Key>>(None);
|
||||
let reader_cancel = CancellationToken::new();
|
||||
let reader_handle = if concurrent_reads == ConcurrentReads::Yes {
|
||||
Some(tokio::task::spawn({
|
||||
let cancel = reader_cancel.clone();
|
||||
let layer = layer.clone();
|
||||
let ctx = ctx.attached_child();
|
||||
async move {
|
||||
let gate = Gate::default();
|
||||
let gate_guard = gate.enter().unwrap();
|
||||
let io_concurrency = IoConcurrency::spawn_from_conf(
|
||||
GetVectoredConcurrentIo::SidecarTask,
|
||||
gate_guard,
|
||||
);
|
||||
|
||||
rx.wait_for(|key| key.is_some()).await.unwrap();
|
||||
|
||||
while !cancel.is_cancelled() {
|
||||
let key = match *rx.borrow() {
|
||||
Some(some) => some,
|
||||
None => unreachable!(),
|
||||
};
|
||||
|
||||
let mut start_key = key;
|
||||
start_key.field6 = key.field6.saturating_sub(READ_BATCH_SIZE);
|
||||
let key_range = start_key..key.next();
|
||||
|
||||
let mut reconstruct_state = ValuesReconstructState::new(io_concurrency.clone());
|
||||
|
||||
layer
|
||||
.get_values_reconstruct_data(
|
||||
KeySpace::single(key_range),
|
||||
Lsn(1)..Lsn(u64::MAX),
|
||||
&mut reconstruct_state,
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let mut collect_futs = std::mem::take(&mut reconstruct_state.keys)
|
||||
.into_values()
|
||||
.map(|state| state.sink_pending_ios())
|
||||
.collect::<FuturesUnordered<_>>();
|
||||
while collect_futs.next().await.is_some() {}
|
||||
}
|
||||
|
||||
drop(io_concurrency);
|
||||
gate.close().await;
|
||||
}
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
const BATCH_SIZE: usize = 16;
|
||||
let mut batch = Vec::new();
|
||||
|
||||
@@ -113,19 +188,27 @@ async fn ingest(
|
||||
|
||||
batch.push((key.to_compact(), lsn, data_ser_size, data.clone()));
|
||||
if batch.len() >= BATCH_SIZE {
|
||||
let last_key = Key::from_compact(batch.last().unwrap().0);
|
||||
|
||||
let this_batch = std::mem::take(&mut batch);
|
||||
let serialized = SerializedValueBatch::from_values(this_batch);
|
||||
layer.put_batch(serialized, &ctx).await?;
|
||||
|
||||
tx.send(Some(last_key)).unwrap();
|
||||
}
|
||||
}
|
||||
if !batch.is_empty() {
|
||||
let last_key = Key::from_compact(batch.last().unwrap().0);
|
||||
|
||||
let this_batch = std::mem::take(&mut batch);
|
||||
let serialized = SerializedValueBatch::from_values(this_batch);
|
||||
layer.put_batch(serialized, &ctx).await?;
|
||||
|
||||
tx.send(Some(last_key)).unwrap();
|
||||
}
|
||||
layer.freeze(lsn + 1).await;
|
||||
|
||||
if matches!(write_delta, WriteDelta::Yes) {
|
||||
if write_delta == WriteDelta::Yes {
|
||||
let l0_flush_state = L0FlushGlobalState::new(L0FlushConfig::Direct {
|
||||
max_concurrency: NonZeroUsize::new(1).unwrap(),
|
||||
});
|
||||
@@ -136,6 +219,11 @@ async fn ingest(
|
||||
tokio::fs::remove_file(path).await?;
|
||||
}
|
||||
|
||||
reader_cancel.cancel();
|
||||
if let Some(handle) = reader_handle {
|
||||
handle.await.unwrap();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -147,6 +235,7 @@ fn ingest_main(
|
||||
put_count: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
concurrent_reads: ConcurrentReads,
|
||||
) {
|
||||
pageserver::virtual_file::set_io_mode(io_mode);
|
||||
|
||||
@@ -156,7 +245,15 @@ fn ingest_main(
|
||||
.unwrap();
|
||||
|
||||
runtime.block_on(async move {
|
||||
let r = ingest(conf, put_size, put_count, key_layout, write_delta).await;
|
||||
let r = ingest(
|
||||
conf,
|
||||
put_size,
|
||||
put_count,
|
||||
key_layout,
|
||||
write_delta,
|
||||
concurrent_reads,
|
||||
)
|
||||
.await;
|
||||
if let Err(e) = r {
|
||||
panic!("{e:?}");
|
||||
}
|
||||
@@ -195,6 +292,7 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
key_size: usize,
|
||||
key_layout: KeyLayout,
|
||||
write_delta: WriteDelta,
|
||||
concurrent_reads: ConcurrentReads,
|
||||
}
|
||||
#[derive(Clone)]
|
||||
struct HandPickedParameters {
|
||||
@@ -245,7 +343,7 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
];
|
||||
let exploded_parameters = {
|
||||
let mut out = Vec::new();
|
||||
for io_mode in IoMode::iter() {
|
||||
for concurrent_reads in [ConcurrentReads::Yes, ConcurrentReads::No] {
|
||||
for param in expect.clone() {
|
||||
let HandPickedParameters {
|
||||
volume_mib,
|
||||
@@ -253,12 +351,18 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
key_layout,
|
||||
write_delta,
|
||||
} = param;
|
||||
|
||||
if key_layout != KeyLayout::Sequential && concurrent_reads == ConcurrentReads::Yes {
|
||||
continue;
|
||||
}
|
||||
|
||||
out.push(ExplodedParameters {
|
||||
io_mode,
|
||||
io_mode: IoMode::DirectRw,
|
||||
volume_mib,
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
concurrent_reads,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -272,9 +376,10 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
concurrent_reads,
|
||||
} = self;
|
||||
format!(
|
||||
"io_mode={io_mode:?} volume_mib={volume_mib:?} key_size_bytes={key_size:?} key_layout={key_layout:?} write_delta={write_delta:?}"
|
||||
"io_mode={io_mode:?} volume_mib={volume_mib:?} key_size_bytes={key_size:?} key_layout={key_layout:?} write_delta={write_delta:?} concurrent_reads={concurrent_reads:?}"
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -287,12 +392,23 @@ fn criterion_benchmark(c: &mut Criterion) {
|
||||
key_size,
|
||||
key_layout,
|
||||
write_delta,
|
||||
concurrent_reads,
|
||||
} = params;
|
||||
let put_count = volume_mib * 1024 * 1024 / key_size;
|
||||
group.throughput(criterion::Throughput::Bytes((key_size * put_count) as u64));
|
||||
group.sample_size(10);
|
||||
group.bench_function(id, |b| {
|
||||
b.iter(|| ingest_main(conf, io_mode, key_size, put_count, key_layout, write_delta))
|
||||
b.iter(|| {
|
||||
ingest_main(
|
||||
conf,
|
||||
io_mode,
|
||||
key_size,
|
||||
put_count,
|
||||
key_layout,
|
||||
write_delta,
|
||||
concurrent_reads,
|
||||
)
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,12 +67,12 @@ use once_cell::sync::Lazy;
|
||||
use pageserver::config::PageServerConf;
|
||||
use pageserver::walredo::{PostgresRedoManager, RedoAttemptType};
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use tokio::sync::Barrier;
|
||||
use tokio::task::JoinSet;
|
||||
use utils::id::TenantId;
|
||||
use utils::lsn::Lsn;
|
||||
use wal_decoder::models::record::NeonWalRecord;
|
||||
|
||||
fn bench(c: &mut Criterion) {
|
||||
macro_rules! bench_group {
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::sync::{Arc, Mutex};
|
||||
|
||||
use futures::stream::{SplitSink, SplitStream};
|
||||
use futures::{SinkExt, StreamExt};
|
||||
use pageserver_api::models::{
|
||||
use pageserver_api::pagestream_api::{
|
||||
PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
|
||||
};
|
||||
use pageserver_api::reltag::RelTag;
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
//!
|
||||
//! # local timeline dir
|
||||
//! ls test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE | \
|
||||
//! grep "__" | cargo run --release --bin pagectl draw-timeline-dir > out.svg
|
||||
//! grep "__" | cargo run --release --bin pagectl draw-timeline > out.svg
|
||||
//!
|
||||
//! # Layer map dump from `/v1/tenant/$TENANT/timeline/$TIMELINE/layer`
|
||||
//! (jq -r '.historic_layers[] | .layer_file_name' | cargo run -p pagectl draw-timeline) < layer-map.json > out.svg
|
||||
@@ -81,7 +81,11 @@ fn build_coordinate_compression_map<T: Ord + Copy>(coords: Vec<T>) -> BTreeMap<T
|
||||
fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
|
||||
let split: Vec<&str> = name.split("__").collect();
|
||||
let keys: Vec<&str> = split[0].split('-').collect();
|
||||
let mut lsns: Vec<&str> = split[1].split('-').collect();
|
||||
|
||||
// Remove the temporary file extension, e.g., remove the `.d20a.___temp` part from the following filename:
|
||||
// 000000067F000040490000404A00441B0000-000000067F000040490000404A00441B4000__000043483A34CE00.d20a.___temp
|
||||
let lsns = split[1].split('.').collect::<Vec<&str>>()[0];
|
||||
let mut lsns: Vec<&str> = lsns.split('-').collect();
|
||||
|
||||
// The current format of the layer file name: 000000067F0000000400000B150100000000-000000067F0000000400000D350100000000__00000000014B7AC8-v1-00000001
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ use pageserver::{page_cache, virtual_file};
|
||||
use pageserver_api::key::Key;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use crate::layer_map_analyzer::parse_filename;
|
||||
use crate::layer_map_analyzer::{LayerFile, parse_filename};
|
||||
|
||||
#[derive(Subcommand)]
|
||||
pub(crate) enum LayerCmd {
|
||||
@@ -38,6 +38,8 @@ pub(crate) enum LayerCmd {
|
||||
/// The id from list-layer command
|
||||
id: usize,
|
||||
},
|
||||
/// Dump all information of a layer file locally
|
||||
DumpLayerLocal { path: PathBuf },
|
||||
RewriteSummary {
|
||||
layer_file_path: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
@@ -131,15 +133,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
}
|
||||
|
||||
for (idx, layer_file) in to_print {
|
||||
println!(
|
||||
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
|
||||
idx,
|
||||
layer_file.key_range.start,
|
||||
layer_file.key_range.end,
|
||||
layer_file.lsn_range.start,
|
||||
layer_file.lsn_range.end,
|
||||
layer_file.is_delta,
|
||||
);
|
||||
print_layer_file(idx, &layer_file);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -159,16 +153,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
let layer = layer?;
|
||||
if let Ok(layer_file) = parse_filename(&layer.file_name().into_string().unwrap()) {
|
||||
if *id == idx {
|
||||
// TODO(chi): dedup code
|
||||
println!(
|
||||
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
|
||||
idx,
|
||||
layer_file.key_range.start,
|
||||
layer_file.key_range.end,
|
||||
layer_file.lsn_range.start,
|
||||
layer_file.lsn_range.end,
|
||||
layer_file.is_delta,
|
||||
);
|
||||
print_layer_file(idx, &layer_file);
|
||||
|
||||
if layer_file.is_delta {
|
||||
read_delta_file(layer.path(), &ctx).await?;
|
||||
@@ -183,6 +168,18 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
LayerCmd::DumpLayerLocal { path } => {
|
||||
if let Ok(layer_file) = parse_filename(path.file_name().unwrap().to_str().unwrap()) {
|
||||
print_layer_file(0, &layer_file);
|
||||
|
||||
if layer_file.is_delta {
|
||||
read_delta_file(path, &ctx).await?;
|
||||
} else {
|
||||
read_image_file(path, &ctx).await?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
LayerCmd::RewriteSummary {
|
||||
layer_file_path,
|
||||
new_tenant_id,
|
||||
@@ -247,3 +244,15 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn print_layer_file(idx: usize, layer_file: &LayerFile) {
|
||||
println!(
|
||||
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
|
||||
idx,
|
||||
layer_file.key_range.start,
|
||||
layer_file.key_range.end,
|
||||
layer_file.lsn_range.start,
|
||||
layer_file.lsn_range.end,
|
||||
layer_file.is_delta,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -5,11 +5,16 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
bytes.workspace = true
|
||||
futures.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
postgres_ffi.workspace = true
|
||||
prost.workspace = true
|
||||
strum.workspace = true
|
||||
strum_macros.workspace = true
|
||||
thiserror.workspace = true
|
||||
tokio.workspace = true
|
||||
tonic.workspace = true
|
||||
utils.workspace = true
|
||||
workspace_hack.workspace = true
|
||||
|
||||
@@ -102,12 +102,14 @@ message CheckRelExistsResponse {
|
||||
bool exists = 1;
|
||||
}
|
||||
|
||||
// Requests a base backup at a given LSN.
|
||||
// Requests a base backup.
|
||||
message GetBaseBackupRequest {
|
||||
// The LSN to fetch a base backup at.
|
||||
ReadLsn read_lsn = 1;
|
||||
// The LSN to fetch the base backup at. 0 or absent means the latest LSN known to the Pageserver.
|
||||
uint64 lsn = 1;
|
||||
// If true, logical replication slots will not be created.
|
||||
bool replica = 2;
|
||||
// If true, include relation files in the base backup. Mainly for debugging and tests.
|
||||
bool full = 3;
|
||||
}
|
||||
|
||||
// Base backup response chunk, returned as an ordered stream.
|
||||
|
||||
200
pageserver/page_api/src/client.rs
Normal file
200
pageserver/page_api/src/client.rs
Normal file
@@ -0,0 +1,200 @@
|
||||
use std::convert::TryInto;
|
||||
|
||||
use bytes::Bytes;
|
||||
use futures::TryStreamExt;
|
||||
use futures::{Stream, StreamExt};
|
||||
use tonic::metadata::AsciiMetadataValue;
|
||||
use tonic::metadata::errors::InvalidMetadataValue;
|
||||
use tonic::transport::Channel;
|
||||
use tonic::{Request, Streaming};
|
||||
|
||||
use utils::id::TenantId;
|
||||
use utils::id::TimelineId;
|
||||
use utils::shard::ShardIndex;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
use crate::model;
|
||||
use crate::proto;
|
||||
|
||||
///
|
||||
/// AuthInterceptor adds tenant, timeline, and auth header to the channel. These
|
||||
/// headers are required at the pageserver.
|
||||
///
|
||||
#[derive(Clone)]
|
||||
struct AuthInterceptor {
|
||||
tenant_id: AsciiMetadataValue,
|
||||
timeline_id: AsciiMetadataValue,
|
||||
shard_id: AsciiMetadataValue,
|
||||
auth_header: Option<AsciiMetadataValue>, // including "Bearer " prefix
|
||||
}
|
||||
|
||||
impl AuthInterceptor {
|
||||
fn new(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
auth_token: Option<String>,
|
||||
shard_id: ShardIndex,
|
||||
) -> Result<Self, InvalidMetadataValue> {
|
||||
let tenant_ascii: AsciiMetadataValue = tenant_id.to_string().try_into()?;
|
||||
let timeline_ascii: AsciiMetadataValue = timeline_id.to_string().try_into()?;
|
||||
let shard_ascii: AsciiMetadataValue = shard_id.to_string().try_into()?;
|
||||
|
||||
let auth_header: Option<AsciiMetadataValue> = match auth_token {
|
||||
Some(token) => Some(format!("Bearer {token}").try_into()?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
tenant_id: tenant_ascii,
|
||||
shard_id: shard_ascii,
|
||||
timeline_id: timeline_ascii,
|
||||
auth_header,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl tonic::service::Interceptor for AuthInterceptor {
|
||||
fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
|
||||
req.metadata_mut()
|
||||
.insert("neon-tenant-id", self.tenant_id.clone());
|
||||
req.metadata_mut()
|
||||
.insert("neon-shard-id", self.shard_id.clone());
|
||||
req.metadata_mut()
|
||||
.insert("neon-timeline-id", self.timeline_id.clone());
|
||||
if let Some(auth_header) = &self.auth_header {
|
||||
req.metadata_mut()
|
||||
.insert("authorization", auth_header.clone());
|
||||
}
|
||||
Ok(req)
|
||||
}
|
||||
}
|
||||
#[derive(Clone)]
|
||||
pub struct Client {
|
||||
client: proto::PageServiceClient<
|
||||
tonic::service::interceptor::InterceptedService<Channel, AuthInterceptor>,
|
||||
>,
|
||||
}
|
||||
|
||||
impl Client {
|
||||
pub async fn new<T: TryInto<tonic::transport::Endpoint> + Send + Sync + 'static>(
|
||||
into_endpoint: T,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
shard_id: ShardIndex,
|
||||
auth_header: Option<String>,
|
||||
compression: Option<tonic::codec::CompressionEncoding>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let endpoint: tonic::transport::Endpoint = into_endpoint
|
||||
.try_into()
|
||||
.map_err(|_e| anyhow::anyhow!("failed to convert endpoint"))?;
|
||||
let channel = endpoint.connect().await?;
|
||||
let auth = AuthInterceptor::new(tenant_id, timeline_id, auth_header, shard_id)
|
||||
.map_err(|e| anyhow::anyhow!(e.to_string()))?;
|
||||
let mut client = proto::PageServiceClient::with_interceptor(channel, auth);
|
||||
|
||||
if let Some(compression) = compression {
|
||||
// TODO: benchmark this (including network latency).
|
||||
// TODO: consider enabling compression by default.
|
||||
client = client
|
||||
.accept_compressed(compression)
|
||||
.send_compressed(compression);
|
||||
}
|
||||
|
||||
Ok(Self { client })
|
||||
}
|
||||
|
||||
/// Returns whether a relation exists.
|
||||
pub async fn check_rel_exists(
|
||||
&mut self,
|
||||
req: model::CheckRelExistsRequest,
|
||||
) -> Result<model::CheckRelExistsResponse, tonic::Status> {
|
||||
let proto_req = proto::CheckRelExistsRequest::from(req);
|
||||
|
||||
let response = self.client.check_rel_exists(proto_req).await?;
|
||||
|
||||
let proto_resp = response.into_inner();
|
||||
Ok(proto_resp.into())
|
||||
}
|
||||
|
||||
/// Fetches a base backup.
|
||||
pub async fn get_base_backup(
|
||||
&mut self,
|
||||
req: model::GetBaseBackupRequest,
|
||||
) -> Result<impl Stream<Item = Result<Bytes, tonic::Status>> + 'static, tonic::Status> {
|
||||
let proto_req = proto::GetBaseBackupRequest::from(req);
|
||||
|
||||
let response_stream: Streaming<proto::GetBaseBackupResponseChunk> =
|
||||
self.client.get_base_backup(proto_req).await?.into_inner();
|
||||
|
||||
// TODO: Consider dechunking internally
|
||||
let domain_stream = response_stream.map(|chunk_res| {
|
||||
chunk_res.and_then(|proto_chunk| {
|
||||
proto_chunk.try_into().map_err(|e| {
|
||||
tonic::Status::internal(format!("Failed to convert response chunk: {}", e))
|
||||
})
|
||||
})
|
||||
});
|
||||
|
||||
Ok(domain_stream)
|
||||
}
|
||||
|
||||
/// Returns the total size of a database, as # of bytes.
|
||||
pub async fn get_db_size(
|
||||
&mut self,
|
||||
req: model::GetDbSizeRequest,
|
||||
) -> Result<u64, tonic::Status> {
|
||||
let proto_req = proto::GetDbSizeRequest::from(req);
|
||||
|
||||
let response = self.client.get_db_size(proto_req).await?;
|
||||
Ok(response.into_inner().into())
|
||||
}
|
||||
|
||||
/// Fetches pages.
|
||||
///
|
||||
/// This is implemented as a bidirectional streaming RPC for performance.
|
||||
/// Per-request errors are often returned as status_code instead of errors,
|
||||
/// to avoid tearing down the entire stream via tonic::Status.
|
||||
pub async fn get_pages<ReqSt>(
|
||||
&mut self,
|
||||
inbound: ReqSt,
|
||||
) -> Result<
|
||||
impl Stream<Item = Result<model::GetPageResponse, tonic::Status>> + Send + 'static,
|
||||
tonic::Status,
|
||||
>
|
||||
where
|
||||
ReqSt: Stream<Item = model::GetPageRequest> + Send + 'static,
|
||||
{
|
||||
let outbound_proto = inbound.map(|domain_req| domain_req.into());
|
||||
|
||||
let req_new = Request::new(outbound_proto);
|
||||
|
||||
let response_stream: Streaming<proto::GetPageResponse> =
|
||||
self.client.get_pages(req_new).await?.into_inner();
|
||||
|
||||
let domain_stream = response_stream.map_ok(model::GetPageResponse::from);
|
||||
|
||||
Ok(domain_stream)
|
||||
}
|
||||
|
||||
/// Returns the size of a relation, as # of blocks.
|
||||
pub async fn get_rel_size(
|
||||
&mut self,
|
||||
req: model::GetRelSizeRequest,
|
||||
) -> Result<model::GetRelSizeResponse, tonic::Status> {
|
||||
let proto_req = proto::GetRelSizeRequest::from(req);
|
||||
let response = self.client.get_rel_size(proto_req).await?;
|
||||
let proto_resp = response.into_inner();
|
||||
Ok(proto_resp.into())
|
||||
}
|
||||
|
||||
/// Fetches an SLRU segment.
|
||||
pub async fn get_slru_segment(
|
||||
&mut self,
|
||||
req: model::GetSlruSegmentRequest,
|
||||
) -> Result<model::GetSlruSegmentResponse, tonic::Status> {
|
||||
let proto_req = proto::GetSlruSegmentRequest::from(req);
|
||||
let response = self.client.get_slru_segment(proto_req).await?;
|
||||
Ok(response.into_inner().try_into()?)
|
||||
}
|
||||
}
|
||||
@@ -18,6 +18,8 @@ pub mod proto {
|
||||
pub use page_service_server::{PageService, PageServiceServer};
|
||||
}
|
||||
|
||||
mod client;
|
||||
pub use client::Client;
|
||||
mod model;
|
||||
|
||||
pub use model::*;
|
||||
|
||||
@@ -26,7 +26,7 @@ use utils::lsn::Lsn;
|
||||
use crate::proto;
|
||||
|
||||
/// A protocol error. Typically returned via try_from() or try_into().
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
#[derive(thiserror::Error, Clone, Debug)]
|
||||
pub enum ProtocolError {
|
||||
#[error("field '{0}' has invalid value '{1}'")]
|
||||
Invalid(&'static str, String),
|
||||
@@ -182,34 +182,33 @@ impl From<CheckRelExistsResponse> for proto::CheckRelExistsResponse {
|
||||
}
|
||||
}
|
||||
|
||||
/// Requests a base backup at a given LSN.
|
||||
/// Requests a base backup.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct GetBaseBackupRequest {
|
||||
/// The LSN to fetch a base backup at.
|
||||
pub read_lsn: ReadLsn,
|
||||
/// The LSN to fetch a base backup at. If None, uses the latest LSN known to the Pageserver.
|
||||
pub lsn: Option<Lsn>,
|
||||
/// If true, logical replication slots will not be created.
|
||||
pub replica: bool,
|
||||
/// If true, include relation files in the base backup. Mainly for debugging and tests.
|
||||
pub full: bool,
|
||||
}
|
||||
|
||||
impl TryFrom<proto::GetBaseBackupRequest> for GetBaseBackupRequest {
|
||||
type Error = ProtocolError;
|
||||
|
||||
fn try_from(pb: proto::GetBaseBackupRequest) -> Result<Self, Self::Error> {
|
||||
Ok(Self {
|
||||
read_lsn: pb
|
||||
.read_lsn
|
||||
.ok_or(ProtocolError::Missing("read_lsn"))?
|
||||
.try_into()?,
|
||||
impl From<proto::GetBaseBackupRequest> for GetBaseBackupRequest {
|
||||
fn from(pb: proto::GetBaseBackupRequest) -> Self {
|
||||
Self {
|
||||
lsn: (pb.lsn != 0).then_some(Lsn(pb.lsn)),
|
||||
replica: pb.replica,
|
||||
})
|
||||
full: pb.full,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GetBaseBackupRequest> for proto::GetBaseBackupRequest {
|
||||
fn from(request: GetBaseBackupRequest) -> Self {
|
||||
Self {
|
||||
read_lsn: Some(request.read_lsn.into()),
|
||||
lsn: request.lsn.unwrap_or_default().0,
|
||||
replica: request.replica,
|
||||
full: request.full,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -422,12 +421,45 @@ impl From<GetPageResponse> for proto::GetPageResponse {
|
||||
}
|
||||
}
|
||||
|
||||
impl GetPageResponse {
|
||||
/// Attempts to represent a tonic::Status as a GetPageResponse if appropriate. Returning a
|
||||
/// tonic::Status will terminate the GetPage stream, so per-request errors are emitted as a
|
||||
/// GetPageResponse with a non-OK status code instead.
|
||||
#[allow(clippy::result_large_err)]
|
||||
pub fn try_from_status(
|
||||
status: tonic::Status,
|
||||
request_id: RequestID,
|
||||
) -> Result<Self, tonic::Status> {
|
||||
// We shouldn't see an OK status here, because we're emitting an error.
|
||||
debug_assert_ne!(status.code(), tonic::Code::Ok);
|
||||
if status.code() == tonic::Code::Ok {
|
||||
return Err(tonic::Status::internal(format!(
|
||||
"unexpected OK status: {status:?}",
|
||||
)));
|
||||
}
|
||||
|
||||
// If we can't convert the tonic::Code to a GetPageStatusCode, this is not a per-request
|
||||
// error and we should return a tonic::Status to terminate the stream.
|
||||
let Ok(status_code) = status.code().try_into() else {
|
||||
return Err(status);
|
||||
};
|
||||
|
||||
// Return a GetPageResponse for the status.
|
||||
Ok(Self {
|
||||
request_id,
|
||||
status_code,
|
||||
reason: Some(status.message().to_string()),
|
||||
page_images: Vec::new(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// A GetPage response status code.
|
||||
///
|
||||
/// These are effectively equivalent to gRPC statuses. However, we use a bidirectional stream
|
||||
/// (potentially shared by many backends), and a gRPC status response would terminate the stream so
|
||||
/// we send GetPageResponse messages with these codes instead.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
#[derive(Clone, Copy, Debug, PartialEq, strum_macros::Display)]
|
||||
pub enum GetPageStatusCode {
|
||||
/// Unknown status. For forwards compatibility: used when an older client version receives a new
|
||||
/// status code from a newer server version.
|
||||
@@ -485,8 +517,42 @@ impl From<GetPageStatusCode> for i32 {
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tonic::Code> for GetPageStatusCode {
|
||||
type Error = tonic::Code;
|
||||
|
||||
fn try_from(code: tonic::Code) -> Result<Self, Self::Error> {
|
||||
use tonic::Code;
|
||||
|
||||
let status_code = match code {
|
||||
Code::Ok => Self::Ok,
|
||||
|
||||
// These are per-request errors, which should be returned as GetPageResponses.
|
||||
Code::AlreadyExists => Self::InvalidRequest,
|
||||
Code::DataLoss => Self::InternalError,
|
||||
Code::FailedPrecondition => Self::InvalidRequest,
|
||||
Code::InvalidArgument => Self::InvalidRequest,
|
||||
Code::Internal => Self::InternalError,
|
||||
Code::NotFound => Self::NotFound,
|
||||
Code::OutOfRange => Self::InvalidRequest,
|
||||
Code::ResourceExhausted => Self::SlowDown,
|
||||
|
||||
// These should terminate the stream by returning a tonic::Status.
|
||||
Code::Aborted
|
||||
| Code::Cancelled
|
||||
| Code::DeadlineExceeded
|
||||
| Code::PermissionDenied
|
||||
| Code::Unauthenticated
|
||||
| Code::Unavailable
|
||||
| Code::Unimplemented
|
||||
| Code::Unknown => return Err(code),
|
||||
};
|
||||
Ok(status_code)
|
||||
}
|
||||
}
|
||||
|
||||
// Fetches the size of a relation at a given LSN, as # of blocks. Only valid on shard 0, other
|
||||
// shards will error.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct GetRelSizeRequest {
|
||||
pub read_lsn: ReadLsn,
|
||||
pub rel: RelTag,
|
||||
@@ -530,6 +596,7 @@ impl From<GetRelSizeResponse> for proto::GetRelSizeResponse {
|
||||
}
|
||||
|
||||
/// Requests an SLRU segment. Only valid on shard 0, other shards will error.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct GetSlruSegmentRequest {
|
||||
pub read_lsn: ReadLsn,
|
||||
pub kind: SlruKind,
|
||||
|
||||
@@ -25,6 +25,7 @@ tokio.workspace = true
|
||||
tokio-stream.workspace = true
|
||||
tokio-util.workspace = true
|
||||
tonic.workspace = true
|
||||
url.workspace = true
|
||||
|
||||
pageserver_client.workspace = true
|
||||
pageserver_api.workspace = true
|
||||
|
||||
@@ -1,20 +1,29 @@
|
||||
use std::collections::HashMap;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::ops::Range;
|
||||
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
|
||||
use std::pin::Pin;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::anyhow;
|
||||
use futures::TryStreamExt as _;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_client::mgmt_api::ForceAwaitLogicalSize;
|
||||
use pageserver_client::page_service::BasebackupRequest;
|
||||
use pageserver_page_api as page_api;
|
||||
use rand::prelude::*;
|
||||
use tokio::io::AsyncRead;
|
||||
use tokio::sync::Barrier;
|
||||
use tokio::task::JoinSet;
|
||||
use tokio_util::compat::{TokioAsyncReadCompatExt as _, TokioAsyncWriteCompatExt as _};
|
||||
use tokio_util::io::StreamReader;
|
||||
use tonic::async_trait;
|
||||
use tracing::{info, instrument};
|
||||
use url::Url;
|
||||
use utils::id::TenantTimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
use utils::shard::ShardIndex;
|
||||
|
||||
use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
|
||||
use crate::util::{request_stats, tokio_thread_local_stats};
|
||||
@@ -24,14 +33,15 @@ use crate::util::{request_stats, tokio_thread_local_stats};
|
||||
pub(crate) struct Args {
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
mgmt_api_endpoint: String,
|
||||
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
|
||||
/// The Pageserver to connect to. Use postgresql:// for libpq, or grpc:// for gRPC.
|
||||
#[clap(long, default_value = "postgresql://postgres@localhost:64000")]
|
||||
page_service_connstring: String,
|
||||
#[clap(long)]
|
||||
pageserver_jwt: Option<String>,
|
||||
#[clap(long, default_value = "1")]
|
||||
num_clients: NonZeroUsize,
|
||||
#[clap(long, default_value = "1.0")]
|
||||
gzip_probability: f64,
|
||||
#[clap(long)]
|
||||
no_compression: bool,
|
||||
#[clap(long)]
|
||||
runtime: Option<humantime::Duration>,
|
||||
#[clap(long)]
|
||||
@@ -146,12 +156,27 @@ async fn main_impl(
|
||||
|
||||
let mut work_senders = HashMap::new();
|
||||
let mut tasks = Vec::new();
|
||||
for tl in &timelines {
|
||||
let scheme = match Url::parse(&args.page_service_connstring) {
|
||||
Ok(url) => url.scheme().to_lowercase().to_string(),
|
||||
Err(url::ParseError::RelativeUrlWithoutBase) => "postgresql".to_string(),
|
||||
Err(err) => return Err(anyhow!("invalid connstring: {err}")),
|
||||
};
|
||||
for &tl in &timelines {
|
||||
let (sender, receiver) = tokio::sync::mpsc::channel(1); // TODO: not sure what the implications of this are
|
||||
work_senders.insert(tl, sender);
|
||||
tasks.push(tokio::spawn(client(
|
||||
args,
|
||||
*tl,
|
||||
|
||||
let client: Box<dyn Client> = match scheme.as_str() {
|
||||
"postgresql" | "postgres" => Box::new(
|
||||
LibpqClient::new(&args.page_service_connstring, tl, !args.no_compression).await?,
|
||||
),
|
||||
"grpc" => Box::new(
|
||||
GrpcClient::new(&args.page_service_connstring, tl, !args.no_compression).await?,
|
||||
),
|
||||
scheme => return Err(anyhow!("invalid scheme {scheme}")),
|
||||
};
|
||||
|
||||
tasks.push(tokio::spawn(run_worker(
|
||||
client,
|
||||
Arc::clone(&start_work_barrier),
|
||||
receiver,
|
||||
Arc::clone(&all_work_done_barrier),
|
||||
@@ -166,13 +191,7 @@ async fn main_impl(
|
||||
let mut rng = rand::thread_rng();
|
||||
let target = all_targets.choose(&mut rng).unwrap();
|
||||
let lsn = target.lsn_range.clone().map(|r| rng.gen_range(r));
|
||||
(
|
||||
target.timeline,
|
||||
Work {
|
||||
lsn,
|
||||
gzip: rng.gen_bool(args.gzip_probability),
|
||||
},
|
||||
)
|
||||
(target.timeline, Work { lsn })
|
||||
};
|
||||
let sender = work_senders.get(&timeline).unwrap();
|
||||
// TODO: what if this blocks?
|
||||
@@ -216,13 +235,11 @@ async fn main_impl(
|
||||
#[derive(Copy, Clone)]
|
||||
struct Work {
|
||||
lsn: Option<Lsn>,
|
||||
gzip: bool,
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
async fn client(
|
||||
args: &'static Args,
|
||||
timeline: TenantTimelineId,
|
||||
async fn run_worker(
|
||||
mut client: Box<dyn Client>,
|
||||
start_work_barrier: Arc<Barrier>,
|
||||
mut work: tokio::sync::mpsc::Receiver<Work>,
|
||||
all_work_done_barrier: Arc<Barrier>,
|
||||
@@ -230,37 +247,14 @@ async fn client(
|
||||
) {
|
||||
start_work_barrier.wait().await;
|
||||
|
||||
let client = pageserver_client::page_service::Client::new(args.page_service_connstring.clone())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
while let Some(Work { lsn, gzip }) = work.recv().await {
|
||||
while let Some(Work { lsn }) = work.recv().await {
|
||||
let start = Instant::now();
|
||||
let copy_out_stream = client
|
||||
.basebackup(&BasebackupRequest {
|
||||
tenant_id: timeline.tenant_id,
|
||||
timeline_id: timeline.timeline_id,
|
||||
lsn,
|
||||
gzip,
|
||||
})
|
||||
.await
|
||||
.with_context(|| format!("start basebackup for {timeline}"))
|
||||
.unwrap();
|
||||
let stream = client.basebackup(lsn).await.unwrap();
|
||||
|
||||
use futures::StreamExt;
|
||||
let size = Arc::new(AtomicUsize::new(0));
|
||||
copy_out_stream
|
||||
.for_each({
|
||||
|r| {
|
||||
let size = Arc::clone(&size);
|
||||
async move {
|
||||
let size = Arc::clone(&size);
|
||||
size.fetch_add(r.unwrap().len(), Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
})
|
||||
.await;
|
||||
info!("basebackup size is {} bytes", size.load(Ordering::Relaxed));
|
||||
let size = futures::io::copy(stream.compat(), &mut tokio::io::sink().compat_write())
|
||||
.await
|
||||
.unwrap();
|
||||
info!("basebackup size is {size} bytes");
|
||||
let elapsed = start.elapsed();
|
||||
live_stats.inc();
|
||||
STATS.with(|stats| {
|
||||
@@ -270,3 +264,94 @@ async fn client(
|
||||
|
||||
all_work_done_barrier.wait().await;
|
||||
}
|
||||
|
||||
/// A basebackup client. This allows switching out the client protocol implementation.
|
||||
#[async_trait]
|
||||
trait Client: Send {
|
||||
async fn basebackup(
|
||||
&mut self,
|
||||
lsn: Option<Lsn>,
|
||||
) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send>>>;
|
||||
}
|
||||
|
||||
/// A libpq-based Pageserver client.
|
||||
struct LibpqClient {
|
||||
inner: pageserver_client::page_service::Client,
|
||||
ttid: TenantTimelineId,
|
||||
compression: bool,
|
||||
}
|
||||
|
||||
impl LibpqClient {
|
||||
async fn new(
|
||||
connstring: &str,
|
||||
ttid: TenantTimelineId,
|
||||
compression: bool,
|
||||
) -> anyhow::Result<Self> {
|
||||
Ok(Self {
|
||||
inner: pageserver_client::page_service::Client::new(connstring.to_string()).await?,
|
||||
ttid,
|
||||
compression,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Client for LibpqClient {
|
||||
async fn basebackup(
|
||||
&mut self,
|
||||
lsn: Option<Lsn>,
|
||||
) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send + 'static>>> {
|
||||
let req = BasebackupRequest {
|
||||
tenant_id: self.ttid.tenant_id,
|
||||
timeline_id: self.ttid.timeline_id,
|
||||
lsn,
|
||||
gzip: self.compression,
|
||||
};
|
||||
let stream = self.inner.basebackup(&req).await?;
|
||||
Ok(Box::pin(StreamReader::new(
|
||||
stream.map_err(std::io::Error::other),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
/// A gRPC Pageserver client.
|
||||
struct GrpcClient {
|
||||
inner: page_api::Client,
|
||||
}
|
||||
|
||||
impl GrpcClient {
|
||||
async fn new(
|
||||
connstring: &str,
|
||||
ttid: TenantTimelineId,
|
||||
compression: bool,
|
||||
) -> anyhow::Result<Self> {
|
||||
let inner = page_api::Client::new(
|
||||
connstring.to_string(),
|
||||
ttid.tenant_id,
|
||||
ttid.timeline_id,
|
||||
ShardIndex::unsharded(),
|
||||
None,
|
||||
compression.then_some(tonic::codec::CompressionEncoding::Zstd),
|
||||
)
|
||||
.await?;
|
||||
Ok(Self { inner })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Client for GrpcClient {
|
||||
async fn basebackup(
|
||||
&mut self,
|
||||
lsn: Option<Lsn>,
|
||||
) -> anyhow::Result<Pin<Box<dyn AsyncRead + Send + 'static>>> {
|
||||
let req = page_api::GetBaseBackupRequest {
|
||||
lsn,
|
||||
replica: false,
|
||||
full: false,
|
||||
};
|
||||
let stream = self.inner.get_base_backup(req).await?;
|
||||
Ok(Box::pin(StreamReader::new(
|
||||
stream.map_err(std::io::Error::other),
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,33 +10,31 @@ use anyhow::Context;
|
||||
use async_trait::async_trait;
|
||||
use bytes::Bytes;
|
||||
use camino::Utf8PathBuf;
|
||||
use futures::{Stream, StreamExt as _};
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::keyspace::KeySpaceAccum;
|
||||
use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest};
|
||||
use pageserver_api::pagestream_api::{PagestreamGetPageRequest, PagestreamRequest};
|
||||
use pageserver_api::reltag::RelTag;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
use pageserver_page_api::proto;
|
||||
use pageserver_page_api as page_api;
|
||||
use rand::prelude::*;
|
||||
use tokio::task::JoinSet;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::info;
|
||||
use url::Url;
|
||||
use utils::id::TenantTimelineId;
|
||||
use utils::lsn::Lsn;
|
||||
use utils::shard::ShardIndex;
|
||||
|
||||
use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
|
||||
use crate::util::{request_stats, tokio_thread_local_stats};
|
||||
|
||||
#[derive(clap::ValueEnum, Clone, Debug)]
|
||||
enum Protocol {
|
||||
Libpq,
|
||||
Grpc,
|
||||
}
|
||||
|
||||
/// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
|
||||
#[derive(clap::Parser)]
|
||||
pub(crate) struct Args {
|
||||
#[clap(long, default_value = "http://localhost:9898")]
|
||||
mgmt_api_endpoint: String,
|
||||
/// Pageserver connection string. Supports postgresql:// and grpc:// protocols.
|
||||
#[clap(long, default_value = "postgres://postgres@localhost:64000")]
|
||||
page_service_connstring: String,
|
||||
#[clap(long)]
|
||||
@@ -45,8 +43,9 @@ pub(crate) struct Args {
|
||||
num_clients: NonZeroUsize,
|
||||
#[clap(long)]
|
||||
runtime: Option<humantime::Duration>,
|
||||
#[clap(long, value_enum, default_value = "libpq")]
|
||||
protocol: Protocol,
|
||||
/// If true, enable compression (only for gRPC).
|
||||
#[clap(long)]
|
||||
compression: bool,
|
||||
/// Each client sends requests at the given rate.
|
||||
///
|
||||
/// If a request takes too long and we should be issuing a new request already,
|
||||
@@ -325,18 +324,32 @@ async fn main_impl(
|
||||
.unwrap();
|
||||
|
||||
Box::pin(async move {
|
||||
let client: Box<dyn Client> = match args.protocol {
|
||||
Protocol::Libpq => Box::new(
|
||||
LibpqClient::new(args.page_service_connstring.clone(), worker_id.timeline)
|
||||
.await
|
||||
.unwrap(),
|
||||
let scheme = match Url::parse(&args.page_service_connstring) {
|
||||
Ok(url) => url.scheme().to_lowercase().to_string(),
|
||||
Err(url::ParseError::RelativeUrlWithoutBase) => "postgresql".to_string(),
|
||||
Err(err) => panic!("invalid connstring: {err}"),
|
||||
};
|
||||
let client: Box<dyn Client> = match scheme.as_str() {
|
||||
"postgresql" | "postgres" => {
|
||||
assert!(!args.compression, "libpq does not support compression");
|
||||
Box::new(
|
||||
LibpqClient::new(&args.page_service_connstring, worker_id.timeline)
|
||||
.await
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
"grpc" => Box::new(
|
||||
GrpcClient::new(
|
||||
&args.page_service_connstring,
|
||||
worker_id.timeline,
|
||||
args.compression,
|
||||
)
|
||||
.await
|
||||
.unwrap(),
|
||||
),
|
||||
|
||||
Protocol::Grpc => Box::new(
|
||||
GrpcClient::new(args.page_service_connstring.clone(), worker_id.timeline)
|
||||
.await
|
||||
.unwrap(),
|
||||
),
|
||||
scheme => panic!("unsupported scheme {scheme}"),
|
||||
};
|
||||
run_worker(args, client, ss, cancel, rps_period, ranges, weights).await
|
||||
})
|
||||
@@ -543,8 +556,8 @@ struct LibpqClient {
|
||||
}
|
||||
|
||||
impl LibpqClient {
|
||||
async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
|
||||
let inner = pageserver_client::page_service::Client::new(connstring)
|
||||
async fn new(connstring: &str, ttid: TenantTimelineId) -> anyhow::Result<Self> {
|
||||
let inner = pageserver_client::page_service::Client::new(connstring.to_string())
|
||||
.await?
|
||||
.pagestream(ttid.tenant_id, ttid.timeline_id)
|
||||
.await?;
|
||||
@@ -600,34 +613,36 @@ impl Client for LibpqClient {
|
||||
}
|
||||
}
|
||||
|
||||
/// A gRPC client using the raw, no-frills gRPC client.
|
||||
/// A gRPC Pageserver client.
|
||||
struct GrpcClient {
|
||||
req_tx: tokio::sync::mpsc::Sender<proto::GetPageRequest>,
|
||||
resp_rx: tonic::Streaming<proto::GetPageResponse>,
|
||||
req_tx: tokio::sync::mpsc::Sender<page_api::GetPageRequest>,
|
||||
resp_rx: Pin<Box<dyn Stream<Item = Result<page_api::GetPageResponse, tonic::Status>> + Send>>,
|
||||
}
|
||||
|
||||
impl GrpcClient {
|
||||
async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
|
||||
let mut client = pageserver_page_api::proto::PageServiceClient::connect(connstring).await?;
|
||||
async fn new(
|
||||
connstring: &str,
|
||||
ttid: TenantTimelineId,
|
||||
compression: bool,
|
||||
) -> anyhow::Result<Self> {
|
||||
let mut client = page_api::Client::new(
|
||||
connstring.to_string(),
|
||||
ttid.tenant_id,
|
||||
ttid.timeline_id,
|
||||
ShardIndex::unsharded(),
|
||||
None,
|
||||
compression.then_some(tonic::codec::CompressionEncoding::Zstd),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// The channel has a buffer size of 1, since 0 is not allowed. It does not matter, since the
|
||||
// benchmark will control the queue depth (i.e. in-flight requests) anyway, and requests are
|
||||
// buffered by Tonic and the OS too.
|
||||
let (req_tx, req_rx) = tokio::sync::mpsc::channel(1);
|
||||
let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);
|
||||
let mut req = tonic::Request::new(req_stream);
|
||||
let metadata = req.metadata_mut();
|
||||
metadata.insert("neon-tenant-id", ttid.tenant_id.to_string().try_into()?);
|
||||
metadata.insert("neon-timeline-id", ttid.timeline_id.to_string().try_into()?);
|
||||
metadata.insert("neon-shard-id", "0000".try_into()?);
|
||||
let resp_rx = Box::pin(client.get_pages(req_stream).await?);
|
||||
|
||||
let resp = client.get_pages(req).await?;
|
||||
let resp_stream = resp.into_inner();
|
||||
|
||||
Ok(Self {
|
||||
req_tx,
|
||||
resp_rx: resp_stream,
|
||||
})
|
||||
Ok(Self { req_tx, resp_rx })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -641,27 +656,27 @@ impl Client for GrpcClient {
|
||||
rel: RelTag,
|
||||
blks: Vec<u32>,
|
||||
) -> anyhow::Result<()> {
|
||||
let req = proto::GetPageRequest {
|
||||
let req = page_api::GetPageRequest {
|
||||
request_id: req_id,
|
||||
request_class: proto::GetPageClass::Normal as i32,
|
||||
read_lsn: Some(proto::ReadLsn {
|
||||
request_lsn: req_lsn.0,
|
||||
not_modified_since_lsn: mod_lsn.0,
|
||||
}),
|
||||
rel: Some(rel.into()),
|
||||
block_number: blks,
|
||||
request_class: page_api::GetPageClass::Normal,
|
||||
read_lsn: page_api::ReadLsn {
|
||||
request_lsn: req_lsn,
|
||||
not_modified_since_lsn: Some(mod_lsn),
|
||||
},
|
||||
rel,
|
||||
block_numbers: blks,
|
||||
};
|
||||
self.req_tx.send(req).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn recv_get_page(&mut self) -> anyhow::Result<(u64, Vec<Bytes>)> {
|
||||
let resp = self.resp_rx.message().await?.unwrap();
|
||||
let resp = self.resp_rx.next().await.unwrap().unwrap();
|
||||
anyhow::ensure!(
|
||||
resp.status_code == proto::GetPageStatusCode::Ok as i32,
|
||||
resp.status_code == page_api::GetPageStatusCode::Ok,
|
||||
"unexpected status code: {}",
|
||||
resp.status_code
|
||||
resp.status_code,
|
||||
);
|
||||
Ok((resp.request_id, resp.page_image))
|
||||
Ok((resp.request_id, resp.page_images))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,13 +18,12 @@ use bytes::{BufMut, Bytes, BytesMut};
|
||||
use fail::fail_point;
|
||||
use pageserver_api::key::{Key, rel_block_to_key};
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
use postgres_ffi::pg_constants::{
|
||||
DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID, PG_HBA, PGDATA_SPECIAL_FILES,
|
||||
};
|
||||
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
|
||||
use postgres_ffi::pg_constants::{PG_HBA, PGDATA_SPECIAL_FILES};
|
||||
use postgres_ffi::{
|
||||
BLCKSZ, PG_TLI, RELSEG_SIZE, WAL_SEGMENT_SIZE, XLogFileName, dispatch_pgversion, pg_constants,
|
||||
};
|
||||
use postgres_ffi_types::constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
|
||||
use postgres_ffi_types::forknum::{INIT_FORKNUM, MAIN_FORKNUM};
|
||||
use tokio::io;
|
||||
use tokio::io::AsyncWrite;
|
||||
use tokio_tar::{Builder, EntryType, Header};
|
||||
@@ -372,6 +371,7 @@ where
|
||||
.partition(
|
||||
self.timeline.get_shard_identity(),
|
||||
self.timeline.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
|
||||
BLCKSZ as u64,
|
||||
);
|
||||
|
||||
let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);
|
||||
|
||||
@@ -19,7 +19,10 @@ use utils::{
|
||||
use crate::{
|
||||
basebackup::send_basebackup_tarball,
|
||||
context::{DownloadBehavior, RequestContext},
|
||||
metrics::{BASEBACKUP_CACHE_ENTRIES, BASEBACKUP_CACHE_PREPARE, BASEBACKUP_CACHE_READ},
|
||||
metrics::{
|
||||
BASEBACKUP_CACHE_ENTRIES, BASEBACKUP_CACHE_PREPARE, BASEBACKUP_CACHE_READ,
|
||||
BASEBACKUP_CACHE_SIZE,
|
||||
},
|
||||
task_mgr::TaskKind,
|
||||
tenant::{
|
||||
Timeline,
|
||||
@@ -36,8 +39,13 @@ pub struct BasebackupPrepareRequest {
|
||||
pub type BasebackupPrepareSender = UnboundedSender<BasebackupPrepareRequest>;
|
||||
pub type BasebackupPrepareReceiver = UnboundedReceiver<BasebackupPrepareRequest>;
|
||||
|
||||
type BasebackupRemoveEntrySender = UnboundedSender<Utf8PathBuf>;
|
||||
type BasebackupRemoveEntryReceiver = UnboundedReceiver<Utf8PathBuf>;
|
||||
#[derive(Clone)]
|
||||
struct CacheEntry {
|
||||
/// LSN at which the basebackup was taken.
|
||||
lsn: Lsn,
|
||||
/// Size of the basebackup archive in bytes.
|
||||
size_bytes: u64,
|
||||
}
|
||||
|
||||
/// BasebackupCache stores cached basebackup archives for timelines on local disk.
|
||||
///
|
||||
@@ -53,21 +61,12 @@ type BasebackupRemoveEntryReceiver = UnboundedReceiver<Utf8PathBuf>;
|
||||
/// and ~1 RPS for get requests.
|
||||
pub struct BasebackupCache {
|
||||
data_dir: Utf8PathBuf,
|
||||
config: BasebackupCacheConfig,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
remove_entry_sender: BasebackupRemoveEntrySender,
|
||||
|
||||
entries: std::sync::Mutex<HashMap<TenantTimelineId, Lsn>>,
|
||||
|
||||
cancel: CancellationToken,
|
||||
entries: std::sync::Mutex<HashMap<TenantTimelineId, CacheEntry>>,
|
||||
|
||||
read_hit_count: GenericCounter<AtomicU64>,
|
||||
read_miss_count: GenericCounter<AtomicU64>,
|
||||
read_err_count: GenericCounter<AtomicU64>,
|
||||
|
||||
prepare_ok_count: GenericCounter<AtomicU64>,
|
||||
prepare_skip_count: GenericCounter<AtomicU64>,
|
||||
prepare_err_count: GenericCounter<AtomicU64>,
|
||||
}
|
||||
|
||||
impl BasebackupCache {
|
||||
@@ -83,35 +82,32 @@ impl BasebackupCache {
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
cancel: CancellationToken,
|
||||
) -> Arc<Self> {
|
||||
let (remove_entry_sender, remove_entry_receiver) = tokio::sync::mpsc::unbounded_channel();
|
||||
|
||||
let enabled = config.is_some();
|
||||
|
||||
let cache = Arc::new(BasebackupCache {
|
||||
data_dir,
|
||||
config: config.unwrap_or_default(),
|
||||
tenant_manager,
|
||||
remove_entry_sender,
|
||||
|
||||
entries: std::sync::Mutex::new(HashMap::new()),
|
||||
|
||||
cancel,
|
||||
|
||||
read_hit_count: BASEBACKUP_CACHE_READ.with_label_values(&["hit"]),
|
||||
read_miss_count: BASEBACKUP_CACHE_READ.with_label_values(&["miss"]),
|
||||
read_err_count: BASEBACKUP_CACHE_READ.with_label_values(&["error"]),
|
||||
|
||||
prepare_ok_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&["ok"]),
|
||||
prepare_skip_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&["skip"]),
|
||||
prepare_err_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&["error"]),
|
||||
});
|
||||
|
||||
if enabled {
|
||||
runtime_handle.spawn(
|
||||
cache
|
||||
.clone()
|
||||
.background(prepare_receiver, remove_entry_receiver),
|
||||
);
|
||||
if let Some(config) = config {
|
||||
let background = BackgroundTask {
|
||||
c: cache.clone(),
|
||||
|
||||
config,
|
||||
tenant_manager,
|
||||
cancel,
|
||||
|
||||
entry_count: 0,
|
||||
total_size_bytes: 0,
|
||||
|
||||
prepare_ok_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&["ok"]),
|
||||
prepare_skip_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&["skip"]),
|
||||
prepare_err_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&["error"]),
|
||||
};
|
||||
runtime_handle.spawn(background.run(prepare_receiver));
|
||||
}
|
||||
|
||||
cache
|
||||
@@ -129,7 +125,7 @@ impl BasebackupCache {
|
||||
) -> Option<tokio::fs::File> {
|
||||
// Fast path. Check if the entry exists using the in-memory state.
|
||||
let tti = TenantTimelineId::new(tenant_id, timeline_id);
|
||||
if self.entries.lock().unwrap().get(&tti) != Some(&lsn) {
|
||||
if self.entries.lock().unwrap().get(&tti).map(|e| e.lsn) != Some(lsn) {
|
||||
self.read_miss_count.inc();
|
||||
return None;
|
||||
}
|
||||
@@ -167,9 +163,41 @@ impl BasebackupCache {
|
||||
self.data_dir
|
||||
.join(Self::entry_filename(tenant_id, timeline_id, lsn))
|
||||
}
|
||||
}
|
||||
|
||||
/// The background task that does the job to prepare basebackups
|
||||
/// and manage the cache entries on disk.
|
||||
/// It is a separate struct from BasebackupCache to allow holding
|
||||
/// a mutable reference to this state without a mutex lock,
|
||||
/// while BasebackupCache is referenced by the clients.
|
||||
struct BackgroundTask {
|
||||
c: Arc<BasebackupCache>,
|
||||
|
||||
config: BasebackupCacheConfig,
|
||||
tenant_manager: Arc<TenantManager>,
|
||||
cancel: CancellationToken,
|
||||
|
||||
/// Number of the entries in the cache.
|
||||
/// This counter is used for metrics and applying cache limits.
|
||||
/// It generally should be equal to c.entries.len(), but it's calculated
|
||||
/// pessimistically for abnormal situations: if we encountered some errors
|
||||
/// during removing the entry from disk, we won't decrement this counter to
|
||||
/// make sure that we don't exceed the limit with "trashed" files on the disk.
|
||||
/// It will also count files in the data_dir that are not valid cache entries.
|
||||
entry_count: usize,
|
||||
/// Total size of all the entries on the disk.
|
||||
/// This counter is used for metrics and applying cache limits.
|
||||
/// Similar to entry_count, it is calculated pessimistically for abnormal situations.
|
||||
total_size_bytes: u64,
|
||||
|
||||
prepare_ok_count: GenericCounter<AtomicU64>,
|
||||
prepare_skip_count: GenericCounter<AtomicU64>,
|
||||
prepare_err_count: GenericCounter<AtomicU64>,
|
||||
}
|
||||
|
||||
impl BackgroundTask {
|
||||
fn tmp_dir(&self) -> Utf8PathBuf {
|
||||
self.data_dir.join("tmp")
|
||||
self.c.data_dir.join("tmp")
|
||||
}
|
||||
|
||||
fn entry_tmp_path(
|
||||
@@ -179,7 +207,7 @@ impl BasebackupCache {
|
||||
lsn: Lsn,
|
||||
) -> Utf8PathBuf {
|
||||
self.tmp_dir()
|
||||
.join(Self::entry_filename(tenant_id, timeline_id, lsn))
|
||||
.join(BasebackupCache::entry_filename(tenant_id, timeline_id, lsn))
|
||||
}
|
||||
|
||||
fn parse_entry_filename(filename: &str) -> Option<(TenantId, TimelineId, Lsn)> {
|
||||
@@ -208,11 +236,11 @@ impl BasebackupCache {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn cleanup(&self) -> anyhow::Result<()> {
|
||||
async fn cleanup(&mut self) -> anyhow::Result<()> {
|
||||
self.clean_tmp_dir().await?;
|
||||
|
||||
// Remove outdated entries.
|
||||
let entries_old = self.entries.lock().unwrap().clone();
|
||||
// Leave only up-to-date entries.
|
||||
let entries_old = self.c.entries.lock().unwrap().clone();
|
||||
let mut entries_new = HashMap::new();
|
||||
for (tenant_shard_id, tenant_slot) in self.tenant_manager.list() {
|
||||
if !tenant_shard_id.is_shard_zero() {
|
||||
@@ -225,31 +253,32 @@ impl BasebackupCache {
|
||||
|
||||
for timeline in tenant.list_timelines() {
|
||||
let tti = TenantTimelineId::new(tenant_id, timeline.timeline_id);
|
||||
if let Some(&entry_lsn) = entries_old.get(&tti) {
|
||||
if timeline.get_last_record_lsn() <= entry_lsn {
|
||||
entries_new.insert(tti, entry_lsn);
|
||||
if let Some(entry) = entries_old.get(&tti) {
|
||||
if timeline.get_last_record_lsn() <= entry.lsn {
|
||||
entries_new.insert(tti, entry.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (&tti, &lsn) in entries_old.iter() {
|
||||
// Try to remove all entries that are not up-to-date.
|
||||
for (&tti, entry) in entries_old.iter() {
|
||||
if !entries_new.contains_key(&tti) {
|
||||
self.remove_entry_sender
|
||||
.send(self.entry_path(tti.tenant_id, tti.timeline_id, lsn))
|
||||
.unwrap();
|
||||
self.try_remove_entry(tti.tenant_id, tti.timeline_id, entry)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
BASEBACKUP_CACHE_ENTRIES.set(entries_new.len() as i64);
|
||||
*self.entries.lock().unwrap() = entries_new;
|
||||
// Note: BackgroundTask is the only writer for self.c.entries,
|
||||
// so it couldn't have been modified concurrently.
|
||||
*self.c.entries.lock().unwrap() = entries_new;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn on_startup(&self) -> anyhow::Result<()> {
|
||||
async fn on_startup(&mut self) -> anyhow::Result<()> {
|
||||
// Create data_dir if it does not exist.
|
||||
tokio::fs::create_dir_all(&self.data_dir)
|
||||
tokio::fs::create_dir_all(&self.c.data_dir)
|
||||
.await
|
||||
.context("Failed to create basebackup cache data directory")?;
|
||||
|
||||
@@ -258,8 +287,8 @@ impl BasebackupCache {
|
||||
.context("Failed to clean tmp directory")?;
|
||||
|
||||
// Read existing entries from the data_dir and add them to in-memory state.
|
||||
let mut entries = HashMap::new();
|
||||
let mut dir = tokio::fs::read_dir(&self.data_dir).await?;
|
||||
let mut entries = HashMap::<TenantTimelineId, CacheEntry>::new();
|
||||
let mut dir = tokio::fs::read_dir(&self.c.data_dir).await?;
|
||||
while let Some(dir_entry) = dir.next_entry().await? {
|
||||
let filename = dir_entry.file_name();
|
||||
|
||||
@@ -268,33 +297,43 @@ impl BasebackupCache {
|
||||
continue;
|
||||
}
|
||||
|
||||
let size_bytes = dir_entry
|
||||
.metadata()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
anyhow::anyhow!("Failed to read metadata for file {:?}: {:?}", filename, e)
|
||||
})?
|
||||
.len();
|
||||
|
||||
self.entry_count += 1;
|
||||
BASEBACKUP_CACHE_ENTRIES.set(self.entry_count as u64);
|
||||
|
||||
self.total_size_bytes += size_bytes;
|
||||
BASEBACKUP_CACHE_SIZE.set(self.total_size_bytes);
|
||||
|
||||
let parsed = Self::parse_entry_filename(filename.to_string_lossy().as_ref());
|
||||
let Some((tenant_id, timeline_id, lsn)) = parsed else {
|
||||
tracing::warn!("Invalid basebackup cache file name: {:?}", filename);
|
||||
continue;
|
||||
};
|
||||
|
||||
let cur_entry = CacheEntry { lsn, size_bytes };
|
||||
|
||||
let tti = TenantTimelineId::new(tenant_id, timeline_id);
|
||||
|
||||
use std::collections::hash_map::Entry::*;
|
||||
|
||||
match entries.entry(tti) {
|
||||
Occupied(mut entry) => {
|
||||
let entry_lsn = *entry.get();
|
||||
let found_entry = entry.get();
|
||||
// Leave only the latest entry, remove the old one.
|
||||
if lsn < entry_lsn {
|
||||
self.remove_entry_sender.send(self.entry_path(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
lsn,
|
||||
))?;
|
||||
} else if lsn > entry_lsn {
|
||||
self.remove_entry_sender.send(self.entry_path(
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
entry_lsn,
|
||||
))?;
|
||||
entry.insert(lsn);
|
||||
if cur_entry.lsn < found_entry.lsn {
|
||||
self.try_remove_entry(tenant_id, timeline_id, &cur_entry)
|
||||
.await;
|
||||
} else if cur_entry.lsn > found_entry.lsn {
|
||||
self.try_remove_entry(tenant_id, timeline_id, found_entry)
|
||||
.await;
|
||||
entry.insert(cur_entry);
|
||||
} else {
|
||||
// Two different filenames parsed to the same timline_id and LSN.
|
||||
// Should never happen.
|
||||
@@ -305,22 +344,17 @@ impl BasebackupCache {
|
||||
}
|
||||
}
|
||||
Vacant(entry) => {
|
||||
entry.insert(lsn);
|
||||
entry.insert(cur_entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BASEBACKUP_CACHE_ENTRIES.set(entries.len() as i64);
|
||||
*self.entries.lock().unwrap() = entries;
|
||||
*self.c.entries.lock().unwrap() = entries;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn background(
|
||||
self: Arc<Self>,
|
||||
mut prepare_receiver: BasebackupPrepareReceiver,
|
||||
mut remove_entry_receiver: BasebackupRemoveEntryReceiver,
|
||||
) {
|
||||
async fn run(mut self, mut prepare_receiver: BasebackupPrepareReceiver) {
|
||||
// Panic in the background is a safe fallback.
|
||||
// It will drop receivers and the cache will be effectively disabled.
|
||||
self.on_startup()
|
||||
@@ -343,11 +377,6 @@ impl BasebackupCache {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Some(req) = remove_entry_receiver.recv() => {
|
||||
if let Err(e) = tokio::fs::remove_file(req).await {
|
||||
tracing::warn!("Failed to remove basebackup cache file: {:#}", e);
|
||||
}
|
||||
}
|
||||
_ = cleanup_ticker.tick() => {
|
||||
self.cleanup().await.unwrap_or_else(|e| {
|
||||
tracing::warn!("Failed to clean up basebackup cache: {:#}", e);
|
||||
@@ -361,6 +390,67 @@ impl BasebackupCache {
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to remove an entry from disk.
|
||||
/// The caller is responsible for removing the entry from the in-memory state.
|
||||
/// Updates size counters and corresponding metrics.
|
||||
/// Ignores the filesystem errors as not-so-important, but the size counters
|
||||
/// are not decremented in this case, so the file will continue to be counted
|
||||
/// towards the size limits.
|
||||
async fn try_remove_entry(
|
||||
&mut self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
entry: &CacheEntry,
|
||||
) {
|
||||
let entry_path = self.c.entry_path(tenant_id, timeline_id, entry.lsn);
|
||||
|
||||
match tokio::fs::remove_file(&entry_path).await {
|
||||
Ok(_) => {}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
"Failed to remove basebackup cache file for tenant {} timeline {} LSN {}: {:#}",
|
||||
tenant_id,
|
||||
timeline_id,
|
||||
entry.lsn,
|
||||
e
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
self.entry_count -= 1;
|
||||
BASEBACKUP_CACHE_ENTRIES.set(self.entry_count as u64);
|
||||
|
||||
self.total_size_bytes -= entry.size_bytes;
|
||||
BASEBACKUP_CACHE_SIZE.set(self.total_size_bytes);
|
||||
}
|
||||
|
||||
/// Insert the cache entry into in-memory state and update the size counters.
|
||||
/// Assumes that the file for the entry already exists on disk.
|
||||
/// If the entry already exists with previous LSN, it will be removed.
|
||||
async fn upsert_entry(
|
||||
&mut self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
entry: CacheEntry,
|
||||
) {
|
||||
let tti = TenantTimelineId::new(tenant_id, timeline_id);
|
||||
|
||||
self.entry_count += 1;
|
||||
BASEBACKUP_CACHE_ENTRIES.set(self.entry_count as u64);
|
||||
|
||||
self.total_size_bytes += entry.size_bytes;
|
||||
BASEBACKUP_CACHE_SIZE.set(self.total_size_bytes);
|
||||
|
||||
let old_entry = self.c.entries.lock().unwrap().insert(tti, entry);
|
||||
|
||||
if let Some(old_entry) = old_entry {
|
||||
self.try_remove_entry(tenant_id, timeline_id, &old_entry)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Prepare a basebackup for the given timeline.
|
||||
///
|
||||
/// If the basebackup already exists with a higher LSN or the timeline already
|
||||
@@ -369,7 +459,7 @@ impl BasebackupCache {
|
||||
/// The basebackup is prepared in a temporary directory and then moved to the final
|
||||
/// location to make the operation atomic.
|
||||
async fn prepare_basebackup(
|
||||
&self,
|
||||
&mut self,
|
||||
tenant_shard_id: TenantShardId,
|
||||
timeline_id: TimelineId,
|
||||
req_lsn: Lsn,
|
||||
@@ -383,30 +473,44 @@ impl BasebackupCache {
|
||||
|
||||
let tti = TenantTimelineId::new(tenant_shard_id.tenant_id, timeline_id);
|
||||
|
||||
// TODO(diko): I don't think we will hit the limit,
|
||||
// but if we do, it makes sense to try to evict oldest entries. here
|
||||
if self.entry_count >= self.config.max_size_entries {
|
||||
tracing::info!(
|
||||
%tenant_shard_id,
|
||||
%timeline_id,
|
||||
%req_lsn,
|
||||
"Basebackup cache is full (max_size_entries), skipping basebackup",
|
||||
);
|
||||
self.prepare_skip_count.inc();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if self.total_size_bytes >= self.config.max_total_size_bytes {
|
||||
tracing::info!(
|
||||
%tenant_shard_id,
|
||||
%timeline_id,
|
||||
%req_lsn,
|
||||
"Basebackup cache is full (max_total_size_bytes), skipping basebackup",
|
||||
);
|
||||
self.prepare_skip_count.inc();
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
{
|
||||
let entries = self.entries.lock().unwrap();
|
||||
if let Some(&entry_lsn) = entries.get(&tti) {
|
||||
if entry_lsn >= req_lsn {
|
||||
let entries = self.c.entries.lock().unwrap();
|
||||
if let Some(entry) = entries.get(&tti) {
|
||||
if entry.lsn >= req_lsn {
|
||||
tracing::info!(
|
||||
%timeline_id,
|
||||
%req_lsn,
|
||||
%entry_lsn,
|
||||
%entry.lsn,
|
||||
"Basebackup entry already exists for timeline with higher LSN, skipping basebackup",
|
||||
);
|
||||
self.prepare_skip_count.inc();
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
if entries.len() as i64 >= self.config.max_size_entries {
|
||||
tracing::info!(
|
||||
%timeline_id,
|
||||
%req_lsn,
|
||||
"Basebackup cache is full, skipping basebackup",
|
||||
);
|
||||
self.prepare_skip_count.inc();
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
let tenant = self
|
||||
@@ -442,18 +546,21 @@ impl BasebackupCache {
|
||||
.prepare_basebackup_tmp(&entry_tmp_path, &timeline, req_lsn)
|
||||
.await;
|
||||
|
||||
if let Err(err) = res {
|
||||
tracing::info!("Failed to prepare basebackup tmp file: {:#}", err);
|
||||
// Try to clean up tmp file. If we fail, the background clean up task will take care of it.
|
||||
match tokio::fs::remove_file(&entry_tmp_path).await {
|
||||
Ok(_) => {}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
|
||||
Err(e) => {
|
||||
tracing::info!("Failed to remove basebackup tmp file: {:?}", e);
|
||||
let entry = match res {
|
||||
Ok(entry) => entry,
|
||||
Err(err) => {
|
||||
tracing::info!("Failed to prepare basebackup tmp file: {:#}", err);
|
||||
// Try to clean up tmp file. If we fail, the background clean up task will take care of it.
|
||||
match tokio::fs::remove_file(&entry_tmp_path).await {
|
||||
Ok(_) => {}
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
|
||||
Err(e) => {
|
||||
tracing::info!("Failed to remove basebackup tmp file: {:?}", e);
|
||||
}
|
||||
}
|
||||
return Err(err);
|
||||
}
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
|
||||
// Move the tmp file to the final location atomically.
|
||||
// The tmp file is fsynced, so it's guaranteed that we will not have a partial file
|
||||
@@ -461,17 +568,13 @@ impl BasebackupCache {
|
||||
// It's not necessary to fsync the inode after renaming, because the worst case is that
|
||||
// the rename operation will be rolled back on the disk failure, the entry will disappear
|
||||
// from the main directory, and the entry access will cause a cache miss.
|
||||
let entry_path = self.entry_path(tenant_shard_id.tenant_id, timeline_id, req_lsn);
|
||||
let entry_path = self
|
||||
.c
|
||||
.entry_path(tenant_shard_id.tenant_id, timeline_id, req_lsn);
|
||||
tokio::fs::rename(&entry_tmp_path, &entry_path).await?;
|
||||
|
||||
let mut entries = self.entries.lock().unwrap();
|
||||
if let Some(old_lsn) = entries.insert(tti, req_lsn) {
|
||||
// Remove the old entry if it exists.
|
||||
self.remove_entry_sender
|
||||
.send(self.entry_path(tenant_shard_id.tenant_id, timeline_id, old_lsn))
|
||||
.unwrap();
|
||||
}
|
||||
BASEBACKUP_CACHE_ENTRIES.set(entries.len() as i64);
|
||||
self.upsert_entry(tenant_shard_id.tenant_id, timeline_id, entry)
|
||||
.await;
|
||||
|
||||
self.prepare_ok_count.inc();
|
||||
Ok(())
|
||||
@@ -484,7 +587,7 @@ impl BasebackupCache {
|
||||
entry_tmp_path: &Utf8Path,
|
||||
timeline: &Arc<Timeline>,
|
||||
req_lsn: Lsn,
|
||||
) -> anyhow::Result<()> {
|
||||
) -> anyhow::Result<CacheEntry> {
|
||||
let ctx = RequestContext::new(TaskKind::BasebackupCache, DownloadBehavior::Download);
|
||||
let ctx = ctx.with_scope_timeline(timeline);
|
||||
|
||||
@@ -524,6 +627,12 @@ impl BasebackupCache {
|
||||
writer.flush().await?;
|
||||
writer.into_inner().sync_all().await?;
|
||||
|
||||
Ok(())
|
||||
// TODO(diko): we can count it via Writer wrapper instead of a syscall.
|
||||
let size_bytes = tokio::fs::metadata(entry_tmp_path).await?.len();
|
||||
|
||||
Ok(CacheEntry {
|
||||
lsn: req_lsn,
|
||||
size_bytes,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,9 @@ use std::io::{Read, Write, stdin, stdout};
|
||||
use std::time::Duration;
|
||||
|
||||
use clap::Parser;
|
||||
use pageserver_api::models::{PagestreamRequest, PagestreamTestRequest};
|
||||
use pageserver_api::pagestream_api::{
|
||||
PagestreamFeMessage, PagestreamRequest, PagestreamTestRequest,
|
||||
};
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
@@ -28,17 +30,15 @@ async fn main() -> anyhow::Result<()> {
|
||||
let mut msg = 0;
|
||||
loop {
|
||||
msg += 1;
|
||||
let fut = sender.send(pageserver_api::models::PagestreamFeMessage::Test(
|
||||
PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(23),
|
||||
not_modified_since: Lsn(23),
|
||||
},
|
||||
batch_key: 42,
|
||||
message: format!("message {}", msg),
|
||||
let fut = sender.send(PagestreamFeMessage::Test(PagestreamTestRequest {
|
||||
hdr: PagestreamRequest {
|
||||
reqid: 0,
|
||||
request_lsn: Lsn(23),
|
||||
not_modified_since: Lsn(23),
|
||||
},
|
||||
));
|
||||
batch_key: 42,
|
||||
message: format!("message {}", msg),
|
||||
}));
|
||||
let Ok(res) = tokio::time::timeout(Duration::from_secs(10), fut).await else {
|
||||
eprintln!("pipe seems full");
|
||||
break;
|
||||
|
||||
@@ -159,14 +159,7 @@ impl StorageControllerUpcallApi for StorageControllerUpcallClient {
|
||||
Ok(m) => {
|
||||
// Since we run one time at startup, be generous in our logging and
|
||||
// dump all metadata.
|
||||
tracing::info!(
|
||||
"Loaded node metadata: postgres {}:{}, http {}:{}, other fields: {:?}",
|
||||
m.postgres_host,
|
||||
m.postgres_port,
|
||||
m.http_host,
|
||||
m.http_port,
|
||||
m.other
|
||||
);
|
||||
tracing::info!("Loaded node metadata: {m}");
|
||||
|
||||
let az_id = {
|
||||
let az_id_from_metadata = m
|
||||
@@ -195,6 +188,8 @@ impl StorageControllerUpcallApi for StorageControllerUpcallClient {
|
||||
node_id: conf.id,
|
||||
listen_pg_addr: m.postgres_host,
|
||||
listen_pg_port: m.postgres_port,
|
||||
listen_grpc_addr: m.grpc_host,
|
||||
listen_grpc_port: m.grpc_port,
|
||||
listen_http_addr: m.http_host,
|
||||
listen_http_port: m.http_port,
|
||||
listen_https_port: m.https_port,
|
||||
|
||||
@@ -12,6 +12,8 @@ use utils::id::TenantId;
|
||||
|
||||
use crate::{config::PageServerConf, metrics::FEATURE_FLAG_EVALUATION};
|
||||
|
||||
const DEFAULT_POSTHOG_REFRESH_INTERVAL: Duration = Duration::from_secs(600);
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct FeatureResolver {
|
||||
inner: Option<Arc<FeatureResolverBackgroundLoop>>,
|
||||
@@ -139,10 +141,13 @@ impl FeatureResolver {
|
||||
}
|
||||
tenants
|
||||
};
|
||||
// TODO: make refresh period configurable
|
||||
inner
|
||||
.clone()
|
||||
.spawn(handle, Duration::from_secs(60), fake_tenants);
|
||||
inner.clone().spawn(
|
||||
handle,
|
||||
posthog_config
|
||||
.refresh_interval
|
||||
.unwrap_or(DEFAULT_POSTHOG_REFRESH_INTERVAL),
|
||||
fake_tenants,
|
||||
);
|
||||
Ok(FeatureResolver {
|
||||
inner: Some(inner),
|
||||
internal_properties: Some(internal_properties),
|
||||
|
||||
@@ -520,7 +520,7 @@ async fn import_file(
|
||||
}
|
||||
|
||||
if file_path.starts_with("global") {
|
||||
let spcnode = postgres_ffi::pg_constants::GLOBALTABLESPACE_OID;
|
||||
let spcnode = postgres_ffi_types::constants::GLOBALTABLESPACE_OID;
|
||||
let dbnode = 0;
|
||||
|
||||
match file_name.as_ref() {
|
||||
@@ -553,7 +553,7 @@ async fn import_file(
|
||||
}
|
||||
}
|
||||
} else if file_path.starts_with("base") {
|
||||
let spcnode = pg_constants::DEFAULTTABLESPACE_OID;
|
||||
let spcnode = postgres_ffi_types::constants::DEFAULTTABLESPACE_OID;
|
||||
let dbnode: u32 = file_path
|
||||
.iter()
|
||||
.nth(1)
|
||||
|
||||
@@ -3426,7 +3426,7 @@ impl TimelineMetrics {
|
||||
pub fn dec_frozen_layer(&self, layer: &InMemoryLayer) {
|
||||
assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
|
||||
let labels = self.make_frozen_layer_labels(layer);
|
||||
let size = layer.try_len().expect("frozen layer should have no writer");
|
||||
let size = layer.len();
|
||||
TIMELINE_LAYER_COUNT
|
||||
.get_metric_with_label_values(&labels)
|
||||
.unwrap()
|
||||
@@ -3441,7 +3441,7 @@ impl TimelineMetrics {
|
||||
pub fn inc_frozen_layer(&self, layer: &InMemoryLayer) {
|
||||
assert!(matches!(layer.info(), InMemoryLayerInfo::Frozen { .. }));
|
||||
let labels = self.make_frozen_layer_labels(layer);
|
||||
let size = layer.try_len().expect("frozen layer should have no writer");
|
||||
let size = layer.len();
|
||||
TIMELINE_LAYER_COUNT
|
||||
.get_metric_with_label_values(&labels)
|
||||
.unwrap()
|
||||
@@ -4428,18 +4428,16 @@ pub(crate) static BASEBACKUP_CACHE_PREPARE: Lazy<IntCounterVec> = Lazy::new(|| {
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
pub(crate) static BASEBACKUP_CACHE_ENTRIES: Lazy<IntGauge> = Lazy::new(|| {
|
||||
register_int_gauge!(
|
||||
pub(crate) static BASEBACKUP_CACHE_ENTRIES: Lazy<UIntGauge> = Lazy::new(|| {
|
||||
register_uint_gauge!(
|
||||
"pageserver_basebackup_cache_entries_total",
|
||||
"Number of entries in the basebackup cache"
|
||||
)
|
||||
.expect("failed to define a metric")
|
||||
});
|
||||
|
||||
// FIXME: Support basebackup cache size metrics.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) static BASEBACKUP_CACHE_SIZE: Lazy<IntGauge> = Lazy::new(|| {
|
||||
register_int_gauge!(
|
||||
pub(crate) static BASEBACKUP_CACHE_SIZE: Lazy<UIntGauge> = Lazy::new(|| {
|
||||
register_uint_gauge!(
|
||||
"pageserver_basebackup_cache_size_bytes",
|
||||
"Total size of all basebackup cache entries on disk in bytes"
|
||||
)
|
||||
|
||||
@@ -14,7 +14,7 @@ use std::{io, str};
|
||||
|
||||
use anyhow::{Context as _, anyhow, bail};
|
||||
use async_compression::tokio::write::GzipEncoder;
|
||||
use bytes::{Buf, BytesMut};
|
||||
use bytes::{Buf as _, BufMut as _, BytesMut};
|
||||
use futures::future::BoxFuture;
|
||||
use futures::{FutureExt, Stream};
|
||||
use itertools::Itertools;
|
||||
@@ -25,12 +25,13 @@ use pageserver_api::config::{
|
||||
PageServiceProtocolPipelinedBatchingStrategy, PageServiceProtocolPipelinedExecutionStrategy,
|
||||
};
|
||||
use pageserver_api::key::rel_block_to_key;
|
||||
use pageserver_api::models::{
|
||||
self, PageTraceEvent, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
|
||||
use pageserver_api::models::{PageTraceEvent, TenantState};
|
||||
use pageserver_api::pagestream_api::{
|
||||
self, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse,
|
||||
PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse,
|
||||
PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest,
|
||||
PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse,
|
||||
PagestreamProtocolVersion, PagestreamRequest, TenantState,
|
||||
PagestreamProtocolVersion, PagestreamRequest,
|
||||
};
|
||||
use pageserver_api::reltag::SlruKind;
|
||||
use pageserver_api::shard::TenantShardId;
|
||||
@@ -40,7 +41,7 @@ use postgres_backend::{
|
||||
AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error,
|
||||
};
|
||||
use postgres_ffi::BLCKSZ;
|
||||
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
|
||||
use postgres_ffi_types::constants::DEFAULTTABLESPACE_OID;
|
||||
use pq_proto::framed::ConnectionError;
|
||||
use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor};
|
||||
use smallvec::{SmallVec, smallvec};
|
||||
@@ -623,60 +624,6 @@ enum PageStreamError {
|
||||
BadRequest(Cow<'static, str>),
|
||||
}
|
||||
|
||||
impl PageStreamError {
|
||||
/// Converts a PageStreamError into a proto::GetPageResponse with the appropriate status
|
||||
/// code, or a gRPC status if it should terminate the stream (e.g. shutdown). This is a
|
||||
/// convenience method for use from a get_pages gRPC stream.
|
||||
#[allow(clippy::result_large_err)]
|
||||
fn into_get_page_response(
|
||||
self,
|
||||
request_id: page_api::RequestID,
|
||||
) -> Result<proto::GetPageResponse, tonic::Status> {
|
||||
use page_api::GetPageStatusCode;
|
||||
use tonic::Code;
|
||||
|
||||
// We dispatch to Into<tonic::Status> first, and then map it to a GetPageResponse.
|
||||
let status: tonic::Status = self.into();
|
||||
let status_code = match status.code() {
|
||||
// We shouldn't see an OK status here, because we're emitting an error.
|
||||
Code::Ok => {
|
||||
debug_assert_ne!(status.code(), Code::Ok);
|
||||
return Err(tonic::Status::internal(format!(
|
||||
"unexpected OK status: {status:?}",
|
||||
)));
|
||||
}
|
||||
|
||||
// These are per-request errors, returned as GetPageResponses.
|
||||
Code::AlreadyExists => GetPageStatusCode::InvalidRequest,
|
||||
Code::DataLoss => GetPageStatusCode::InternalError,
|
||||
Code::FailedPrecondition => GetPageStatusCode::InvalidRequest,
|
||||
Code::InvalidArgument => GetPageStatusCode::InvalidRequest,
|
||||
Code::Internal => GetPageStatusCode::InternalError,
|
||||
Code::NotFound => GetPageStatusCode::NotFound,
|
||||
Code::OutOfRange => GetPageStatusCode::InvalidRequest,
|
||||
Code::ResourceExhausted => GetPageStatusCode::SlowDown,
|
||||
|
||||
// These should terminate the stream.
|
||||
Code::Aborted => return Err(status),
|
||||
Code::Cancelled => return Err(status),
|
||||
Code::DeadlineExceeded => return Err(status),
|
||||
Code::PermissionDenied => return Err(status),
|
||||
Code::Unauthenticated => return Err(status),
|
||||
Code::Unavailable => return Err(status),
|
||||
Code::Unimplemented => return Err(status),
|
||||
Code::Unknown => return Err(status),
|
||||
};
|
||||
|
||||
Ok(page_api::GetPageResponse {
|
||||
request_id,
|
||||
status_code,
|
||||
reason: Some(status.message().to_string()),
|
||||
page_images: Vec::new(),
|
||||
}
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<PageStreamError> for tonic::Status {
|
||||
fn from(err: PageStreamError) -> Self {
|
||||
use tonic::Code;
|
||||
@@ -766,7 +713,7 @@ struct BatchedGetPageRequest {
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
struct BatchedTestRequest {
|
||||
req: models::PagestreamTestRequest,
|
||||
req: pagestream_api::PagestreamTestRequest,
|
||||
timer: SmgrOpTimer,
|
||||
}
|
||||
|
||||
@@ -780,13 +727,13 @@ enum BatchedFeMessage {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: WeakHandle<TenantManagerTypes>,
|
||||
req: models::PagestreamExistsRequest,
|
||||
req: PagestreamExistsRequest,
|
||||
},
|
||||
Nblocks {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: WeakHandle<TenantManagerTypes>,
|
||||
req: models::PagestreamNblocksRequest,
|
||||
req: PagestreamNblocksRequest,
|
||||
},
|
||||
GetPage {
|
||||
span: Span,
|
||||
@@ -798,13 +745,13 @@ enum BatchedFeMessage {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: WeakHandle<TenantManagerTypes>,
|
||||
req: models::PagestreamDbSizeRequest,
|
||||
req: PagestreamDbSizeRequest,
|
||||
},
|
||||
GetSlruSegment {
|
||||
span: Span,
|
||||
timer: SmgrOpTimer,
|
||||
shard: WeakHandle<TenantManagerTypes>,
|
||||
req: models::PagestreamGetSlruSegmentRequest,
|
||||
req: PagestreamGetSlruSegmentRequest,
|
||||
},
|
||||
#[cfg(feature = "testing")]
|
||||
Test {
|
||||
@@ -2497,10 +2444,9 @@ impl PageServerHandler {
|
||||
.map(|(req, res)| {
|
||||
res.map(|page| {
|
||||
(
|
||||
PagestreamBeMessage::GetPage(models::PagestreamGetPageResponse {
|
||||
req: req.req,
|
||||
page,
|
||||
}),
|
||||
PagestreamBeMessage::GetPage(
|
||||
pagestream_api::PagestreamGetPageResponse { req: req.req, page },
|
||||
),
|
||||
req.timer,
|
||||
req.ctx,
|
||||
)
|
||||
@@ -2567,7 +2513,7 @@ impl PageServerHandler {
|
||||
.map(|(req, res)| {
|
||||
res.map(|()| {
|
||||
(
|
||||
PagestreamBeMessage::Test(models::PagestreamTestResponse {
|
||||
PagestreamBeMessage::Test(pagestream_api::PagestreamTestResponse {
|
||||
req: req.req.clone(),
|
||||
}),
|
||||
req.timer,
|
||||
@@ -3340,7 +3286,14 @@ impl GrpcPageServiceHandler {
|
||||
Ok(req)
|
||||
}))
|
||||
// Run the page service.
|
||||
.service(proto::PageServiceServer::new(page_service_handler));
|
||||
.service(
|
||||
proto::PageServiceServer::new(page_service_handler)
|
||||
// Support both gzip and zstd compression. The client decides what to use.
|
||||
.accept_compressed(tonic::codec::CompressionEncoding::Gzip)
|
||||
.accept_compressed(tonic::codec::CompressionEncoding::Zstd)
|
||||
.send_compressed(tonic::codec::CompressionEncoding::Gzip)
|
||||
.send_compressed(tonic::codec::CompressionEncoding::Zstd),
|
||||
);
|
||||
let server = server.add_service(page_service);
|
||||
|
||||
// Reflection service for use with e.g. grpcurl.
|
||||
@@ -3438,8 +3391,8 @@ impl GrpcPageServiceHandler {
|
||||
|
||||
/// Processes a GetPage batch request, via the GetPages bidirectional streaming RPC.
|
||||
///
|
||||
/// NB: errors will terminate the stream. Per-request errors should return a GetPageResponse
|
||||
/// with an appropriate status code instead.
|
||||
/// NB: errors returned from here are intercepted in get_pages(), and may be converted to a
|
||||
/// GetPageResponse with an appropriate status code to avoid terminating the stream.
|
||||
///
|
||||
/// TODO: get_vectored() currently enforces a batch limit of 32. Postgres will typically send
|
||||
/// batches up to effective_io_concurrency = 100. Either we have to accept large batches, or
|
||||
@@ -3456,7 +3409,7 @@ impl GrpcPageServiceHandler {
|
||||
let ctx = ctx.with_scope_page_service_pagestream(&timeline);
|
||||
|
||||
// Validate the request, decorate the span, and convert it to a Pagestream request.
|
||||
let req: page_api::GetPageRequest = req.try_into()?;
|
||||
let req = page_api::GetPageRequest::try_from(req)?;
|
||||
|
||||
span_record!(
|
||||
req_id = %req.request_id,
|
||||
@@ -3467,7 +3420,7 @@ impl GrpcPageServiceHandler {
|
||||
);
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn(); // hold guard
|
||||
let effective_lsn = match PageServerHandler::effective_request_lsn(
|
||||
let effective_lsn = PageServerHandler::effective_request_lsn(
|
||||
&timeline,
|
||||
timeline.get_last_record_lsn(),
|
||||
req.read_lsn.request_lsn,
|
||||
@@ -3475,10 +3428,7 @@ impl GrpcPageServiceHandler {
|
||||
.not_modified_since_lsn
|
||||
.unwrap_or(req.read_lsn.request_lsn),
|
||||
&latest_gc_cutoff_lsn,
|
||||
) {
|
||||
Ok(lsn) => lsn,
|
||||
Err(err) => return err.into_get_page_response(req.request_id),
|
||||
};
|
||||
)?;
|
||||
|
||||
let mut batch = SmallVec::with_capacity(req.block_numbers.len());
|
||||
for blkno in req.block_numbers {
|
||||
@@ -3535,7 +3485,7 @@ impl GrpcPageServiceHandler {
|
||||
"unexpected response: {resp:?}"
|
||||
)));
|
||||
}
|
||||
Err(err) => return err.err.into_get_page_response(req.request_id),
|
||||
Err(err) => return Err(err.err.into()),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -3589,56 +3539,55 @@ impl proto::PageService for GrpcPageServiceHandler {
|
||||
Ok(tonic::Response::new(resp.into()))
|
||||
}
|
||||
|
||||
// TODO: ensure clients use gzip compression for the stream.
|
||||
#[instrument(skip_all, fields(lsn))]
|
||||
async fn get_base_backup(
|
||||
&self,
|
||||
req: tonic::Request<proto::GetBaseBackupRequest>,
|
||||
) -> Result<tonic::Response<Self::GetBaseBackupStream>, tonic::Status> {
|
||||
// Send 64 KB chunks to avoid large memory allocations.
|
||||
const CHUNK_SIZE: usize = 64 * 1024;
|
||||
// Send chunks of 256 KB to avoid large memory allocations. pagebench basebackup shows this
|
||||
// to be the sweet spot where throughput is saturated.
|
||||
const CHUNK_SIZE: usize = 256 * 1024;
|
||||
|
||||
let timeline = self.get_request_timeline(&req).await?;
|
||||
let ctx = self.ctx.with_scope_timeline(&timeline);
|
||||
|
||||
// Validate the request, decorate the span, and wait for the LSN to arrive.
|
||||
//
|
||||
// TODO: this requires a read LSN, is that ok?
|
||||
// Validate the request and decorate the span.
|
||||
Self::ensure_shard_zero(&timeline)?;
|
||||
if timeline.is_archived() == Some(true) {
|
||||
return Err(tonic::Status::failed_precondition("timeline is archived"));
|
||||
}
|
||||
let req: page_api::GetBaseBackupRequest = req.into_inner().try_into()?;
|
||||
let req: page_api::GetBaseBackupRequest = req.into_inner().into();
|
||||
|
||||
span_record!(lsn=%req.read_lsn);
|
||||
span_record!(lsn=?req.lsn);
|
||||
|
||||
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
|
||||
timeline
|
||||
.wait_lsn(
|
||||
req.read_lsn.request_lsn,
|
||||
WaitLsnWaiter::PageService,
|
||||
WaitLsnTimeout::Default,
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
timeline
|
||||
.check_lsn_is_in_scope(req.read_lsn.request_lsn, &latest_gc_cutoff_lsn)
|
||||
.map_err(|err| {
|
||||
tonic::Status::invalid_argument(format!("invalid basebackup LSN: {err}"))
|
||||
})?;
|
||||
// Wait for the LSN to arrive, if given.
|
||||
if let Some(lsn) = req.lsn {
|
||||
let latest_gc_cutoff_lsn = timeline.get_applied_gc_cutoff_lsn();
|
||||
timeline
|
||||
.wait_lsn(
|
||||
lsn,
|
||||
WaitLsnWaiter::PageService,
|
||||
WaitLsnTimeout::Default,
|
||||
&ctx,
|
||||
)
|
||||
.await?;
|
||||
timeline
|
||||
.check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)
|
||||
.map_err(|err| {
|
||||
tonic::Status::invalid_argument(format!("invalid basebackup LSN: {err}"))
|
||||
})?;
|
||||
}
|
||||
|
||||
// Spawn a task to run the basebackup.
|
||||
//
|
||||
// TODO: do we need to support full base backups, for debugging?
|
||||
let span = Span::current();
|
||||
let (mut simplex_read, mut simplex_write) = tokio::io::simplex(CHUNK_SIZE);
|
||||
let jh = tokio::spawn(async move {
|
||||
let result = basebackup::send_basebackup_tarball(
|
||||
&mut simplex_write,
|
||||
&timeline,
|
||||
Some(req.read_lsn.request_lsn),
|
||||
req.lsn,
|
||||
None,
|
||||
false,
|
||||
req.full,
|
||||
req.replica,
|
||||
&ctx,
|
||||
)
|
||||
@@ -3652,20 +3601,21 @@ impl proto::PageService for GrpcPageServiceHandler {
|
||||
|
||||
// Emit chunks of size CHUNK_SIZE.
|
||||
let chunks = async_stream::try_stream! {
|
||||
let mut chunk = BytesMut::with_capacity(CHUNK_SIZE);
|
||||
loop {
|
||||
let n = simplex_read.read_buf(&mut chunk).await.map_err(|err| {
|
||||
tonic::Status::internal(format!("failed to read basebackup chunk: {err}"))
|
||||
})?;
|
||||
|
||||
// If we read 0 bytes, either the chunk is full or the stream is closed.
|
||||
if n == 0 {
|
||||
if chunk.is_empty() {
|
||||
break;
|
||||
let mut chunk = BytesMut::with_capacity(CHUNK_SIZE).limit(CHUNK_SIZE);
|
||||
loop {
|
||||
let n = simplex_read.read_buf(&mut chunk).await.map_err(|err| {
|
||||
tonic::Status::internal(format!("failed to read basebackup chunk: {err}"))
|
||||
})?;
|
||||
if n == 0 {
|
||||
break; // full chunk or closed stream
|
||||
}
|
||||
yield proto::GetBaseBackupResponseChunk::from(chunk.clone().freeze());
|
||||
chunk.clear();
|
||||
}
|
||||
let chunk = chunk.into_inner().freeze();
|
||||
if chunk.is_empty() {
|
||||
break;
|
||||
}
|
||||
yield proto::GetBaseBackupResponseChunk::from(chunk);
|
||||
}
|
||||
// Wait for the basebackup task to exit and check for errors.
|
||||
jh.await.map_err(|err| {
|
||||
@@ -3742,9 +3692,16 @@ impl proto::PageService for GrpcPageServiceHandler {
|
||||
.await?
|
||||
.downgrade();
|
||||
while let Some(req) = reqs.message().await? {
|
||||
yield Self::get_page(&ctx, &timeline, req, io_concurrency.clone())
|
||||
let req_id = req.request_id;
|
||||
let result = Self::get_page(&ctx, &timeline, req, io_concurrency.clone())
|
||||
.instrument(span.clone()) // propagate request span
|
||||
.await?
|
||||
.await;
|
||||
yield match result {
|
||||
Ok(resp) => resp,
|
||||
// Convert per-request errors to GetPageResponses as appropriate, or terminate
|
||||
// the stream with a tonic::Status.
|
||||
Err(err) => page_api::GetPageResponse::try_from_status(err, req_id)?.into(),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -23,12 +23,11 @@ use pageserver_api::key::{
|
||||
};
|
||||
use pageserver_api::keyspace::{KeySpaceRandomAccum, SparseKeySpace};
|
||||
use pageserver_api::models::RelSizeMigration;
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
|
||||
use pageserver_api::shard::ShardIdentity;
|
||||
use pageserver_api::value::Value;
|
||||
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi::{BLCKSZ, Oid, RepOriginId, TimestampTz, TransactionId};
|
||||
use postgres_ffi::{BLCKSZ, TimestampTz, TransactionId};
|
||||
use postgres_ffi_types::forknum::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
|
||||
use postgres_ffi_types::{Oid, RepOriginId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use strum::IntoEnumIterator;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
@@ -36,6 +35,8 @@ use tracing::{debug, info, info_span, trace, warn};
|
||||
use utils::bin_ser::{BeSer, DeserializeError};
|
||||
use utils::lsn::Lsn;
|
||||
use utils::pausable_failpoint;
|
||||
use wal_decoder::models::record::NeonWalRecord;
|
||||
use wal_decoder::models::value::Value;
|
||||
use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta};
|
||||
|
||||
use super::tenant::{PageReconstructError, Timeline};
|
||||
@@ -720,6 +721,7 @@ impl Timeline {
|
||||
let batches = keyspace.partition(
|
||||
self.get_shard_identity(),
|
||||
self.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
|
||||
BLCKSZ as u64,
|
||||
);
|
||||
|
||||
let io_concurrency = IoConcurrency::spawn_from_conf(
|
||||
@@ -960,6 +962,7 @@ impl Timeline {
|
||||
let batches = keyspace.partition(
|
||||
self.get_shard_identity(),
|
||||
self.conf.max_get_vectored_keys.get() as u64 * BLCKSZ as u64,
|
||||
BLCKSZ as u64,
|
||||
);
|
||||
|
||||
let io_concurrency = IoConcurrency::spawn_from_conf(
|
||||
|
||||
@@ -496,7 +496,7 @@ impl WalRedoManager {
|
||||
key: pageserver_api::key::Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, bytes::Bytes)>,
|
||||
records: Vec<(Lsn, pageserver_api::record::NeonWalRecord)>,
|
||||
records: Vec<(Lsn, wal_decoder::models::record::NeonWalRecord)>,
|
||||
pg_version: u32,
|
||||
redo_attempt_type: RedoAttemptType,
|
||||
) -> Result<bytes::Bytes, walredo::Error> {
|
||||
@@ -1859,6 +1859,29 @@ impl TenantShard {
|
||||
}
|
||||
}
|
||||
|
||||
// At this point we've initialized all timelines and are tracking them.
|
||||
// Now compute the layer visibility for all (not offloaded) timelines.
|
||||
let compute_visiblity_for = {
|
||||
let timelines_accessor = self.timelines.lock().unwrap();
|
||||
let mut timelines_offloaded_accessor = self.timelines_offloaded.lock().unwrap();
|
||||
|
||||
timelines_offloaded_accessor.extend(offloaded_timelines_list.into_iter());
|
||||
|
||||
// Before activation, populate each Timeline's GcInfo with information about its children
|
||||
self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor, None);
|
||||
|
||||
timelines_accessor.values().cloned().collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
for tl in compute_visiblity_for {
|
||||
tl.update_layer_visibility().await.with_context(|| {
|
||||
format!(
|
||||
"failed initial timeline visibility computation {} for tenant {}",
|
||||
tl.timeline_id, self.tenant_shard_id
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
// Walk through deleted timelines, resume deletion
|
||||
for (timeline_id, index_part, remote_timeline_client) in timelines_to_resume_deletions {
|
||||
remote_timeline_client
|
||||
@@ -1878,10 +1901,6 @@ impl TenantShard {
|
||||
.context("resume_deletion")
|
||||
.map_err(LoadLocalTimelineError::ResumeDeletion)?;
|
||||
}
|
||||
{
|
||||
let mut offloaded_timelines_accessor = self.timelines_offloaded.lock().unwrap();
|
||||
offloaded_timelines_accessor.extend(offloaded_timelines_list.into_iter());
|
||||
}
|
||||
|
||||
// Stash the preloaded tenant manifest, and upload a new manifest if changed.
|
||||
//
|
||||
@@ -3449,9 +3468,6 @@ impl TenantShard {
|
||||
.values()
|
||||
.filter(|timeline| !(timeline.is_broken() || timeline.is_stopping()));
|
||||
|
||||
// Before activation, populate each Timeline's GcInfo with information about its children
|
||||
self.initialize_gc_info(&timelines_accessor, &timelines_offloaded_accessor, None);
|
||||
|
||||
// Spawn gc and compaction loops. The loops will shut themselves
|
||||
// down when they notice that the tenant is inactive.
|
||||
tasks::start_background_loops(self, background_jobs_can_start);
|
||||
@@ -5836,10 +5852,10 @@ pub(crate) mod harness {
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::key::Key;
|
||||
use pageserver_api::models::ShardParameters;
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::shard::ShardIndex;
|
||||
use utils::id::TenantId;
|
||||
use utils::logging;
|
||||
use wal_decoder::models::record::NeonWalRecord;
|
||||
|
||||
use super::*;
|
||||
use crate::deletion_queue::mock::MockDeletionQueue;
|
||||
@@ -6094,9 +6110,6 @@ mod tests {
|
||||
#[cfg(feature = "testing")]
|
||||
use pageserver_api::keyspace::KeySpaceRandomAccum;
|
||||
use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings};
|
||||
#[cfg(feature = "testing")]
|
||||
use pageserver_api::record::NeonWalRecord;
|
||||
use pageserver_api::value::Value;
|
||||
use pageserver_compaction::helpers::overlaps_with;
|
||||
#[cfg(feature = "testing")]
|
||||
use rand::SeedableRng;
|
||||
@@ -6117,6 +6130,9 @@ mod tests {
|
||||
use timeline::{CompactOptions, DeltaLayerTestDesc, VersionedKeySpaceQuery};
|
||||
use utils::id::TenantId;
|
||||
use utils::shard::{ShardCount, ShardNumber};
|
||||
#[cfg(feature = "testing")]
|
||||
use wal_decoder::models::record::NeonWalRecord;
|
||||
use wal_decoder::models::value::Value;
|
||||
|
||||
use super::*;
|
||||
use crate::DEFAULT_PG_VERSION;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user