Compare commits


1 Commit

Author SHA1 Message Date
Alexander Bayandin
05a8ec269a wip 2024-09-12 21:12:44 +01:00
233 changed files with 3656 additions and 12849 deletions

View File

@@ -0,0 +1 @@
FROM neondatabase/build-tools:pinned

View File

@@ -0,0 +1,23 @@
// https://containers.dev/implementors/json_reference/
{
"name": "Neon",
"build": {
"context": "..",
"dockerfile": "Dockerfile.devcontainer"
},
"postCreateCommand": {
"build neon": "BUILD_TYPE=debug CARGO_BUILD_FLAGS='--features=testing' mold -run make -s -j`nproc`",
"install python deps": "./scripts/pysync"
},
"customizations": {
"vscode": {
"extensions": [
"charliermarsh.ruff",
"github.vscode-github-actions",
"rust-lang.rust-analyzer"
]
}
}
}
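The devcontainer builds its image from the one-line Dockerfile.devcontainer above and then runs the two postCreateCommand entries inside the container. As a rough local sketch (assuming the Dev Containers CLI from npm is installed; the final exec command is illustrative):

    npm install -g @devcontainers/cli
    devcontainer up --workspace-folder .
    devcontainer exec --workspace-folder . cargo build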

View File

@@ -13,7 +13,6 @@
 # Directories
 !.cargo/
 !.config/
-!compute/
 !compute_tools/
 !control_plane/
 !libs/
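The `!` entries make this .dockerignore an allowlist: everything is excluded by default and only the listed paths enter the build context, which keeps image builds fast. A minimal sketch of the pattern (the leading `*` line is an assumption about the part of the file outside this hunk):

    *
    !.cargo/
    !compute_tools/
    !Makefile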

View File

@@ -62,7 +62,7 @@ jobs:
 #
 git config --global --add safe.directory ${{ github.workspace }}
 git config --global --add safe.directory ${GITHUB_WORKSPACE}
-for r in 14 15 16 17; do
+for r in 14 15 16; do
 git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
 git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
 done
@@ -83,10 +83,6 @@ jobs:
 id: pg_v16_rev
 run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
-- name: Set pg 17 revision for caching
-id: pg_v17_rev
-run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
 # Set some environment variables used by all the steps.
 #
 # CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
@@ -140,13 +136,6 @@ jobs:
 path: pg_install/v16
 key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
-- name: Cache postgres v17 build
-id: cache_pg_17
-uses: actions/cache@v4
-with:
-path: pg_install/v17
-key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
 - name: Build postgres v14
 if: steps.cache_pg_14.outputs.cache-hit != 'true'
 run: mold -run make postgres-v14 -j$(nproc)
@@ -159,10 +148,6 @@ jobs:
 if: steps.cache_pg_16.outputs.cache-hit != 'true'
 run: mold -run make postgres-v16 -j$(nproc)
-- name: Build postgres v17
-if: steps.cache_pg_17.outputs.cache-hit != 'true'
-run: mold -run make postgres-v17 -j$(nproc)
 - name: Build neon extensions
 run: mold -run make neon-pg-ext -j$(nproc)
@@ -225,7 +210,7 @@ jobs:
 run: |
 PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
 export PQ_LIB_DIR
-LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
+LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
 export LD_LIBRARY_PATH
 #nextest does not yet support running doctests
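In the test step above, PQ_LIB_DIR selects the libpq the Rust crates link against at build time, while LD_LIBRARY_PATH selects the libpq the binaries load at run time; pointing both at the v16 install keeps the two from diverging. A quick way to confirm what a built binary will actually load (the binary path is illustrative):

    export LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
    ldd target/release/pageserver | grep libpq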

View File

@@ -52,5 +52,5 @@ jobs:
 for image in ${images}; do
 docker buildx imagetools create \
 -t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
 neondatabase/${image}:${{ inputs.image_tag }}
 done
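docker buildx imagetools create re-tags an existing, possibly multi-arch, manifest under another registry path without rebuilding anything. The copied manifest can be verified afterwards (the tag shown is illustrative):

    docker buildx imagetools inspect myregistry.azurecr.io/neondatabase/neon:12345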

View File

@@ -54,8 +54,8 @@ jobs:
 build-tag: ${{steps.build-tag.outputs.tag}}
 steps:
-# Need `fetch-depth: 0` to count the number of commits in the branch
-- uses: actions/checkout@v4
+- name: Checkout
+uses: actions/checkout@v4
 with:
 fetch-depth: 0
@@ -120,59 +120,6 @@ jobs:
 - name: Run mypy to check types
 run: poetry run mypy .
-# Check that the vendor/postgres-* submodules point to the
-# corresponding REL_*_STABLE_neon branches.
-check-submodules:
-runs-on: ubuntu-22.04
-steps:
-- name: Checkout
-uses: actions/checkout@v4
-with:
-submodules: true
-- uses: dorny/paths-filter@v3
-id: check-if-submodules-changed
-with:
-filters: |
-vendor:
-- 'vendor/**'
-- name: Check vendor/postgres-v14 submodule reference
-if: steps.check-if-submodules-changed.outputs.vendor == 'true'
-uses: jtmullen/submodule-branch-check-action@v1
-with:
-path: "vendor/postgres-v14"
-fetch_depth: "50"
-sub_fetch_depth: "50"
-pass_if_unchanged: true
-- name: Check vendor/postgres-v15 submodule reference
-if: steps.check-if-submodules-changed.outputs.vendor == 'true'
-uses: jtmullen/submodule-branch-check-action@v1
-with:
-path: "vendor/postgres-v15"
-fetch_depth: "50"
-sub_fetch_depth: "50"
-pass_if_unchanged: true
-- name: Check vendor/postgres-v16 submodule reference
-if: steps.check-if-submodules-changed.outputs.vendor == 'true'
-uses: jtmullen/submodule-branch-check-action@v1
-with:
-path: "vendor/postgres-v16"
-fetch_depth: "50"
-sub_fetch_depth: "50"
-pass_if_unchanged: true
-- name: Check vendor/postgres-v17 submodule reference
-if: steps.check-if-submodules-changed.outputs.vendor == 'true'
-uses: jtmullen/submodule-branch-check-action@v1
-with:
-path: "vendor/postgres-v17"
-fetch_depth: "50"
-sub_fetch_depth: "50"
-pass_if_unchanged: true
 check-codestyle-rust:
 needs: [ check-permissions, build-build-tools-image ]
 strategy:
@@ -212,10 +159,6 @@ jobs:
 # This will catch compiler & clippy warnings in all feature combinations.
 # TODO: use cargo hack for build and test as well, but, that's quite expensive.
 # NB: keep clippy args in sync with ./run_clippy.sh
-#
-# The only difference between "clippy --debug" and "clippy --release" is that in --release mode,
-# #[cfg(debug_assertions)] blocks are not built. It's not worth building everything for a second
-# time just for that, so skip "clippy --release".
 - run: |
 CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
 if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
@@ -225,6 +168,8 @@ jobs:
 echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
 - name: Run cargo clippy (debug)
 run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
+- name: Run cargo clippy (release)
+run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS
 - name: Check documentation generation
 run: cargo doc --workspace --no-deps --document-private-items
@@ -266,7 +211,7 @@ jobs:
 build-tag: ${{ needs.tag.outputs.build-tag }}
 build-type: ${{ matrix.build-type }}
 # Run tests on all Postgres versions in release builds and only on the latest version in debug builds
-pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
+pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16"]' || '["v16"]' }}
 secrets: inherit
 # Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
@@ -412,7 +357,6 @@ jobs:
 })
 coverage-report:
-if: ${{ !startsWith(github.ref_name, 'release') }}
 needs: [ check-permissions, build-build-tools-image, build-and-test-locally ]
 runs-on: [ self-hosted, small ]
 container:
@@ -429,8 +373,8 @@ jobs:
 coverage-html: ${{ steps.upload-coverage-report-new.outputs.report-url }}
 coverage-json: ${{ steps.upload-coverage-report-new.outputs.summary-json }}
 steps:
-# Need `fetch-depth: 0` for differential coverage (to get diff between two commits)
-- uses: actions/checkout@v4
+- name: Checkout
+uses: actions/checkout@v4
 with:
 submodules: true
 fetch-depth: 0
@@ -531,9 +475,11 @@ jobs:
 runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
 steps:
-- uses: actions/checkout@v4
+- name: Checkout
+uses: actions/checkout@v4
 with:
 submodules: true
+fetch-depth: 0
 - uses: ./.github/actions/set-docker-config-dir
 - uses: docker/setup-buildx-action@v3
@@ -602,28 +548,17 @@ jobs:
 strategy:
 fail-fast: false
 matrix:
-version:
-# Much data was already generated on old PG versions with bullseye's
-# libraries, the locales of which can cause data incompatibilities.
-# However, new PG versions should check if they can be built on newer
-# images, as that reduces the support burden of old and ancient
-# distros.
-- pg: v14
-debian: bullseye-slim
-- pg: v15
-debian: bullseye-slim
-- pg: v16
-debian: bullseye-slim
-- pg: v17
-debian: bookworm-slim
+version: [ v14, v15, v16 ]
 arch: [ x64, arm64 ]
 runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
 steps:
-- uses: actions/checkout@v4
+- name: Checkout
+uses: actions/checkout@v4
 with:
 submodules: true
+fetch-depth: 0
 - uses: ./.github/actions/set-docker-config-dir
 - uses: docker/setup-buildx-action@v3
@@ -658,46 +593,41 @@ jobs:
 context: .
 build-args: |
 GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
-PG_VERSION=${{ matrix.version.pg }}
+PG_VERSION=${{ matrix.version }}
 BUILD_TAG=${{ needs.tag.outputs.build-tag }}
 TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
-DEBIAN_FLAVOR=${{ matrix.version.debian }}
 provenance: false
 push: true
 pull: true
-file: compute/Dockerfile.compute-node
+file: Dockerfile.compute-node
-cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.arch }}
+cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }}
-cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }}
+cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
 tags: |
-neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
+neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
 - name: Build neon extensions test image
-if: matrix.version.pg == 'v16'
+if: matrix.version == 'v16'
 uses: docker/build-push-action@v6
 with:
 context: .
 build-args: |
 GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
-PG_VERSION=${{ matrix.version.pg }}
+PG_VERSION=${{ matrix.version }}
 BUILD_TAG=${{ needs.tag.outputs.build-tag }}
 TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
-DEBIAN_FLAVOR=${{ matrix.version.debian }}
 provenance: false
 push: true
 pull: true
-file: compute/Dockerfile.compute-node
+file: Dockerfile.compute-node
 target: neon-pg-ext-test
-cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.arch }}
+cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }}
-cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version.pg, matrix.arch) || '' }}
+cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
 tags: |
-neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
+neondatabase/neon-test-extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}-${{ matrix.arch }}
 - name: Build compute-tools image
 # compute-tools are Postgres independent, so build it only once
-# We pick 16, because that builds on debian 11 with older glibc (and is
-# thus compatible with newer glibc), rather than 17 on Debian 12, as
-# that isn't guaranteed to be compatible with Debian 11
-if: matrix.version.pg == 'v16'
+if: matrix.version == 'v16'
 uses: docker/build-push-action@v6
 with:
 target: compute-tools-image
@@ -706,11 +636,10 @@ jobs:
 GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
 BUILD_TAG=${{ needs.tag.outputs.build-tag }}
 TAG=${{ needs.build-build-tools-image.outputs.image-tag }}
-DEBIAN_FLAVOR=${{ matrix.version.debian }}
 provenance: false
 push: true
 pull: true
-file: compute/Dockerfile.compute-node
+file: Dockerfile.compute-node
 tags: |
 neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}
@@ -720,7 +649,7 @@ jobs:
 strategy:
 matrix:
-version: [ v14, v15, v16, v17 ]
+version: [ v14, v15, v16 ]
 steps:
 - uses: docker/login-action@v3
@@ -742,7 +671,7 @@ jobs:
 neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64
 - name: Create multi-arch compute-tools image
-if: matrix.version == 'v17'
+if: matrix.version == 'v16'
 run: |
 docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
 neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-x64 \
@@ -760,7 +689,7 @@ jobs:
 neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
 - name: Push multi-arch compute-tools image to ECR
-if: matrix.version == 'v17'
+if: matrix.version == 'v16'
 run: |
 docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
 neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
@@ -771,12 +700,15 @@ jobs:
 strategy:
 fail-fast: false
 matrix:
-version: [ v14, v15, v16, v17 ]
+version: [ v14, v15, v16 ]
 env:
 VM_BUILDER_VERSION: v0.29.3
 steps:
-- uses: actions/checkout@v4
+- name: Checkout
+uses: actions/checkout@v4
+with:
+fetch-depth: 0
 - name: Downloading vm-builder
 run: |
@@ -798,7 +730,7 @@ jobs:
 - name: Build vm image
 run: |
 ./vm-builder \
--spec=compute/vm-image-spec.yaml \
+-spec=vm-image-spec.yaml \
 -src=neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
 -dst=neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}
@@ -816,7 +748,10 @@ jobs:
 runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}
 steps:
-- uses: actions/checkout@v4
+- name: Checkout
+uses: actions/checkout@v4
+with:
+fetch-depth: 0
 - uses: ./.github/actions/set-docker-config-dir
 - uses: docker/login-action@v3
@@ -862,11 +797,8 @@ jobs:
 needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
 runs-on: ubuntu-22.04
-permissions:
-id-token: write # for `aws-actions/configure-aws-credentials`
 env:
-VERSIONS: v14 v15 v16 v17
+VERSIONS: v14 v15 v16
 steps:
 - uses: docker/login-action@v3
@@ -907,26 +839,20 @@ jobs:
 done
 done
 docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
 neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
-- name: Configure AWS-prod credentials
-if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
-uses: aws-actions/configure-aws-credentials@v4
-with:
-aws-region: eu-central-1
-mask-aws-account-id: true
-role-to-assume: ${{ secrets.PROD_GHA_OIDC_ROLE }}
 - name: Login to prod ECR
 uses: docker/login-action@v3
 if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
 with:
 registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com
+username: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_ACCESS_KEY_ID }}
+password: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_SECRET_ACCESS_KEY }}
 - name: Copy all images to prod ECR
 if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
 run: |
-for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16,v17}; do
+for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
 docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \
 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
 done
@@ -938,7 +864,7 @@ jobs:
 with:
 client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
 image_tag: ${{ needs.tag.outputs.build-tag }}
-images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
+images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
 registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
 subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
 tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -950,7 +876,7 @@ jobs:
 with:
 client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
 image_tag: ${{ needs.tag.outputs.build-tag }}
-images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
+images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
 registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
 subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
 tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -1031,7 +957,6 @@ jobs:
 deploy:
 needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
-# `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
 if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy') && !failure() && !cancelled()
 runs-on: [ self-hosted, small ]
@@ -1046,12 +971,15 @@ jobs:
 #
 git config --global --add safe.directory ${{ github.workspace }}
 git config --global --add safe.directory ${GITHUB_WORKSPACE}
-for r in 14 15 16 17; do
+for r in 14 15 16; do
 git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
 git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
 done
-- uses: actions/checkout@v4
+- name: Checkout
+uses: actions/checkout@v4
+with:
+fetch-depth: 0
 - name: Trigger deploy workflow
 env:
@@ -1130,8 +1058,7 @@ jobs:
 # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
 promote-compatibility-data:
 needs: [ deploy ]
-# `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
-if: github.ref_name == 'release' && !failure() && !cancelled()
+if: github.ref_name == 'release'
 runs-on: ubuntu-22.04
 steps:
@@ -1190,7 +1117,6 @@ jobs:
 files_to_promote+=("s3://${BUCKET}/${s3_key}")
-# TODO Add v17
 for pg_version in v14 v15 v16; do
 # We run less tests for debug builds, so we don't need to promote them
 if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then

View File

@@ -1,102 +0,0 @@
name: Cloud Regression Test
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '45 1 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
defaults:
run:
shell: bash -euxo pipefail {0}
concurrency:
# Allow only one workflow
group: ${{ github.workflow }}
cancel-in-progress: true
jobs:
regress:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
runs-on: us-east-2
container:
image: neondatabase/build-tools:pinned
options: --init
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Patch the test
run: |
cd "vendor/postgres-v${DEFAULT_PG_VERSION}"
patch -p1 < "../../compute/patches/cloud_regress_pg${DEFAULT_PG_VERSION}.patch"
- name: Generate a random password
id: pwgen
run: |
set +x
DBPASS=$(dd if=/dev/random bs=48 count=1 2>/dev/null | base64)
echo "::add-mask::${DBPASS//\//}"
echo DBPASS="${DBPASS//\//}" >> "${GITHUB_OUTPUT}"
- name: Change tests according to the generated password
env:
DBPASS: ${{ steps.pwgen.outputs.DBPASS }}
run: |
cd vendor/postgres-v"${DEFAULT_PG_VERSION}"/src/test/regress
for fname in sql/*.sql expected/*.out; do
sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/"'${DBPASS}'"/ "${fname}"
done
for ph in $(grep NEON_MD5_PLACEHOLDER expected/password.out | awk '{print $3;}' | sort | uniq); do
USER=$(echo "${ph}" | cut -c 22-)
MD5=md5$(echo -n "${DBPASS}${USER}" | md5sum | awk '{print $1;}')
sed -i.bak "s/${ph}/${MD5}/" expected/password.out
done
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Run the regression tests
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: cloud_regress
pg_version: ${{ env.DEFAULT_PG_VERSION }}
extra_params: -m remote_cluster
env:
BENCHMARK_CONNSTR: ${{ secrets.PG_REGRESS_CONNSTR }}
- name: Create Allure report
id: create-allure-report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # on-call-staging-stream
slack-message: |
Periodic pg_regress on staging: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
<${{ steps.create-allure-report.outputs.report-url }}|Allure report>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
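The password-substitution steps in this deleted workflow rely on PostgreSQL's legacy md5 auth format, where the stored hash is the literal string md5 followed by md5(password concatenated with username). A minimal sketch of the same computation, with sample values:

    DBPASS=secret USER=alice
    echo "md5$(echo -n "${DBPASS}${USER}" | md5sum | awk '{print $1}')"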

View File

@@ -72,10 +72,6 @@ jobs:
 id: pg_v16_rev
 run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
-- name: Set pg 17 revision for caching
-id: pg_v17_rev
-run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
 - name: Cache postgres v14 build
 id: cache_pg_14
 uses: actions/cache@v4
@@ -97,13 +93,6 @@ jobs:
 path: pg_install/v16
 key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-- name: Cache postgres v17 build
-id: cache_pg_17
-uses: actions/cache@v4
-with:
-path: pg_install/v17
-key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
 - name: Set extra env for macOS
 run: |
 echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
@@ -131,10 +120,6 @@ jobs:
 if: steps.cache_pg_16.outputs.cache-hit != 'true'
 run: make postgres-v16 -j$(sysctl -n hw.ncpu)
-- name: Build postgres v17
-if: steps.cache_pg_17.outputs.cache-hit != 'true'
-run: make postgres-v17 -j$(sysctl -n hw.ncpu)
 - name: Build neon extensions
 run: make neon-pg-ext -j$(sysctl -n hw.ncpu)
@@ -181,7 +166,7 @@ jobs:
 run: make walproposer-lib -j$(nproc)
 - name: Produce the build stats
-run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)
+run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release --timings -j$(nproc)
 - name: Upload the build stats
 id: upload-stats
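The build-stats step uses cargo's --timings flag, which records per-crate compile times and writes an HTML report under target/cargo-timings/. A local equivalent of the step (paths as in the workflow):

    export PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
    cargo build --all --release --timings -j$(nproc)
    ls target/cargo-timings/cargo-timing.html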

View File

@@ -34,8 +34,8 @@ jobs:
 build-tag: ${{ steps.build-tag.outputs.tag }}
 steps:
-# Need `fetch-depth: 0` to count the number of commits in the branch
-- uses: actions/checkout@v4
+- name: Checkout
+uses: actions/checkout@v4
 with:
 fetch-depth: 0
@@ -102,12 +102,12 @@ jobs:
 # Default set of platforms to run e2e tests on
 platforms='["docker", "k8s"]'
-# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or compute/Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
+# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
 # If the workflow run is not a pull request, add k8s-neonvm to the list.
 if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
 for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
 case "$f" in
-vendor/*|pgxn/*|libs/vm_monitor/*|compute/Dockerfile.compute-node)
+vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
 platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
 ;;
 *)
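The case statement above widens the e2e platform matrix whenever a changed file falls under one of the watched paths. The same check against a single filename (sample input; jq invocation as in the workflow):

    platforms='["docker", "k8s"]'
    f="pgxn/neon/Makefile"
    case "$f" in
      vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
        platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
        ;;
    esac
    echo "${platforms}"   # ["docker","k8s","k8s-neonvm"]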

.gitmodules
View File

@@ -10,7 +10,3 @@
 path = vendor/postgres-v16
 url = https://github.com/neondatabase/postgres.git
 branch = REL_16_STABLE_neon
-[submodule "vendor/postgres-v17"]
-path = vendor/postgres-v17
-url = https://github.com/neondatabase/postgres.git
-branch = REL_17_STABLE_neon
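Deleting the stanza from .gitmodules is only part of removing a submodule; the gitlink entry and the cached module data have to go too. The usual sequence (a sketch, not taken from this commit):

    git submodule deinit -f vendor/postgres-v17
    git rm -f vendor/postgres-v17
    rm -rf .git/modules/vendor/postgres-v17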

Cargo.lock (461 lines changed)

File diff suppressed because it is too large

View File

@@ -76,6 +76,8 @@ clap = { version = "4.0", features = ["derive"] }
 comfy-table = "7.1"
 const_format = "0.2"
 crc32c = "0.6"
+crossbeam-deque = "0.8.5"
+crossbeam-utils = "0.8.5"
 dashmap = { version = "5.5.0", features = ["raw-api"] }
 either = "1.8"
 enum-map = "2.4.2"
@@ -93,7 +95,7 @@ hdrhistogram = "7.5.2"
 hex = "0.4"
 hex-literal = "0.4"
 hmac = "0.12.1"
-hostname = "0.4"
+hostname = "0.3.1"
 http = {version = "1.1.0", features = ["std"]}
 http-types = { version = "2", default-features = false }
 humantime = "2.1"
@@ -102,6 +104,7 @@ hyper = "0.14"
 tokio-tungstenite = "0.20.0"
 indexmap = "2"
 indoc = "2"
+inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
 jsonwebtoken = "9"
@@ -110,7 +113,7 @@ libc = "0.2"
 md5 = "0.7.0"
 measured = { version = "0.0.22", features=["lasso"] }
 measured-process = { version = "0.0.22" }
-memoffset = "0.9"
+memoffset = "0.8"
 nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] }
 notify = "6.0.0"
 num_cpus = "1.15"
@@ -139,6 +142,7 @@ rpds = "0.13"
 rustc-hash = "1.1.0"
 rustls = "0.22"
 rustls-pemfile = "2"
+rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
 sd-notify = "0.4.1"
@@ -160,6 +164,7 @@ strum_macros = "0.26"
 svg_fmt = "0.4.3"
 sync_wrapper = "0.1.2"
 tar = "0.4"
+task-local-extensions = "0.1.4"
 test-context = "0.3"
 thiserror = "1.0"
 tikv-jemallocator = "0.5"
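These entries sit in the workspace-level [workspace.dependencies] table, so every member crate inherits one pinned version and opts in per dependency. A sketch of how a member crate would consume the re-added entries (the manifest path is illustrative):

    # some-member-crate/Cargo.toml
    [dependencies]
    inotify.workspace = true
    crossbeam-utils.workspace = true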

View File

@@ -5,8 +5,6 @@
 ARG REPOSITORY=neondatabase
 ARG IMAGE=build-tools
 ARG TAG=pinned
-ARG DEFAULT_PG_VERSION=17
-ARG STABLE_PG_VERSION=16
 # Build Postgres
 FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
@@ -15,7 +13,6 @@ WORKDIR /home/nonroot
 COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
 COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
 COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
-COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
 COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
@@ -31,19 +28,16 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
 WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
-ARG STABLE_PG_VERSION
 COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
-COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
-COPY --from=pg-build /home/nonroot/pg_install/v17/lib pg_install/v17/lib
 COPY --chown=nonroot . .
 ARG ADDITIONAL_RUSTFLAGS
 RUN set -e \
-&& PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
+&& PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
 --bin pg_sni_router \
 --bin pageserver \
 --bin pagectl \
@@ -58,7 +52,6 @@ RUN set -e \
 # Build final image
 #
 FROM debian:bullseye-slim
-ARG DEFAULT_PG_VERSION
 WORKDIR /data
 RUN set -e \
@@ -84,7 +77,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubbe
 COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
 COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
 COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
-COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
 COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
@@ -101,7 +93,7 @@ RUN mkdir -p /data/.neon/ && \
 # When running a binary that links with libpq, default to using our most recent postgres version. Binaries
 # that want a particular postgres version will select it explicitly: this is just a default.
-ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib
+ENV LD_LIBRARY_PATH=/usr/local/v16/lib
 VOLUME ["/data"]
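With the DEFAULT_PG_VERSION build argument gone, the image hard-codes v16 as its default libpq; a different major version can still be selected per container by overriding the variable at run time. For example (image name and command are illustrative):

    docker run --rm -e LD_LIBRARY_PATH=/usr/local/v15/lib neondatabase/neon:latest sh -c 'ls "$LD_LIBRARY_PATH"'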

View File

@@ -3,15 +3,13 @@ ARG REPOSITORY=neondatabase
 ARG IMAGE=build-tools
 ARG TAG=pinned
 ARG BUILD_TAG
-ARG DEBIAN_FLAVOR=bullseye-slim
 #########################################################################################
 #
 # Layer "build-deps"
 #
 #########################################################################################
-FROM debian:$DEBIAN_FLAVOR AS build-deps
-ARG DEBIAN_FLAVOR
+FROM debian:bullseye-slim AS build-deps
 RUN apt update && \
 apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
 zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
@@ -57,27 +55,22 @@ RUN cd postgres && \
 # We could add the additional grant statements to the postgres repository but it would be hard to maintain,
 # whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
 # so we do it here.
+old_list="pg_stat_statements--1.0--1.1.sql pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.2--1.3.sql pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.4--1.5.sql pg_stat_statements--1.4.sql pg_stat_statements--1.5--1.6.sql"; \
+# the first loop is for pg_stat_statement extension version <= 1.6
 for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
 filename=$(basename "$file"); \
-# Note that there are no downgrade scripts for pg_stat_statements, so we \
-# don't have to modify any downgrade paths or (much) older versions: we only \
-# have to make sure every creation of the pg_stat_statements_reset function \
-# also adds execute permissions to the neon_superuser.
-case $filename in \
-pg_stat_statements--1.4.sql) \
-# pg_stat_statements_reset is first created with 1.4
+if echo "$old_list" | grep -q -F "$filename"; then \
 echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
-;; \
-pg_stat_statements--1.6--1.7.sql) \
-# Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
+fi; \
+done; \
+# the second loop is for pg_stat_statement extension versions >= 1.7,
+# where pg_stat_statement_reset() got 3 additional arguments
+for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
+filename=$(basename "$file"); \
+if ! echo "$old_list" | grep -q -F "$filename"; then \
 echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
-;; \
-pg_stat_statements--1.10--1.11.sql) \
-# Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
-echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
-;; \
-esac; \
-done;
+fi; \
+done
######################################################################################### #########################################################################################
# #
@@ -86,7 +79,6 @@ RUN cd postgres && \
 #
 #########################################################################################
 FROM build-deps AS postgis-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
 apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
@@ -95,11 +87,7 @@ RUN apt update && \
 protobuf-c-compiler xsltproc
 # SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
-RUN case "${PG_VERSION}" in "v17") \
-mkdir -p /sfcgal && \
-echo "Postgis doesn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
-esac && \
-wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
+RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
 echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
 mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
 cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -108,10 +96,7 @@ RUN case "${PG_VERSION}" in "v17") \
 ENV PATH="/usr/local/pgsql/bin:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-echo "Postgis doesn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
-esac && \
-wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
+RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
 echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
 mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
 find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
@@ -137,10 +122,7 @@ RUN case "${PG_VERSION}" in "v17") \
 cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
 cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
+RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
 echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
 mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
 mkdir build && cd build && \
@@ -160,19 +142,12 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS plv8-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-apt update && \
+RUN apt update && \
 apt install -y ninja-build python3-dev libncurses5 binutils clang
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
+RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
 echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
 mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
 # generate and copy upgrade scripts
@@ -197,13 +172,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS h3-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-case "$(uname -m)" in \
+RUN case "$(uname -m)" in \
 "x86_64") \
 export CMAKE_CHECKSUM=739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 \
 ;; \
@@ -221,11 +192,7 @@ RUN case "${PG_VERSION}" in "v17") \
 && /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
 && rm /tmp/cmake-install.sh
-RUN case "${PG_VERSION}" in "v17") \
-mkdir -p /h3/usr/ && \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
+RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
 echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
 mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
 mkdir build && cd build && \
@@ -235,10 +202,7 @@ RUN case "${PG_VERSION}" in "v17") \
 cp -R /h3/usr / && \
 rm -rf build
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
+RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
 echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
 mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
 export PATH="/usr/local/pgsql/bin:$PATH" && \
@@ -254,13 +218,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS unit-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
+RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
 echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
 mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
 make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -279,18 +239,14 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS vector-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY compute/patches/pgvector.patch /pgvector.patch
+COPY patches/pgvector.patch /pgvector.patch
 # By default, pgvector Makefile uses `-march=native`. We don't want that,
 # because we build the images on different machines than where we run them.
 # Pass OPTFLAGS="" to remove it.
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
+RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
 echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
 mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
 patch -p1 < /pgvector.patch && \
@@ -305,14 +261,10 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pgjwt-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 # 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
+RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
 echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
 mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
 make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -325,13 +277,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS hypopg-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
+RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
 echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
 mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
 make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -345,13 +293,9 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS pg-hashids-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
+RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
 echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
 mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
 make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -365,15 +309,11 @@ RUN case "${PG_VERSION}" in "v17") \
 #
 #########################################################################################
 FROM build-deps AS rum-pg-build
-ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY compute/patches/rum.patch /rum.patch
+COPY patches/rum.patch /rum.patch
-RUN case "${PG_VERSION}" in "v17") \
-echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-esac && \
-wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
+RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \ echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \ mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
patch -p1 < /rum.patch && \ patch -p1 < /rum.patch && \
@@ -388,13 +328,9 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS pgtap-pg-build FROM build-deps AS pgtap-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \ echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \ mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -408,13 +344,9 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS ip4r-pg-build FROM build-deps AS ip4r-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \ echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \ mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -428,13 +360,9 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS prefix-pg-build FROM build-deps AS prefix-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \ echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \ mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -448,13 +376,9 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS hll-pg-build FROM build-deps AS hll-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \ echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \ mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -468,13 +392,9 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS plpgsql-check-pg-build FROM build-deps AS plpgsql-check-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \ echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \ mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -493,10 +413,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH" ENV PATH="/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN case "${PG_VERSION}" in \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
case "${PG_VERSION}" in \
"v14" | "v15") \ "v14" | "v15") \
export TIMESCALEDB_VERSION=2.10.1 \ export TIMESCALEDB_VERSION=2.10.1 \
export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \ export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
@@ -529,10 +446,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION ARG PG_VERSION
ENV PATH="/usr/local/pgsql/bin:$PATH" ENV PATH="/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN case "${PG_VERSION}" in \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
case "${PG_VERSION}" in \
"v14") \ "v14") \
export PG_HINT_PLAN_VERSION=14_1_4_1 \ export PG_HINT_PLAN_VERSION=14_1_4_1 \
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \ export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
@@ -545,9 +459,6 @@ RUN case "${PG_VERSION}" in "v17") \
export PG_HINT_PLAN_VERSION=16_1_6_0 \ export PG_HINT_PLAN_VERSION=16_1_6_0 \
export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \ export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
;; \ ;; \
"v17") \
echo "TODO: PG17 pg_hint_plan support" && exit 0 \
;; \
*) \ *) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \ echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
;; \ ;; \
@@ -567,14 +478,10 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS pg-cron-pg-build FROM build-deps AS pg-cron-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \ echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \ mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -588,13 +495,9 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS rdkit-pg-build FROM build-deps AS rdkit-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN case "${PG_VERSION}" in "v17") \ RUN apt-get update && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
apt-get update && \
apt-get install -y \ apt-get install -y \
cmake \ cmake \
libboost-iostreams1.74-dev \ libboost-iostreams1.74-dev \
@@ -604,10 +507,7 @@ RUN case "${PG_VERSION}" in "v17") \
libeigen3-dev libeigen3-dev
ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH" ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \ echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \ mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
cmake \ cmake \
@@ -644,14 +544,10 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS pg-uuidv7-pg-build FROM build-deps AS pg-uuidv7-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \ echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \ mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -665,14 +561,10 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS pg-roaringbitmap-pg-build FROM build-deps AS pg-roaringbitmap-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
echo "v17 extensions is not supported yet by pg_roaringbitmap. Quit" && exit 0;; \
esac && \
wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \ echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \ mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -686,14 +578,10 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS pg-semver-pg-build FROM build-deps AS pg-semver-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
echo "v17 is not supported yet by pg_semver. Quit" && exit 0;; \
esac && \
wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \ echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \ mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -732,14 +620,10 @@ RUN case "${PG_VERSION}" in \
# #
######################################################################################### #########################################################################################
FROM build-deps AS pg-anon-pg-build FROM build-deps AS pg-anon-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
esac && \
wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \ echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \ mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\ find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
@@ -757,7 +641,6 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS rust-extensions-build FROM build-deps AS rust-extensions-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt-get update && \ RUN apt-get update && \
@@ -768,11 +651,9 @@ ENV HOME=/home/nonroot
ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH" ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
USER nonroot USER nonroot
WORKDIR /home/nonroot WORKDIR /home/nonroot
ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
echo "v17 is not supported yet by pgrx. Quit" && exit 0;; \
esac && \
curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
chmod +x rustup-init && \ chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \ ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
rm rustup-init && \ rm rustup-init && \
@@ -791,10 +672,7 @@ USER root
FROM rust-extensions-build AS pg-jsonschema-pg-build FROM rust-extensions-build AS pg-jsonschema-pg-build
ARG PG_VERSION ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
echo "pg_jsonschema does not yet have a release that supports pg17" && exit 0;; \
esac && \
wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \ echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \ mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
# see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8 # see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
@@ -816,10 +694,7 @@ RUN case "${PG_VERSION}" in "v17") \
FROM rust-extensions-build AS pg-graphql-pg-build FROM rust-extensions-build AS pg-graphql-pg-build
ARG PG_VERSION ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
echo "pg_graphql does not yet have a release that supports pg17 as of now" && exit 0;; \
esac && \
wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \ echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \ mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -839,10 +714,7 @@ FROM rust-extensions-build AS pg-tiktoken-pg-build
ARG PG_VERSION ARG PG_VERSION
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023 # 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
echo "pg_tiktoken does not have versions, nor support for pg17" && exit 0;; \
esac && \
wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \ echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \ mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
# TODO update pgrx version in the pg_tiktoken repo and remove this line # TODO update pgrx version in the pg_tiktoken repo and remove this line
@@ -861,10 +733,7 @@ RUN case "${PG_VERSION}" in "v17") \
FROM rust-extensions-build AS pg-pgx-ulid-build FROM rust-extensions-build AS pg-pgx-ulid-build
ARG PG_VERSION ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
echo "pgx_ulid does not support pg17 as of the latest version (0.1.5)" && exit 0;; \
esac && \
wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \ echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \ mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ sed -i 's/pgrx = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -879,14 +748,10 @@ RUN case "${PG_VERSION}" in "v17") \
######################################################################################### #########################################################################################
FROM build-deps AS wal2json-pg-build FROM build-deps AS wal2json-pg-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "We'll need to update wal2json to 2.6+ for pg17 support" && exit 0;; \
esac && \
wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \ echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \ mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -899,14 +764,10 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS pg-ivm-build FROM build-deps AS pg-ivm-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
echo "We'll need to update pg_ivm to 1.9+ for pg17 support" && exit 0;; \
esac && \
wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \ echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \ mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -920,14 +781,10 @@ RUN case "${PG_VERSION}" in "v17") \
# #
######################################################################################### #########################################################################################
FROM build-deps AS pg-partman-build FROM build-deps AS pg-partman-build
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH="/usr/local/pgsql/bin/:$PATH" ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in "v17") \ RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
echo "pg_partman doesn't support PG17 yet" && exit 0;; \
esac && \
wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \ echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \ mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \ make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -997,8 +854,8 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
case "${PG_VERSION}" in \ case "${PG_VERSION}" in \
"v14" | "v15") \ "v14" | "v15") \
;; \ ;; \
"v16" | "v17") \ "v16") \
echo "Skipping HNSW for PostgreSQL ${PG_VERSION}" && exit 0 \ echo "Skipping HNSW for PostgreSQL 16" && exit 0 \
;; \ ;; \
*) \ *) \
echo "unexpected PostgreSQL version" && exit 1 \ echo "unexpected PostgreSQL version" && exit 1 \
@@ -1029,47 +886,10 @@ RUN cd compute_tools && mold -run cargo build --locked --profile release-line-de
# #
######################################################################################### #########################################################################################
FROM debian:$DEBIAN_FLAVOR AS compute-tools-image FROM debian:bullseye-slim AS compute-tools-image
ARG DEBIAN_FLAVOR
COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
#########################################################################################
#
# Layer "pgbouncer"
#
#########################################################################################
FROM debian:$DEBIAN_FLAVOR AS pgbouncer
ARG DEBIAN_FLAVOR
RUN set -e \
&& apt-get update \
&& apt-get install -y \
build-essential \
git \
libevent-dev \
libtool \
pkg-config
# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
ENV PGBOUNCER_TAG=pgbouncer_1_22_1
RUN set -e \
&& git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
&& cd pgbouncer \
&& ./autogen.sh \
&& LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \
&& make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS=
#########################################################################################
#
# Layers "postgres-exporter" and "sql-exporter"
#
#########################################################################################
FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter
FROM burningalchemist/sql_exporter:0.13 AS sql-exporter
######################################################################################### #########################################################################################
# #
# Clean up postgres folder before inclusion # Clean up postgres folder before inclusion
@@ -1079,7 +899,7 @@ FROM neon-pg-ext-build AS postgres-cleanup-layer
COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql
# Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise) # Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
RUN cd /usr/local/pgsql/bin && rm -f ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
# Remove headers that we won't need anymore - we've completed installation of all extensions # Remove headers that we won't need anymore - we've completed installation of all extensions
RUN rm -r /usr/local/pgsql/include RUN rm -r /usr/local/pgsql/include
@@ -1098,10 +918,7 @@ RUN rm /usr/local/pgsql/lib/lib*.a
FROM neon-pg-ext-build AS neon-pg-ext-test FROM neon-pg-ext-build AS neon-pg-ext-test
ARG PG_VERSION ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \ RUN mkdir /ext-src
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
mkdir /ext-src
#COPY --from=postgis-build /postgis.tar.gz /ext-src/ #COPY --from=postgis-build /postgis.tar.gz /ext-src/
#COPY --from=postgis-build /sfcgal/* /usr #COPY --from=postgis-build /sfcgal/* /usr
@@ -1117,7 +934,7 @@ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
COPY --from=rum-pg-build /rum.tar.gz /ext-src COPY --from=rum-pg-build /rum.tar.gz /ext-src
COPY compute/patches/rum.patch /ext-src COPY patches/rum.patch /ext-src
#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src #COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
COPY --from=prefix-pg-build /prefix.tar.gz /ext-src COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
@@ -1125,9 +942,9 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src
COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src #COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
COPY compute/patches/pg_hint_plan.patch /ext-src COPY patches/pg_hint_plan.patch /ext-src
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
COPY compute/patches/pg_cron.patch /ext-src COPY patches/pg_cron.patch /ext-src
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
#COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src #COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
@@ -1136,42 +953,21 @@ COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src #COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src #COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
COPY compute/patches/pg_anon.patch /ext-src COPY patches/pg_anon.patch /ext-src
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
RUN case "${PG_VERSION}" in "v17") \ RUN cd /ext-src/ && for f in *.tar.gz; \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
cd /ext-src/ && for f in *.tar.gz; \
do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \ do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \ rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|| exit 1; rm -f $f; done || exit 1; rm -f $f; done
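Spelled out as standalone shell, the extraction loop above is equivalent to this readable sketch:

    cd /ext-src
    for f in *.tar.gz; do
        dname="${f%%.tar*}-src"               # e.g. pg_cron.tar.gz -> pg_cron-src
        rm -rf "$dname" && mkdir "$dname"     # start each extension from a clean dir
        tar xzf "$f" --strip-components=1 -C "$dname" || exit 1
        rm -f "$f"                            # keep only the unpacked sources
    done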
RUN case "${PG_VERSION}" in "v17") \ RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
echo "v17 extensions are not supported yet. Quit" && exit 0;; \ RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
esac && \
cd /ext-src/rum-src && patch -p1 <../rum.patch
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
# cmake is required for the h3 test # cmake is required for the h3 test
RUN case "${PG_VERSION}" in "v17") \ RUN apt-get update && apt-get install -y cmake
echo "v17 extensions are not supported yet. Quit" && exit 0;; \ RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
esac && \
apt-get update && apt-get install -y cmake
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
RUN case "${PG_VERSION}" in "v17") \ RUN patch -p1 </ext-src/pg_anon.patch
echo "v17 extensions are not supported yet. Quit" && exit 0;; \ RUN patch -p1 </ext-src/pg_cron.patch
esac && \
patch -p1 </ext-src/pg_anon.patch
RUN case "${PG_VERSION}" in "v17") \
echo "v17 extensions are not supported yet. Quit" && exit 0;; \
esac && \
patch -p1 </ext-src/pg_cron.patch
ENV PATH=/usr/local/pgsql/bin:$PATH ENV PATH=/usr/local/pgsql/bin:$PATH
ENV PGHOST=compute ENV PGHOST=compute
ENV PGPORT=55433 ENV PGPORT=55433
@@ -1183,9 +979,7 @@ ENV PGDATABASE=postgres
# Put it all together into the final image # Put it all together into the final image
# #
######################################################################################### #########################################################################################
FROM debian:$DEBIAN_FLAVOR FROM debian:bullseye-slim
ARG DEBIAN_FLAVOR
ENV DEBIAN_FLAVOR=$DEBIAN_FLAVOR
# Add user postgres # Add user postgres
RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
echo "postgres:test_console_pass" | chpasswd && \ echo "postgres:test_console_pass" | chpasswd && \
@@ -1201,50 +995,23 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# pgbouncer and its config
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
# Metrics exporter binaries and configuration files
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter
COPY --chmod=0644 compute/etc/sql_exporter.yml /etc/sql_exporter.yml
COPY --chmod=0644 compute/etc/neon_collector.yml /etc/neon_collector.yml
COPY --chmod=0644 compute/etc/sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml
COPY --chmod=0644 compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
# Create remote extension download directory # Create remote extension download directory
RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
# Install: # Install:
# libreadline8 for psql # libreadline8 for psql
# libicu67, locales for collations (including ICU and plpgsql_check)
# liblz4-1 for lz4 # liblz4-1 for lz4
# libossp-uuid16 for extension ossp-uuid # libossp-uuid16 for extension ossp-uuid
# libgeos, libsfcgal1, and libprotobuf-c1 for PostGIS # libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
# libxml2, libxslt1.1 for xml2 # libxml2, libxslt1.1 for xml2
# libzstd1 for zstd # libzstd1 for zstd
# libboost* for rdkit # libboost* for rdkit
# ca-certificates for communicating with s3 by compute_ctl # ca-certificates for communicating with s3 by compute_ctl
RUN apt update && \
RUN apt update && \
case $DEBIAN_FLAVOR in \
# Version-specific installs for Bullseye (PG14-PG16):
# libicu67, locales for collations (including ICU and plpgsql_check)
# libgdal28, libproj19 for PostGIS
bullseye*) \
VERSION_INSTALLS="libicu67 libgdal28 libproj19"; \
;; \
# Version-specific installs for Bookworm (PG17):
# libicu72, locales for collations (including ICU and plpgsql_check)
# libgdal32, libproj25 for PostGIS
bookworm*) \
VERSION_INSTALLS="libicu72 libgdal32 libproj25"; \
;; \
esac && \
apt install --no-install-recommends -y \ apt install --no-install-recommends -y \
gdb \ gdb \
libicu67 \
liblz4-1 \ liblz4-1 \
libreadline8 \ libreadline8 \
libboost-iostreams1.74.0 \ libboost-iostreams1.74.0 \
@@ -1253,6 +1020,8 @@ RUN apt update && \
libboost-system1.74.0 \ libboost-system1.74.0 \
libossp-uuid16 \ libossp-uuid16 \
libgeos-c1v5 \ libgeos-c1v5 \
libgdal28 \
libproj19 \
libprotobuf-c1 \ libprotobuf-c1 \
libsfcgal1 \ libsfcgal1 \
libxml2 \ libxml2 \
@@ -1261,8 +1030,7 @@ RUN apt update && \
libcurl4-openssl-dev \ libcurl4-openssl-dev \
locales \ locales \
procps \ procps \
ca-certificates \ ca-certificates && \
$VERSION_INSTALLS && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

View File

@@ -119,8 +119,6 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
# I'm not sure why it wouldn't work, but this is the only place (apart from # I'm not sure why it wouldn't work, but this is the only place (apart from
# the "build-all-versions" entry points) where direct mention of PostgreSQL # the "build-all-versions" entry points) where direct mention of PostgreSQL
# versions is used. # versions is used.
.PHONY: postgres-configure-v17
postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
.PHONY: postgres-configure-v16 .PHONY: postgres-configure-v16
postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
.PHONY: postgres-configure-v15 .PHONY: postgres-configure-v15
@@ -217,31 +215,29 @@ neon-pg-clean-ext-%:
# they depend on openssl and other libraries that are not included in our # they depend on openssl and other libraries that are not included in our
# Rust build. # Rust build.
.PHONY: walproposer-lib .PHONY: walproposer-lib
walproposer-lib: neon-pg-ext-v17 walproposer-lib: neon-pg-ext-v16
+@echo "Compiling walproposer-lib" +@echo "Compiling walproposer-lib"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \ -C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
ifeq ($(UNAME_S),Linux)
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \ $(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
pg_strong_random.o pg_strong_random.o
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \ $(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
checksum_helper.o \ pg_crc32c.o \
cryptohash_openssl.o \
hmac_openssl.o \ hmac_openssl.o \
cryptohash_openssl.o \
scram-common.o \
md5_common.o \ md5_common.o \
parse_manifest.o \ checksum_helper.o
scram-common.o
ifeq ($(UNAME_S),Linux)
$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
pg_crc32c.o
endif endif
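For reference, "ar d" deletes the named members from a static archive; that is how the objects depending on openssl (see the comment above walproposer-lib) are kept out of the Rust link. A sketch:

    # Drop the OpenSSL-dependent objects, then list what remains in the archive
    ar d libpgcommon.a cryptohash_openssl.o hmac_openssl.o
    ar t libpgcommon.a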
.PHONY: walproposer-lib-clean .PHONY: walproposer-lib-clean
walproposer-lib-clean: walproposer-lib-clean:
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \ -C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
@@ -249,44 +245,38 @@ walproposer-lib-clean:
neon-pg-ext: \ neon-pg-ext: \
neon-pg-ext-v14 \ neon-pg-ext-v14 \
neon-pg-ext-v15 \ neon-pg-ext-v15 \
neon-pg-ext-v16 \ neon-pg-ext-v16
neon-pg-ext-v17
.PHONY: neon-pg-clean-ext .PHONY: neon-pg-clean-ext
neon-pg-clean-ext: \ neon-pg-clean-ext: \
neon-pg-clean-ext-v14 \ neon-pg-clean-ext-v14 \
neon-pg-clean-ext-v15 \ neon-pg-clean-ext-v15 \
neon-pg-clean-ext-v16 \ neon-pg-clean-ext-v16
neon-pg-clean-ext-v17
# shorthand to build all Postgres versions # shorthand to build all Postgres versions
.PHONY: postgres .PHONY: postgres
postgres: \ postgres: \
postgres-v14 \ postgres-v14 \
postgres-v15 \ postgres-v15 \
postgres-v16 \ postgres-v16
postgres-v17
.PHONY: postgres-headers .PHONY: postgres-headers
postgres-headers: \ postgres-headers: \
postgres-headers-v14 \ postgres-headers-v14 \
postgres-headers-v15 \ postgres-headers-v15 \
postgres-headers-v16 \ postgres-headers-v16
postgres-headers-v17
.PHONY: postgres-clean .PHONY: postgres-clean
postgres-clean: \ postgres-clean: \
postgres-clean-v14 \ postgres-clean-v14 \
postgres-clean-v15 \ postgres-clean-v15 \
postgres-clean-v16 \ postgres-clean-v16
postgres-clean-v17
.PHONY: postgres-check .PHONY: postgres-check
postgres-check: \ postgres-check: \
postgres-check-v14 \ postgres-check-v14 \
postgres-check-v15 \ postgres-check-v15 \
postgres-check-v16 \ postgres-check-v16
postgres-check-v17
# This doesn't remove the effects of 'configure'. # This doesn't remove the effects of 'configure'.
.PHONY: clean .PHONY: clean
@@ -331,13 +321,13 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
rm -f pg*.BAK rm -f pg*.BAK
# Indent pgxn/neon. # Indent pgxn/neon.
.PHONY: neon-pgindent .PHONY: pgindent
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17 neon-pgindent: postgres-v16-pg-bsd-indent neon-pg-ext-v16
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \ FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \ INDENT=$(POSTGRES_INSTALL_DIR)/build/v16/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \ PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/pgindent/pgindent \
-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \ -C $(POSTGRES_INSTALL_DIR)/build/neon-v16 \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent

View File

@@ -1,21 +0,0 @@
This directory contains files that are needed to build the compute
images, or included in the compute images.

Dockerfile.compute-node
    To build the compute image

vm-image-spec.yaml
    Instructions for vm-builder, to turn the compute-node image into
    the corresponding vm-compute-node image.

etc/
    Configuration files included in /etc in the compute image

patches/
    Some extensions need to be patched to work with Neon. This
    directory contains such patches. They are applied to the extension
    sources in Dockerfile.compute-node.

In addition to these, postgres itself, the neon postgres extension,
and compute_ctl are built and copied into the compute image by
Dockerfile.compute-node.

View File

@@ -1,247 +0,0 @@
collector_name: neon_collector
metrics:
- metric_name: lfc_misses
  type: gauge
  help: 'lfc_misses'
  key_labels:
  values: [lfc_misses]
  query: |
    select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
- metric_name: lfc_used
  type: gauge
  help: 'LFC chunks used (chunk = 1MB)'
  key_labels:
  values: [lfc_used]
  query: |
    select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
- metric_name: lfc_hits
  type: gauge
  help: 'lfc_hits'
  key_labels:
  values: [lfc_hits]
  query: |
    select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
- metric_name: lfc_writes
  type: gauge
  help: 'lfc_writes'
  key_labels:
  values: [lfc_writes]
  query: |
    select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
- metric_name: lfc_cache_size_limit
  type: gauge
  help: 'LFC cache size limit in bytes'
  key_labels:
  values: [lfc_cache_size_limit]
  query: |
    select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
- metric_name: connection_counts
  type: gauge
  help: 'Connection counts'
  key_labels:
  - datname
  - state
  values: [count]
  query: |
    select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
- metric_name: pg_stats_userdb
  type: gauge
  help: 'Stats for the 10 oldest non-system dbs'
  key_labels:
  - datname
  value_label: kind
  values:
  - db_size
  - deadlocks
  # Rows
  - inserted
  - updated
  - deleted
  # We export stats for 10 non-system databases. Without this limit
  # it is too easy to abuse the system by creating lots of databases.
  query: |
    select pg_database_size(datname) as db_size, deadlocks,
      tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
      datname
    from pg_stat_database
    where datname IN (
      select datname
      from pg_database
      where datname <> 'postgres' and not datistemplate
      order by oid
      limit 10
    );
- metric_name: max_cluster_size
  type: gauge
  help: 'neon.max_cluster_size setting'
  key_labels:
  values: [max_cluster_size]
  query: |
    select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
- metric_name: db_total_size
  type: gauge
  help: 'Size of all databases'
  key_labels:
  values: [total]
  query: |
    select sum(pg_database_size(datname)) as total from pg_database;
# DEPRECATED
- metric_name: lfc_approximate_working_set_size
  type: gauge
  help: 'Approximate working set size in pages of 8192 bytes'
  key_labels:
  values: [approximate_working_set_size]
  query: |
    select neon.approximate_working_set_size(false) as approximate_working_set_size;
- metric_name: lfc_approximate_working_set_size_windows
  type: gauge
  help: 'Approximate working set size in pages of 8192 bytes'
  key_labels: [duration]
  values: [size]
  # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
  # of durations in a pretty-printed form.
  query: |
    select
      x as duration,
      neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
    from
      (values ('5m'),('15m'),('1h')) as t (x);
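For example, the '15m' label above reaches the function as extract('epoch' from '15m'::interval)::int = 900 seconds, which is easy to verify by hand:

    psql -c "select extract('epoch' from '15m'::interval)::int"   # -> 900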
- metric_name: compute_current_lsn
  type: gauge
  help: 'Current LSN of the database'
  key_labels:
  values: [lsn]
  query: |
    select
      case
        when pg_catalog.pg_is_in_recovery()
        then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
        else (pg_current_wal_lsn() - '0/0')::FLOAT8
      end as lsn;
- metric_name: compute_receive_lsn
  type: gauge
  help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
  key_labels:
  values: [lsn]
  query: |
    SELECT
      CASE
        WHEN pg_catalog.pg_is_in_recovery()
        THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
        ELSE 0
      END AS lsn;
- metric_name: replication_delay_bytes
  type: gauge
  help: 'Bytes between received and replayed LSN'
  key_labels:
  values: [replication_delay_bytes]
  # We use a GREATEST call here because this calculation can be negative.
  # The calculation is not atomic, meaning after we've gotten the receive
  # LSN, the replay LSN may have advanced past the receive LSN we
  # are using for the calculation.
  query: |
    SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
- metric_name: replication_delay_seconds
  type: gauge
  help: 'Time since last LSN was replayed'
  key_labels:
  values: [replication_delay_seconds]
  query: |
    SELECT
      CASE
        WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
        ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
      END AS replication_delay_seconds;
- metric_name: checkpoints_req
  type: gauge
  help: 'Number of requested checkpoints'
  key_labels:
  values: [checkpoints_req]
  query: |
    SELECT checkpoints_req FROM pg_stat_bgwriter;
- metric_name: checkpoints_timed
  type: gauge
  help: 'Number of scheduled checkpoints'
  key_labels:
  values: [checkpoints_timed]
  query: |
    SELECT checkpoints_timed FROM pg_stat_bgwriter;
- metric_name: compute_logical_snapshot_files
  type: gauge
  help: 'Number of snapshot files in pg_logical/snapshot'
  key_labels:
  - timeline_id
  values: [num_logical_snapshot_files]
  query: |
    SELECT
      (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
      -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
      -- temporary snapshot files are renamed to the actual snapshot files after they are
      -- completely built. We only WAL-log the completely built snapshot files.
      (SELECT COUNT(*) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
- metric_name: logical_slot_restart_lsn
  type: gauge
  help: 'restart_lsn of logical slots'
  key_labels:
  - slot_name
  values: [restart_lsn]
  query: |
    select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
    from pg_replication_slots
    where slot_type = 'logical';
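As a worked example of the float casting noted above: subtracting two pg_lsn values yields a plain byte offset, and FLOAT8 represents it exactly up to 2^53 bytes (8 PiB of WAL):

    psql -c "select ('16/B374D848'::pg_lsn - '0/0'::pg_lsn)::FLOAT8"   # byte offset as a float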
- metric_name: compute_subscriptions_count
  type: gauge
  help: 'Number of logical replication subscriptions grouped by enabled/disabled'
  key_labels:
  - enabled
  values: [subscriptions_count]
  query: |
    select subenabled::text as enabled, count(*) as subscriptions_count
    from pg_subscription
    group by subenabled;
- metric_name: retained_wal
  type: gauge
  help: 'Retained WAL in inactive replication slots'
  key_labels:
  - slot_name
  values: [retained_wal]
  query: |
    SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
    FROM pg_replication_slots
    WHERE active = false;
- metric_name: wal_is_lost
  type: gauge
  help: 'Whether or not the replication slot wal_status is lost'
  key_labels:
  - slot_name
  values: [wal_is_lost]
  query: |
    SELECT slot_name,
      CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
    FROM pg_replication_slots;

View File

@@ -1,55 +0,0 @@
collector_name: neon_collector_autoscaling
metrics:
- metric_name: lfc_misses
type: gauge
help: 'lfc_misses'
key_labels:
values: [lfc_misses]
query: |
select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
- metric_name: lfc_used
type: gauge
help: 'LFC chunks used (chunk = 1MB)'
key_labels:
values: [lfc_used]
query: |
select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
- metric_name: lfc_hits
type: gauge
help: 'lfc_hits'
key_labels:
values: [lfc_hits]
query: |
select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
- metric_name: lfc_writes
type: gauge
help: 'lfc_writes'
key_labels:
values: [lfc_writes]
query: |
select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
- metric_name: lfc_cache_size_limit
type: gauge
help: 'LFC cache size limit in bytes'
key_labels:
values: [lfc_cache_size_limit]
query: |
select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
- metric_name: lfc_approximate_working_set_size_windows
type: gauge
help: 'Approximate working set size in pages of 8192 bytes'
key_labels: [duration_seconds]
values: [size]
# NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
# size looking back 1..60 minutes, labeled with the lookback duration in seconds.
query: |
select
x::text as duration_seconds,
neon.approximate_working_set_size_seconds(x) as size
from
(select generate_series * 60 as x from generate_series(1, 60)) as t (x);

View File

@@ -1,17 +0,0 @@
[databases]
*=host=localhost port=5432 auth_user=cloud_admin
[pgbouncer]
listen_port=6432
listen_addr=0.0.0.0
auth_type=scram-sha-256
auth_user=cloud_admin
auth_dbname=postgres
client_tls_sslmode=disable
server_tls_sslmode=disable
pool_mode=transaction
max_client_conn=10000
default_pool_size=64
max_prepared_statements=0
admin_users=postgres
unix_socket_dir=/tmp/
unix_socket_mode=0777

View File

@@ -1,33 +0,0 @@
# Configuration for sql_exporter
# Global defaults.
global:
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
scrape_timeout: 10s
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
scrape_timeout_offset: 500ms
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
min_interval: 0s
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
# as will concurrent scrapes.
max_connections: 1
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
# always be the same as max_connections.
max_idle_connections: 1
# Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
# If 0, connections are not closed due to a connection's age.
max_connection_lifetime: 5m
# The target to monitor and the collectors to execute on it.
target:
# Data source name always has a URI scheme that matches the driver name. In some cases (e.g. MySQL)
# the scheme gets dropped or replaced to match the driver expected DSN format.
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
# Collectors (referenced by name) to execute on the target.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collectors: [neon_collector]
# Collector files: a list of globs. One collector definition is read from each matching file.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collector_files:
- "neon_collector.yml"

View File

@@ -1,33 +0,0 @@
# Configuration for sql_exporter for autoscaling-agent
# Global defaults.
global:
# If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
scrape_timeout: 10s
# Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
scrape_timeout_offset: 500ms
# Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
min_interval: 0s
# Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
# as will concurrent scrapes.
max_connections: 1
# Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
# always be the same as max_connections.
max_idle_connections: 1
# Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
# If 0, connections are not closed due to a connection's age.
max_connection_lifetime: 5m
# The target to monitor and the collectors to execute on it.
target:
# Data source name always has a URI scheme that matches the driver name. In some cases (e.g. MySQL)
# the scheme gets dropped or replaced to match the driver expected DSN format.
data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
# Collectors (referenced by name) to execute on the target.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collectors: [neon_collector_autoscaling]
# Collector files: a list of globs. One collector definition is read from each matching file.
# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
collector_files:
- "neon_collector_autoscaling.yml"

File diff suppressed because it is too large

View File

@@ -1,112 +0,0 @@
# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
---
commands:
- name: cgconfigparser
user: root
sysvInitAction: sysinit
shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
# restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
# running it as root.
- name: chmod-resize-swap
user: root
sysvInitAction: sysinit
shell: 'chmod 711 /neonvm/bin/resize-swap'
- name: pgbouncer
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn
shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
- name: sql-exporter
user: nobody
sysvInitAction: respawn
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
- name: sql-exporter-autoscaling
user: nobody
sysvInitAction: respawn
shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
shutdownHook: |
su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
files:
- filename: compute_ctl-resize-swap
content: |
# Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
# as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL)
postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap
- filename: cgconfig.conf
content: |
# Configuration for cgroups in VM compute nodes
group neon-postgres {
perm {
admin {
uid = postgres;
}
task {
gid = users;
}
}
memory {}
}
build: |
# Build cgroup-tools
#
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
# requires cgroup v2, so we'll build cgroup-tools ourselves.
FROM debian:bullseye-slim as libcgroup-builder
ENV LIBCGROUP_VERSION=v2.0.3
RUN set -exu \
&& apt update \
&& apt install --no-install-recommends -y \
git \
ca-certificates \
automake \
cmake \
make \
gcc \
byacc \
flex \
libtool \
libpam0g-dev \
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
&& INSTALL_DIR="/libcgroup-install" \
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
&& cd libcgroup \
# extracted from bootstrap.sh, with modified flags:
&& (test -d m4 || mkdir m4) \
&& autoreconf -fi \
&& rm -rf autom4te.cache \
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
# actually build the thing...
&& make install
merge: |
# tweak nofile limits
RUN set -e \
&& echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
&& test ! -e /etc/security || ( \
echo '* - nofile 1048576' >>/etc/security/limits.conf \
&& echo 'root - nofile 1048576' >>/etc/security/limits.conf \
)
# Allow postgres user (compute_ctl) to run swap resizer.
# Need to install sudo in order to allow this.
#
# Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
RUN set -e \
&& apt update \
&& apt install --no-install-recommends -y \
sudo \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap
COPY cgconfig.conf /etc/cgconfig.conf
RUN set -e \
&& chmod 0644 /etc/cgconfig.conf
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/

View File

@@ -11,6 +11,7 @@ testing = []
[dependencies] [dependencies]
anyhow.workspace = true anyhow.workspace = true
async-compression.workspace = true
chrono.workspace = true chrono.workspace = true
cfg-if.workspace = true cfg-if.workspace = true
clap.workspace = true clap.workspace = true
@@ -23,6 +24,7 @@ num_cpus.workspace = true
opentelemetry.workspace = true opentelemetry.workspace = true
postgres.workspace = true postgres.workspace = true
regex.workspace = true regex.workspace = true
serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
signal-hook.workspace = true signal-hook.workspace = true
tar.workspace = true tar.workspace = true
@@ -41,6 +43,7 @@ url.workspace = true
compute_api.workspace = true compute_api.workspace = true
utils.workspace = true utils.workspace = true
workspace_hack.workspace = true workspace_hack.workspace = true
toml_edit.workspace = true
remote_storage = { version = "0.1", path = "../libs/remote_storage/" } remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" } vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
zstd = "0.13" zstd = "0.13"

View File

@@ -1052,19 +1052,26 @@ impl ComputeNode {
let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?; let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?;
let config_time = Utc::now(); let config_time = Utc::now();
if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates { if pspec.spec.mode == ComputeMode::Primary {
let pgdata_path = Path::new(&self.pgdata); if !pspec.spec.skip_pg_catalog_updates {
// temporarily reset max_cluster_size in config let pgdata_path = Path::new(&self.pgdata);
// to avoid the possibility of hitting the limit, while we are applying config: // temporarily reset max_cluster_size in config
// creating new extensions, roles, etc... // to avoid the possibility of hitting the limit, while we are applying config:
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || { // creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(
pgdata_path,
"neon.max_cluster_size=-1",
|| {
self.pg_reload_conf()?;
self.apply_config(&compute_state)?;
Ok(())
},
)?;
self.pg_reload_conf()?; self.pg_reload_conf()?;
}
self.apply_config(&compute_state)?; self.post_apply_config()?;
Ok(())
})?;
self.pg_reload_conf()?;
} }
let startup_end_time = Utc::now(); let startup_end_time = Utc::now();

View File

@@ -124,7 +124,6 @@ fn parse_pg_version(human_version: &str) -> &str {
"14" => return "v14", "14" => return "v14",
"15" => return "v15", "15" => return "v15",
"16" => return "v16", "16" => return "v16",
"17" => return "v17",
_ => {} _ => {}
}, },
_ => {} _ => {}

View File

@@ -1 +0,0 @@
GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;

View File

@@ -793,9 +793,6 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
include_str!( include_str!(
"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql" "./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
), ),
include_str!(
"./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
),
]; ];
MigrationRunner::new(client, &migrations).run_migrations()?; MigrationRunner::new(client, &migrations).run_migrations()?;

View File

@@ -9,9 +9,13 @@ anyhow.workspace = true
camino.workspace = true camino.workspace = true
clap.workspace = true clap.workspace = true
comfy-table.workspace = true comfy-table.workspace = true
futures.workspace = true
git-version.workspace = true
humantime.workspace = true humantime.workspace = true
nix.workspace = true nix.workspace = true
once_cell.workspace = true once_cell.workspace = true
postgres.workspace = true
hex.workspace = true
humantime-serde.workspace = true humantime-serde.workspace = true
hyper.workspace = true hyper.workspace = true
regex.workspace = true regex.workspace = true
@@ -19,6 +23,8 @@ reqwest = { workspace = true, features = ["blocking", "json"] }
scopeguard.workspace = true scopeguard.workspace = true
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
serde_with.workspace = true
tar.workspace = true
thiserror.workspace = true thiserror.workspace = true
toml.workspace = true toml.workspace = true
toml_edit.workspace = true toml_edit.workspace = true

View File

@@ -151,7 +151,7 @@ where
print!("."); print!(".");
io::stdout().flush().unwrap(); io::stdout().flush().unwrap();
} }
tokio::time::sleep(RETRY_INTERVAL).await; thread::sleep(RETRY_INTERVAL);
} }
Err(e) => { Err(e) => {
println!("error starting process {process_name:?}: {e:#}"); println!("error starting process {process_name:?}: {e:#}");

View File

@@ -34,14 +34,12 @@ use safekeeper_api::{
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT, DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT, DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
}; };
use std::borrow::Cow;
use std::collections::{BTreeSet, HashMap}; use std::collections::{BTreeSet, HashMap};
use std::path::PathBuf; use std::path::PathBuf;
use std::process::exit; use std::process::exit;
use std::str::FromStr; use std::str::FromStr;
use std::time::Duration; use std::time::Duration;
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR; use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
use tokio::task::JoinSet;
use url::Host; use url::Host;
use utils::{ use utils::{
auth::{Claims, Scope}, auth::{Claims, Scope},
@@ -89,35 +87,34 @@ fn main() -> Result<()> {
// Check for 'neon init' command first. // Check for 'neon init' command first.
let subcommand_result = if sub_name == "init" { let subcommand_result = if sub_name == "init" {
handle_init(sub_args).map(|env| Some(Cow::Owned(env))) handle_init(sub_args).map(Some)
} else { } else {
// all other commands need an existing config // all other commands need an existing config
let mut env =
let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?; LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
let original_env = env.clone(); let original_env = env.clone();
let env = Box::leak(Box::new(env));
let rt = tokio::runtime::Builder::new_current_thread() let rt = tokio::runtime::Builder::new_current_thread()
.enable_all() .enable_all()
.build() .build()
.unwrap(); .unwrap();
let subcommand_result = match sub_name { let subcommand_result = match sub_name {
"tenant" => rt.block_on(handle_tenant(sub_args, env)), "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
"timeline" => rt.block_on(handle_timeline(sub_args, env)), "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
"start" => rt.block_on(handle_start_all(env, get_start_timeout(sub_args))), "start" => rt.block_on(handle_start_all(&env, get_start_timeout(sub_args))),
"stop" => rt.block_on(handle_stop_all(sub_args, env)), "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
"pageserver" => rt.block_on(handle_pageserver(sub_args, env)), "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
"storage_controller" => rt.block_on(handle_storage_controller(sub_args, env)), "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
"storage_broker" => rt.block_on(handle_storage_broker(sub_args, env)), "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
"safekeeper" => rt.block_on(handle_safekeeper(sub_args, env)), "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
"endpoint" => rt.block_on(handle_endpoint(sub_args, env)), "mappings" => handle_mappings(sub_args, &mut env),
"mappings" => handle_mappings(sub_args, env),
"pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"), "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
_ => bail!("unexpected subcommand {sub_name}"), _ => bail!("unexpected subcommand {sub_name}"),
}; };
if &original_env != env { if original_env != env {
subcommand_result.map(|()| Some(Cow::Borrowed(env))) subcommand_result.map(|()| Some(env))
} else { } else {
subcommand_result.map(|()| None) subcommand_result.map(|()| None)
} }
@@ -1248,122 +1245,49 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
Ok(()) Ok(())
} }
async fn handle_storage_broker(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
let (sub_name, sub_args) = match sub_match.subcommand() {
Some(broker_command_data) => broker_command_data,
None => bail!("no broker subcommand provided"),
};
match sub_name {
"start" => {
if let Err(e) = broker::start_broker_process(env, get_start_timeout(sub_args)).await {
eprintln!("broker start failed: {e}");
exit(1);
}
}
"stop" => {
if let Err(e) = broker::stop_broker_process(env) {
eprintln!("broker stop failed: {e}");
exit(1);
}
}
_ => bail!("Unexpected broker subcommand '{}'", sub_name),
}
Ok(())
}
async fn handle_start_all( async fn handle_start_all(
env: &'static local_env::LocalEnv, env: &local_env::LocalEnv,
retry_timeout: &Duration, retry_timeout: &Duration,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let Err(errors) = handle_start_all_impl(env, *retry_timeout).await else {
neon_start_status_check(env, retry_timeout)
.await
.context("status check after successful startup of all services")?;
return Ok(());
};
eprintln!("startup failed because one or more services could not be started");
for e in errors {
eprintln!("{e}");
let debug_repr = format!("{e:?}");
for line in debug_repr.lines() {
eprintln!(" {line}");
}
}
try_stop_all(env, true).await;
exit(2);
}
/// Returns Ok() if and only if all services could be started successfully.
/// Otherwise, returns the list of errors that occurred during startup.
async fn handle_start_all_impl(
env: &'static local_env::LocalEnv,
retry_timeout: Duration,
) -> Result<(), Vec<anyhow::Error>> {
// Endpoints are not started automatically // Endpoints are not started automatically
let mut js = JoinSet::new(); broker::start_broker_process(env, retry_timeout).await?;
// force infalliblity through closure // Only start the storage controller if the pageserver is configured to need it
#[allow(clippy::redundant_closure_call)] if env.control_plane_api.is_some() {
(|| { let storage_controller = StorageController::from_env(env);
js.spawn(async move { if let Err(e) = storage_controller
let retry_timeout = retry_timeout; .start(NeonStorageControllerStartArgs::with_default_instance_id(
broker::start_broker_process(env, &retry_timeout).await (*retry_timeout).into(),
}); ))
.await
// Only start the storage controller if the pageserver is configured to need it {
if env.control_plane_api.is_some() { eprintln!("storage_controller start failed: {:#}", e);
js.spawn(async move { try_stop_all(env, true).await;
let storage_controller = StorageController::from_env(env); exit(1);
storage_controller
.start(NeonStorageControllerStartArgs::with_default_instance_id(
retry_timeout.into(),
))
.await
.map_err(|e| e.context("start storage_controller"))
});
}
for ps_conf in &env.pageservers {
js.spawn(async move {
let pageserver = PageServerNode::from_env(env, ps_conf);
pageserver
.start(&retry_timeout)
.await
.map_err(|e| e.context(format!("start pageserver {}", ps_conf.id)))
});
}
for node in env.safekeepers.iter() {
js.spawn(async move {
let safekeeper = SafekeeperNode::from_env(env, node);
safekeeper
.start(vec![], &retry_timeout)
.await
.map_err(|e| e.context(format!("start safekeeper {}", safekeeper.id)))
});
}
})();
let mut errors = Vec::new();
while let Some(result) = js.join_next().await {
let result = result.expect("we don't panic or cancel the tasks");
if let Err(e) = result {
errors.push(e);
} }
} }
if !errors.is_empty() { for ps_conf in &env.pageservers {
return Err(errors); let pageserver = PageServerNode::from_env(env, ps_conf);
if let Err(e) = pageserver.start(retry_timeout).await {
eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
try_stop_all(env, true).await;
exit(1);
}
} }
for node in env.safekeepers.iter() {
let safekeeper = SafekeeperNode::from_env(env, node);
if let Err(e) = safekeeper.start(vec![], retry_timeout).await {
eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
try_stop_all(env, false).await;
exit(1);
}
}
neon_start_status_check(env, retry_timeout).await?;
Ok(()) Ok(())
} }
@@ -1748,19 +1672,6 @@ fn cli() -> Command {
.arg(stop_mode_arg.clone()) .arg(stop_mode_arg.clone())
.arg(instance_id)) .arg(instance_id))
) )
.subcommand(
Command::new("storage_broker")
.arg_required_else_help(true)
.about("Manage broker")
.subcommand(Command::new("start")
.about("Start broker")
.arg(timeout_arg.clone())
)
.subcommand(Command::new("stop")
.about("Stop broker")
.arg(stop_mode_arg.clone())
)
)
.subcommand( .subcommand(
Command::new("safekeeper") Command::new("safekeeper")
.arg_required_else_help(true) .arg_required_else_help(true)

View File

@@ -702,7 +702,7 @@ impl Endpoint {
} }
} }
} }
tokio::time::sleep(ATTEMPT_INTERVAL).await; std::thread::sleep(ATTEMPT_INTERVAL);
} }
// disarm the scopeguard, let the child outlive this function (and neon_local invoction) // disarm the scopeguard, let the child outlive this function (and neon_local invoction)

View File

@@ -342,7 +342,7 @@ impl LocalEnv {
#[allow(clippy::manual_range_patterns)] #[allow(clippy::manual_range_patterns)]
match pg_version { match pg_version {
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))), 14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version), _ => bail!("Unsupported postgres version: {}", pg_version),
} }
} }

View File

@@ -17,7 +17,9 @@ use std::time::Duration;
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use camino::Utf8PathBuf; use camino::Utf8PathBuf;
use pageserver_api::models::{self, AuxFilePolicy, TenantInfo, TimelineInfo}; use pageserver_api::models::{
self, AuxFilePolicy, LocationConfig, TenantHistorySize, TenantInfo, TimelineInfo,
};
use pageserver_api::shard::TenantShardId; use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api; use pageserver_client::mgmt_api;
use postgres_backend::AuthType; use postgres_backend::AuthType;
@@ -322,6 +324,22 @@ impl PageServerNode {
background_process::stop_process(immediate, "pageserver", &self.pid_file()) background_process::stop_process(immediate, "pageserver", &self.pid_file())
} }
pub async fn page_server_psql_client(
&self,
) -> anyhow::Result<(
tokio_postgres::Client,
tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
)> {
let mut config = self.pg_connection_config.clone();
if self.conf.pg_auth_type == AuthType::NeonJWT {
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
config = config.set_password(Some(token));
}
Ok(config.connect_no_tls().await?)
}
pub async fn check_status(&self) -> mgmt_api::Result<()> { pub async fn check_status(&self) -> mgmt_api::Result<()> {
self.http_client.status().await self.http_client.status().await
} }
@@ -522,6 +540,19 @@ impl PageServerNode {
Ok(()) Ok(())
} }
pub async fn location_config(
&self,
tenant_shard_id: TenantShardId,
config: LocationConfig,
flush_ms: Option<Duration>,
lazy: bool,
) -> anyhow::Result<()> {
Ok(self
.http_client
.location_config(tenant_shard_id, config, flush_ms, lazy)
.await?)
}
pub async fn timeline_list( pub async fn timeline_list(
&self, &self,
tenant_shard_id: &TenantShardId, tenant_shard_id: &TenantShardId,
@@ -605,4 +636,14 @@ impl PageServerNode {
Ok(()) Ok(())
} }
pub async fn tenant_synthetic_size(
&self,
tenant_shard_id: TenantShardId,
) -> anyhow::Result<TenantHistorySize> {
Ok(self
.http_client
.tenant_synthetic_size(tenant_shard_id)
.await?)
}
} }

View File

@@ -4,10 +4,13 @@
/// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just /// NOTE: This doesn't implement the full, correct postgresql.conf syntax. Just
/// enough to extract a few settings we need in Neon, assuming you don't do /// enough to extract a few settings we need in Neon, assuming you don't do
/// funny stuff like include-directives or funny escaping. /// funny stuff like include-directives or funny escaping.
use anyhow::{bail, Context, Result};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt; use std::fmt;
use std::io::BufRead;
use std::str::FromStr;
/// In-memory representation of a postgresql.conf file /// In-memory representation of a postgresql.conf file
#[derive(Default, Debug)] #[derive(Default, Debug)]
@@ -16,16 +19,84 @@ pub struct PostgresConf {
hash: HashMap<String, String>, hash: HashMap<String, String>,
} }
static CONF_LINE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^((?:\w|\.)+)\s*=\s*(\S+)$").unwrap());
impl PostgresConf { impl PostgresConf {
pub fn new() -> PostgresConf { pub fn new() -> PostgresConf {
PostgresConf::default() PostgresConf::default()
} }
/// Read file into memory
pub fn read(read: impl std::io::Read) -> Result<PostgresConf> {
let mut result = Self::new();
for line in std::io::BufReader::new(read).lines() {
let line = line?;
// Store each line in a vector, in original format
result.lines.push(line.clone());
// Also parse each line and insert key=value lines into a hash map.
//
// FIXME: This doesn't match exactly the flex/bison grammar in PostgreSQL.
// But it's close enough for our usage.
let line = line.trim();
if line.starts_with('#') {
// comment, ignore
continue;
} else if let Some(caps) = CONF_LINE_RE.captures(line) {
let name = caps.get(1).unwrap().as_str();
let raw_val = caps.get(2).unwrap().as_str();
if let Ok(val) = deescape_str(raw_val) {
// Note: if there's already an entry in the hash map for
// this key, this will replace it. That's the behavior what
// we want; when PostgreSQL reads the file, each line
// overrides any previous value for the same setting.
result.hash.insert(name.to_string(), val.to_string());
}
}
}
Ok(result)
}
/// Return the current value of 'option' /// Return the current value of 'option'
pub fn get(&self, option: &str) -> Option<&str> { pub fn get(&self, option: &str) -> Option<&str> {
self.hash.get(option).map(|x| x.as_ref()) self.hash.get(option).map(|x| x.as_ref())
} }
/// Return the current value of a field, parsed to the right datatype.
///
/// This calls the FromStr::parse() function on the value of the field. If
/// the field does not exist, or parsing fails, returns an error.
///
pub fn parse_field<T>(&self, field_name: &str, context: &str) -> Result<T>
where
T: FromStr,
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
{
self.get(field_name)
.with_context(|| format!("could not find '{}' option {}", field_name, context))?
.parse::<T>()
.with_context(|| format!("could not parse '{}' option {}", field_name, context))
}
pub fn parse_field_optional<T>(&self, field_name: &str, context: &str) -> Result<Option<T>>
where
T: FromStr,
<T as FromStr>::Err: std::error::Error + Send + Sync + 'static,
{
if let Some(val) = self.get(field_name) {
let result = val
.parse::<T>()
.with_context(|| format!("could not parse '{}' option {}", field_name, context))?;
Ok(Some(result))
} else {
Ok(None)
}
}
/// ///
/// Note: if you call this multiple times for the same option, the config /// file will have a line for each call. It would be nice to have a function
/// file will a line for each call. It would be nice to have a function /// file will a line for each call. It would be nice to have a function
@@ -83,8 +154,48 @@ fn escape_str(s: &str) -> String {
} }
} }
/// De-escape a possibly-quoted value.
///
/// See `DeescapeQuotedString` function in PostgreSQL sources for how PostgreSQL
/// does this.
fn deescape_str(s: &str) -> Result<String> {
// If the string has a quote at the beginning and end, strip them out.
if s.len() >= 2 && s.starts_with('\'') && s.ends_with('\'') {
let mut result = String::new();
let mut iter = s[1..(s.len() - 1)].chars().peekable();
while let Some(c) = iter.next() {
let newc = if c == '\\' {
match iter.next() {
Some('b') => '\x08',
Some('f') => '\x0c',
Some('n') => '\n',
Some('r') => '\r',
Some('t') => '\t',
Some('0'..='7') => {
// TODO
bail!("octal escapes not supported");
}
Some(n) => n,
None => break,
}
} else if c == '\'' && iter.peek() == Some(&'\'') {
// doubled quote becomes just one quote
iter.next().unwrap()
} else {
c
};
result.push(newc);
}
Ok(result)
} else {
Ok(s.to_string())
}
}
#[test] #[test]
fn test_postgresql_conf_escapes() -> anyhow::Result<()> { fn test_postgresql_conf_escapes() -> Result<()> {
assert_eq!(escape_str("foo bar"), "'foo bar'"); assert_eq!(escape_str("foo bar"), "'foo bar'");
// these don't need to be quoted // these don't need to be quoted
assert_eq!(escape_str("foo"), "foo"); assert_eq!(escape_str("foo"), "foo");
@@ -103,5 +214,13 @@ fn test_postgresql_conf_escapes() -> anyhow::Result<()> {
assert_eq!(escape_str("fo\\o"), "'fo\\\\o'"); assert_eq!(escape_str("fo\\o"), "'fo\\\\o'");
assert_eq!(escape_str("10 cats"), "'10 cats'"); assert_eq!(escape_str("10 cats"), "'10 cats'");
// Test de-escaping
assert_eq!(deescape_str(&escape_str("foo"))?, "foo");
assert_eq!(deescape_str(&escape_str("fo'o\nba\\r"))?, "fo'o\nba\\r");
assert_eq!(deescape_str("'\\b\\f\\n\\r\\t'")?, "\x08\x0c\n\r\t");
// octal-escapes are currently not supported
assert!(deescape_str("'foo\\7\\07\\007'").is_err());
Ok(()) Ok(())
} }

View File

@@ -28,7 +28,6 @@ use utils::{
auth::{encode_from_key_file, Claims, Scope}, auth::{encode_from_key_file, Claims, Scope},
id::{NodeId, TenantId}, id::{NodeId, TenantId},
}; };
use whoami::username;
pub struct StorageController { pub struct StorageController {
env: LocalEnv, env: LocalEnv,
@@ -184,7 +183,7 @@ impl StorageController {
/// to other versions if that one isn't found. Some automated tests create circumstances /// to other versions if that one isn't found. Some automated tests create circumstances
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`. /// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> { async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 16, 15, 14]; let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];
for v in prefer_versions { for v in prefer_versions {
let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap(); let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
@@ -212,16 +211,7 @@ impl StorageController {
/// Readiness check for our postgres process /// Readiness check for our postgres process
async fn pg_isready(&self, pg_bin_dir: &Utf8Path, postgres_port: u16) -> anyhow::Result<bool> { async fn pg_isready(&self, pg_bin_dir: &Utf8Path, postgres_port: u16) -> anyhow::Result<bool> {
let bin_path = pg_bin_dir.join("pg_isready"); let bin_path = pg_bin_dir.join("pg_isready");
let args = [ let args = ["-h", "localhost", "-p", &format!("{}", postgres_port)];
"-h",
"localhost",
"-U",
&username(),
"-d",
DB_NAME,
"-p",
&format!("{}", postgres_port),
];
let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?; let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;
Ok(exitcode.success()) Ok(exitcode.success())
@@ -235,11 +225,7 @@ impl StorageController {
/// ///
/// Returns the database url /// Returns the database url
pub async fn setup_database(&self, postgres_port: u16) -> anyhow::Result<String> { pub async fn setup_database(&self, postgres_port: u16) -> anyhow::Result<String> {
let database_url = format!( let database_url = format!("postgresql://localhost:{}/{DB_NAME}", postgres_port);
"postgresql://{}@localhost:{}/{DB_NAME}",
&username(),
postgres_port
);
let pg_bin_dir = self.get_pg_bin_dir().await?; let pg_bin_dir = self.get_pg_bin_dir().await?;
let createdb_path = pg_bin_dir.join("createdb"); let createdb_path = pg_bin_dir.join("createdb");
@@ -249,10 +235,6 @@ impl StorageController {
"localhost", "localhost",
"-p", "-p",
&format!("{}", postgres_port), &format!("{}", postgres_port),
"-U",
&username(),
"-O",
&username(),
DB_NAME, DB_NAME,
]) ])
.output() .output()
@@ -289,7 +271,7 @@ impl StorageController {
// But tokio-postgres fork doesn't have this upstream commit: // But tokio-postgres fork doesn't have this upstream commit:
// https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79 // https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79
// => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399 // => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399
.user(&username()) .user(&whoami::username())
.dbname(DB_NAME) .dbname(DB_NAME)
.connect(tokio_postgres::NoTls) .connect(tokio_postgres::NoTls)
.await .await
@@ -346,19 +328,6 @@ impl StorageController {
let pg_log_path = pg_data_path.join("postgres.log"); let pg_log_path = pg_data_path.join("postgres.log");
if !tokio::fs::try_exists(&pg_data_path).await? { if !tokio::fs::try_exists(&pg_data_path).await? {
let initdb_args = [
"-D",
pg_data_path.as_ref(),
"--username",
&username(),
"--no-sync",
"--no-instructions",
];
tracing::info!(
"Initializing storage controller database with args: {:?}",
initdb_args
);
// Initialize empty database // Initialize empty database
let initdb_path = pg_bin_dir.join("initdb"); let initdb_path = pg_bin_dir.join("initdb");
let mut child = Command::new(&initdb_path) let mut child = Command::new(&initdb_path)
@@ -366,7 +335,7 @@ impl StorageController {
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()), ("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()), ("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
]) ])
.args(initdb_args) .args(["-D", pg_data_path.as_ref()])
.spawn() .spawn()
.expect("Failed to spawn initdb"); .expect("Failed to spawn initdb");
let status = child.wait().await?; let status = child.wait().await?;
@@ -395,14 +364,8 @@ impl StorageController {
pg_data_path.as_ref(), pg_data_path.as_ref(),
"-l", "-l",
pg_log_path.as_ref(), pg_log_path.as_ref(),
"-U",
&username(),
"start", "start",
]; ];
tracing::info!(
"Starting storage controller database with args: {:?}",
db_start_args
);
background_process::start_process( background_process::start_process(
"storage_controller_db", "storage_controller_db",

View File

@@ -11,11 +11,14 @@ clap.workspace = true
comfy-table.workspace = true comfy-table.workspace = true
futures.workspace = true futures.workspace = true
humantime.workspace = true humantime.workspace = true
hyper.workspace = true
pageserver_api.workspace = true pageserver_api.workspace = true
pageserver_client.workspace = true pageserver_client.workspace = true
reqwest.workspace = true reqwest.workspace = true
serde.workspace = true
serde_json = { workspace = true, features = ["raw_value"] } serde_json = { workspace = true, features = ["raw_value"] }
storage_controller_client.workspace = true storage_controller_client.workspace = true
thiserror.workspace = true
tokio.workspace = true tokio.workspace = true
tracing.workspace = true tracing.workspace = true
utils.workspace = true utils.workspace = true

View File

@@ -4,8 +4,8 @@ use std::{str::FromStr, time::Duration};
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use pageserver_api::{ use pageserver_api::{
controller_api::{ controller_api::{
AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, ShardSchedulingPolicy,
ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
}, },
models::{ models::{
EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -339,7 +339,7 @@ async fn main() -> anyhow::Result<()> {
listen_pg_port, listen_pg_port,
listen_http_addr, listen_http_addr,
listen_http_port, listen_http_port,
availability_zone_id: AvailabilityZone(availability_zone_id), availability_zone_id,
}), }),
) )
.await?; .await?;

View File

@@ -2,8 +2,8 @@
# Example docker compose configuration # Example docker compose configuration
The configuration in this directory is used for testing Neon docker images: it is The configuration in this directory is used for testing Neon docker images: it is
not intended for deploying a usable system. To run a development environment where not intended for deploying a usable system. To run a development environment where
you can experiment with a miniature Neon system, use `cargo neon` rather than container images. you can experiment with a miniature Neon system, use `cargo neon` rather than container images.
This configuration does not start the storage controller, because the controller This configuration does not start the storage controller, because the controller
needs a way to reconfigure running computes, and no such thing exists in this setup. needs a way to reconfigure running computes, and no such thing exists in this setup.

View File

@@ -1,343 +0,0 @@
# Independent compute release
Created at: 2024-08-30. Author: Alexey Kondratov (@ololobus)
## Summary
This document proposes an approach to a fully independent compute release flow. It attempts to
cover the following features:
- Process is automated as much as possible to minimize human errors.
- Compute<->storage protocol compatibility is ensured.
- A transparent release history is available with an easy rollback strategy.
- Although not in the scope of this document, there is a viable way to extend the proposed release
flow to achieve the canary and/or blue-green deployment strategies.
## Motivation
Previously, the compute release was tightly coupled to the storage release. This meant that once
some storage nodes got restarted with a newer version, all new compute starts using these nodes
automatically got the new version. Thus, two releases happened in parallel, which increased the blast
radius and made ownership fuzzy.
Now, we practice a manual v0 independent compute release flow -- after getting a new compute release
image and tag, we pin it region by region using the Admin UI. It's better, but it still has its own flaws:
1. It's a simple but fairly manual process, as you need to click through a few pages.
2. It's prone to human errors, e.g., you could mistype or copy the wrong compute tag.
3. We now require an additional approval in the Admin UI, which partially solves issue 2,
but also makes the whole process pretty annoying, as you constantly need to go back
and forth between two people.
## Non-goals
It's not the goal of this document to propose a design for some general-purpose release tool like Helm.
The document considers how the current compute fleet is orchestrated at Neon. Even if we later
decide to split the control plane further (e.g., introduce a separate compute controller), the proposed
release process shouldn't change much, i.e., the releases table and API will reside in
one of the parts.
Achieving the canary and/or blue-green deploy strategies is out of the scope of this document. They
were kept in mind, though, so it's expected that the proposed approach will lay down the foundation
for implementing them in future iterations.
## Impacted components
Compute, control plane, CI, observability (some Grafana dashboards may require changes).
## Prior art
A close prior example is how Helm tracks [releases history](https://helm.sh/docs/helm/helm_history/).
In the code:
- [Release](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/release.go#L20-L43)
- [Release info](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/info.go#L24-L40)
- [Release status](https://github.com/helm/helm/blob/2b30cf4b61d587d3f7594102bb202b787b9918db/pkg/release/status.go#L18-L42)
TL;DR it has several important attributes:
- Revision -- unique release ID/primary key. It is not the same as the application version,
because the same version can be deployed several times, e.g., after a newer version rollback.
- App version -- version of the application chart/code.
- Config -- set of overrides to the default config of the application.
- Status -- current status of the release in the history.
- Timestamps -- track when a release was created and deployed.
## Proposed implementation
### Separate release branch
We will use a separate release branch, `release-compute`, to have a clean history for releases and commits.
In order to avoid confusion with storage releases, we will use a different prefix for compute [git release
tags](https://github.com/neondatabase/neon/releases) -- `release-compute-XXXX`. We will use the same tag for
Docker images as well. The `neondatabase/compute-node-v16:release-compute-XXXX` looks longer and a bit redundant,
but it's better to have image and git tags in sync.
Currently, control plane relies on the numeric compute and storage release versions to decide on compute->storage
compatibility. Once we implement this proposal, we should drop this code as release numbers will be completely
independent. The only constraint we want is that the release number must monotonically increase within the same release branch.
### Compute config/settings manifest
We will create a new sub-directory `compute` and file `compute/manifest.yaml` with a structure:
```yaml
pg_settings:
# Common settings for primaries and secondaries of all versions.
common:
wal_log_hints: "off"
max_wal_size: "1024"
per_version:
14:
# Common settings for both replica and primary of version PG 14
common:
shared_preload_libraries: "neon,pg_stat_statements,extension_x"
15:
common:
shared_preload_libraries: "neon,pg_stat_statements,extension_x"
# Settings that should be applied only to replicas
replica:
# Available only starting with Postgres 15
recovery_prefetch: "off"
# ...
17:
common:
# For example, if third-party `extension_x` is not yet available for PG 17
shared_preload_libraries: "neon,pg_stat_statements"
replica:
recovery_prefetch: "off"
```
**N.B.** Setting values should be strings: `on|off` for booleans and a number (as a string)
without units for all numeric settings. That's how the control plane currently operates.
The priority of settings will be (a higher number is a higher priority):
1. Any static and hard-coded settings in the control plane
2. `pg_settings->common`
3. Per-version `common`
4. Per-version `replica`
5. Any per-user/project/endpoint overrides in the control plane
6. Any dynamic setting calculated based on the compute size
**N.B.** For simplicity, we do not do any custom logic for `shared_preload_libraries`, so it's completely
overridden if specified on some level. Make sure that you include all necessary extensions in it when you
do any overrides.
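To make the precedence concrete, here is a minimal merge sketch in Rust, purely for illustration (hypothetical types and names, not actual control plane code). Later layers simply overwrite earlier ones wholesale, which is also why `shared_preload_libraries` must be written out in full wherever it is overridden:

```rust
use std::collections::HashMap;

type Settings = HashMap<String, String>;

/// Merge setting layers in priority order: later layers win wholesale,
/// matching the numbered list above (a higher number is a higher priority).
fn resolve_settings(layers: &[&Settings]) -> Settings {
    let mut resolved = Settings::new();
    for layer in layers {
        for (name, value) in layer.iter() {
            // A plain insert replaces any earlier value; there is no
            // per-setting merge logic, e.g. for shared_preload_libraries.
            resolved.insert(name.clone(), value.clone());
        }
    }
    resolved
}

fn main() {
    // Layers 2-4 from the priority list, for a hypothetical PG 15 replica.
    let common = Settings::from([("wal_log_hints".to_string(), "off".to_string())]);
    let v15_common = Settings::from([(
        "shared_preload_libraries".to_string(),
        "neon,pg_stat_statements,extension_x".to_string(),
    )]);
    let v15_replica = Settings::from([("recovery_prefetch".to_string(), "off".to_string())]);

    let resolved = resolve_settings(&[&common, &v15_common, &v15_replica]);
    assert_eq!(resolved["recovery_prefetch"], "off");
}
```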
**N.B.** There is a tricky question about what to do with custom compute image pinning we sometimes
do for particular projects and customers. That's usually some ad-hoc work and images are based on
the latest compute image, so it's relatively safe to assume that we could use settings from the latest compute
release. If for some reason that's not true, and further overrides are needed, it's also possible to do
on the project level together with pinning the image, so it's on-call/engineer/support responsibility to
ensure that compute starts with the specified custom image. The only real risk is that the compute image will get
stale and settings in new releases will drift away, so eventually it will hit something incompatible,
but i) this is an operational issue, as we do not want stale images anyway, and ii) base settings
gain something genuinely new so rarely that the chance of this happening is very low. If we want to solve it completely,
then together with pinning the image we could also pin the matching release revision in the control plane.
The compute team will own the content of `compute/manifest.yaml`.
### Control plane: releases table
In order to store information about releases, the control plane will use a table `compute_releases` with the following
schema:
```sql
CREATE TABLE compute_releases (
-- Unique release ID
-- N.B. Revision won't be synchronized across all regions, because all control planes are technically independent
-- services. We have the same situation with Helm releases as well because they could be deployed and rolled back
-- independently in different clusters.
revision BIGSERIAL PRIMARY KEY,
-- Numeric version of the compute image, e.g. 9057
version BIGINT NOT NULL,
-- Compute image tag, e.g. `release-9057`
tag TEXT NOT NULL,
-- Current release status. Currently, it will be a simple enum
-- * `deployed` -- release is deployed and used for new compute starts.
-- Exactly one release can have this status at a time.
-- * `superseded` -- release has been replaced by a newer one.
-- But we can always extend it in the future when we need more statuses
-- for more complex deployment strategies.
status TEXT NOT NULL,
-- Any additional metadata for compute in the corresponding release
manifest JSONB NOT NULL,
-- Timestamp when release record was created in the control plane database
created_at TIMESTAMP NOT NULL DEFAULT now(),
-- Timestamp when release deployment was finished
deployed_at TIMESTAMP
);
```
We keep track of the old releases not only for the sake of audit, but also because we usually have ~30% of
computes still running an image from one of the previous releases. Yet, when users want to reconfigure
them without restarting, the control plane needs to know which settings are applicable to them, so we also need
information about the previous releases to be readily available. There could be some other auxiliary info
needed as well: supported extensions, compute flags, etc.
**N.B.** Here, we can end up in an ambiguous situation when the same compute image is deployed twice, e.g.,
it was deployed once, then rolled back, and then deployed again, potentially with a different manifest. Yet,
we could've started some computes with the first deployment and some with the second. Thus, when we need to
look up the manifest for the compute by its image tag, we will see two records in the table with the same tag,
but different revision numbers. We can assume that this could happen only in case of rollbacks, so we
can just take the latest revision for the given tag.
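As a minimal illustration of that lookup rule (assuming the `compute_releases` schema above and a `tokio-postgres` client built with JSON support; a hypothetical helper, not actual control plane code):

```rust
/// Fetch the manifest for a compute image tag. If the same tag was deployed
/// more than once (deploy -> rollback -> deploy again), ORDER BY revision
/// picks the latest deployment, per the assumption above.
async fn manifest_for_tag(
    client: &tokio_postgres::Client,
    tag: &str,
) -> anyhow::Result<serde_json::Value> {
    let row = client
        .query_one(
            "SELECT manifest FROM compute_releases \
             WHERE tag = $1 \
             ORDER BY revision DESC \
             LIMIT 1",
            &[&tag],
        )
        .await?;
    Ok(row.get("manifest"))
}
```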
### Control plane: management API
The control plane will implement new API methods to manage releases:
1. `POST /management/api/v2/compute_releases` to create a new release. With payload
```json
{
"version": 9057,
"tag": "release-9057",
"manifest": {}
}
```
and response
```json
{
"revision": 53,
"version": 9057,
"tag": "release-9057",
"status": "deployed",
"manifest": {},
"created_at": "2024-08-15T15:52:01.0000Z",
"deployed_at": "2024-08-15T15:52:01.0000Z",
}
```
Here, we can actually mix in custom (remote) extensions metadata into the `manifest`, so that the control plane
will get information about all available extensions not bundled into the compute image. The corresponding
workflow in `neondatabase/build-custom-extensions` should produce it as an artifact and make
it accessible to the workflow in `neondatabase/infra`. See the complete release flow below. Doing that,
we add a constraint that a new custom extension requires a new compute release, which is good for safety,
but is not exactly what we want operationally (we want to be able to deploy new extensions without new
images). Yet, it can be solved incrementally: v0 -- do not do anything with extensions at all;
v1 -- put them into the same manifest; v2 -- make them separate entities with their own lifecycle.
**N.B.** This method is intended to be used in CI workflows, and CI/network can be flaky. It's reasonable
to assume that we could retry the request several times, even though it has already succeeded. Although it's
not a big deal to create several identical releases one-by-one, it's better to avoid it, so the control plane
should check if the latest release is identical and just return `304 Not Modified` in this case.
2. `POST /management/api/v2/compute_releases/rollback` to rollback to any previously deployed release. With payload
including the revision of the release to rollback to:
```json
{
"revision": 52
}
```
Rollback marks the current release as `superseded` and creates a new release with all the same data as the
requested revision, but with a new revision number.
This rollback API is not strictly needed, as we can just use the `infra` repo workflow to deploy any
available tag. It's still nice to have for on-call and any urgent matters, for example, if we need
to roll back and GitHub is down. It's much easier to specify only the revision number vs. crafting
all the necessary data for the new release payload.
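Both API methods above lend themselves to short sketches against the `compute_releases` schema. The following is illustrative only (hypothetical helpers, `tokio-postgres` with JSON support assumed): the first function implements the `304 Not Modified` duplicate check for retried release creations, the second the supersede-and-reinsert rollback semantics.

```rust
/// Return true if the most recent release already matches the incoming
/// payload, in which case the API can answer 304 Not Modified.
async fn is_duplicate_release(
    client: &tokio_postgres::Client,
    version: i64,
    tag: &str,
    manifest: &serde_json::Value,
) -> anyhow::Result<bool> {
    let latest = client
        .query_opt(
            "SELECT version, tag, manifest FROM compute_releases \
             ORDER BY revision DESC LIMIT 1",
            &[],
        )
        .await?;
    Ok(latest.is_some_and(|row| {
        row.get::<_, i64>("version") == version
            && row.get::<_, String>("tag") == tag
            && row.get::<_, serde_json::Value>("manifest") == *manifest
    }))
}

/// Supersede the currently deployed release and re-insert the requested
/// revision's data under a fresh revision number, atomically.
async fn rollback_to(client: &mut tokio_postgres::Client, revision: i64) -> anyhow::Result<()> {
    let tx = client.transaction().await?;
    tx.execute(
        "UPDATE compute_releases SET status = 'superseded' WHERE status = 'deployed'",
        &[],
    )
    .await?;
    tx.execute(
        "INSERT INTO compute_releases (version, tag, status, manifest, deployed_at) \
         SELECT version, tag, 'deployed', manifest, now() \
         FROM compute_releases WHERE revision = $1",
        &[&revision],
    )
    .await?;
    tx.commit().await?;
    Ok(())
}
```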
### Compute->storage compatibility tests
In order to safely release new compute versions independently from storage, we need to ensure that the currently
deployed storage is compatible with the new compute version. Currently, we maintain backward compatibility
in storage, but newer computes may require a newer storage version.
Remote end-to-end (e2e) tests [already accept](https://github.com/neondatabase/cloud/blob/e3468d433e0d73d02b7d7e738d027f509b522408/.github/workflows/testing.yml#L43-L48)
`storage_image_tag` and `compute_image_tag` as separate inputs. That means that we could reuse e2e tests to ensure
compatibility between storage and compute:
1. Pick the latest storage release tag and use it as `storage_image_tag`.
2. Pick a new compute tag built in the current compute release PR and use it as `compute_image_tag`.
Here, we should use a temporary ECR image tag, because the final tag will be known only after the release PR is merged.
3. Trigger e2e tests as usual.
### Release flow
```mermaid
sequenceDiagram
actor oncall as Compute on-call person
participant neon as neondatabase/neon
box private
participant cloud as neondatabase/cloud
participant exts as neondatabase/build-custom-extensions
participant infra as neondatabase/infra
end
box cloud
participant preprod as Pre-prod control plane
participant prod as Production control plane
participant k8s as Compute k8s
end
oncall ->> neon: Open release PR into release-compute
activate neon
neon ->> cloud: CI: trigger e2e compatibility tests
activate cloud
cloud -->> neon: CI: e2e tests pass
deactivate cloud
neon ->> neon: CI: pass PR checks, get approvals
deactivate neon
oncall ->> neon: Merge release PR into release-compute
activate neon
neon ->> neon: CI: pass checks, build and push images
neon ->> exts: CI: trigger extensions build
activate exts
exts -->> neon: CI: extensions are ready
deactivate exts
neon ->> neon: CI: create release tag
neon ->> infra: Trigger release workflow using the produced tag
deactivate neon
activate infra
infra ->> infra: CI: pass checks
infra ->> preprod: Release new compute image to pre-prod automatically <br/> POST /management/api/v2/compute_releases
activate preprod
preprod -->> infra: 200 OK
deactivate preprod
infra ->> infra: CI: wait for per-region production deploy approvals
oncall ->> infra: CI: approve deploys region by region
infra ->> k8s: Prewarm new compute image
infra ->> prod: POST /management/api/v2/compute_releases
activate prod
prod -->> infra: 200 OK
deactivate prod
deactivate infra
```
## Further work
As briefly mentioned in other sections, eventually, we would like to use more complex deployment strategies.
For example, we can pass a fraction of the total compute starts that should use the new release. Then we can
mark the release as `partial` or `canary` and monitor its performance. If everything is fine, we can promote it
to `deployed` status. If not, we can roll back to the previous one.
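A sketch of what that routing could look like in the control plane (hypothetical `Release` type and fraction parameter; the `partial`/`canary` status does not exist yet):

```rust
use rand::Rng;

struct Release {
    revision: i64,
}

/// Route a configured fraction of new compute starts to a canary release;
/// everything else keeps using the currently deployed one.
fn pick_release<'a>(deployed: &'a Release, canary: Option<(&'a Release, f64)>) -> &'a Release {
    match canary {
        Some((release, fraction)) if rand::thread_rng().gen::<f64>() < fraction => release,
        _ => deployed,
    }
}

fn main() {
    let deployed = Release { revision: 53 };
    let canary = Release { revision: 54 };
    // Roughly 5% of new compute starts would pick revision 54 here.
    let chosen = pick_release(&deployed, Some((&canary, 0.05)));
    println!("starting compute on release revision {}", chosen.revision);
}
```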
## Alternatives
In theory, we can try using Helm as-is:
1. Write a compute Helm chart. It would contain only a config map, which the control plane can access and read.
N.B. We could reuse the control plane chart as well, but then it's not a fully independent release again, and ownership gets even fuzzier.
2. The control plane will read it and start using the new compute version for new starts.
Drawbacks:
1. Helm releases work best if the workload is controlled by the Helm chart itself. Then you can have different
deployment strategies like rolling updates, canary, or blue/green deployments. At Neon, compute starts are controlled
by the control plane, which makes this much trickier.
2. Release visibility will suffer, i.e. instead of a nice table in the control plane and Admin UI, we would need to use
`helm` cli and/or K8s UIs like K8sLens.
3. We do not restart all computes shortly after a new version is released. This means that for some features and compatibility
purposes (see above) the control plane may need some auxiliary info from the previous releases.

View File

@@ -8,6 +8,7 @@ license.workspace = true
anyhow.workspace = true anyhow.workspace = true
chrono.workspace = true chrono.workspace = true
serde.workspace = true serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true serde_json.workspace = true
regex.workspace = true regex.workspace = true

View File

@@ -5,6 +5,9 @@ edition = "2021"
license = "Apache-2.0" license = "Apache-2.0"
[dependencies] [dependencies]
anyhow.workspace = true
chrono = { workspace = true, features = ["serde"] } chrono = { workspace = true, features = ["serde"] }
rand.workspace = true rand.workspace = true
serde.workspace = true serde.workspace = true
serde_with.workspace = true
utils.workspace = true

View File

@@ -5,7 +5,7 @@ use chrono::{DateTime, Utc};
use rand::Rng; use rand::Rng;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)] #[derive(Serialize, serde::Deserialize, Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
#[serde(tag = "type")] #[serde(tag = "type")]
pub enum EventType { pub enum EventType {
#[serde(rename = "absolute")] #[serde(rename = "absolute")]
@@ -107,7 +107,7 @@ pub const CHUNK_SIZE: usize = 1000;
// Just a wrapper around a slice of events // Just a wrapper around a slice of events
// to serialize it as `{"events" : [ ] } // to serialize it as `{"events" : [ ] }
#[derive(serde::Serialize, Deserialize)] #[derive(serde::Serialize, serde::Deserialize)]
pub struct EventChunk<'a, T: Clone> { pub struct EventChunk<'a, T: Clone> {
pub events: std::borrow::Cow<'a, [T]>, pub events: std::borrow::Cow<'a, [T]>,
} }

View File

@@ -12,4 +12,5 @@ bytes.workspace = true
utils.workspace = true utils.workspace = true
parking_lot.workspace = true parking_lot.workspace = true
hex.workspace = true hex.workspace = true
scopeguard.workspace = true
smallvec = { workspace = true, features = ["write"] } smallvec = { workspace = true, features = ["write"] }

View File

@@ -104,6 +104,9 @@ pub struct ConfigToml {
pub image_compression: ImageCompressionAlgorithm, pub image_compression: ImageCompressionAlgorithm,
pub ephemeral_bytes_per_memory_kb: usize, pub ephemeral_bytes_per_memory_kb: usize,
pub l0_flush: Option<crate::models::L0FlushConfig>, pub l0_flush: Option<crate::models::L0FlushConfig>,
#[serde(skip_serializing)]
// TODO(https://github.com/neondatabase/neon/issues/8184): remove after this field is removed from all pageserver.toml's
pub compact_level0_phase1_value_access: serde::de::IgnoredAny,
pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode, pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
pub io_buffer_alignment: usize, pub io_buffer_alignment: usize,
} }
@@ -170,6 +173,40 @@ impl Default for EvictionOrder {
} }
} }
#[derive(
Eq,
PartialEq,
Debug,
Copy,
Clone,
strum_macros::EnumString,
strum_macros::Display,
serde_with::DeserializeFromStr,
serde_with::SerializeDisplay,
)]
#[strum(serialize_all = "kebab-case")]
pub enum GetVectoredImpl {
Sequential,
Vectored,
}
#[derive(
Eq,
PartialEq,
Debug,
Copy,
Clone,
strum_macros::EnumString,
strum_macros::Display,
serde_with::DeserializeFromStr,
serde_with::SerializeDisplay,
)]
#[strum(serialize_all = "kebab-case")]
pub enum GetImpl {
Legacy,
Vectored,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[derive(Copy, Clone, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(transparent)] #[serde(transparent)]
pub struct MaxVectoredReadBytes(pub NonZeroUsize); pub struct MaxVectoredReadBytes(pub NonZeroUsize);
@@ -301,6 +338,8 @@ pub mod defaults {
pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm = pub const DEFAULT_IMAGE_COMPRESSION: ImageCompressionAlgorithm =
ImageCompressionAlgorithm::Zstd { level: Some(1) }; ImageCompressionAlgorithm::Zstd { level: Some(1) };
pub const DEFAULT_VALIDATE_VECTORED_GET: bool = false;
pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0; pub const DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB: usize = 0;
pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512; pub const DEFAULT_IO_BUFFER_ALIGNMENT: usize = 512;
@@ -337,10 +376,7 @@ impl Default for ConfigToml {
concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP) concurrent_tenant_warmup: (NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
.expect("Invalid default constant")), .expect("Invalid default constant")),
concurrent_tenant_size_logical_size_queries: NonZeroUsize::new( concurrent_tenant_size_logical_size_queries: NonZeroUsize::new(1).unwrap(),
DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES,
)
.unwrap(),
metric_collection_interval: (humantime::parse_duration( metric_collection_interval: (humantime::parse_duration(
DEFAULT_METRIC_COLLECTION_INTERVAL, DEFAULT_METRIC_COLLECTION_INTERVAL,
) )
@@ -381,6 +417,7 @@ impl Default for ConfigToml {
image_compression: (DEFAULT_IMAGE_COMPRESSION), image_compression: (DEFAULT_IMAGE_COMPRESSION),
ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB), ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
l0_flush: None, l0_flush: None,
compact_level0_phase1_value_access: Default::default(),
virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(), virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT, io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
@@ -430,6 +467,8 @@ pub mod tenant_conf_defaults {
// By default ingest enough WAL for two new L0 layers before checking if new image // By default ingest enough WAL for two new L0 layers before checking if new image
// image layers should be created. // image layers should be created.
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2; pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2;
pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
} }
impl Default for TenantConfigToml { impl Default for TenantConfigToml {

View File

@@ -1,5 +1,4 @@
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::fmt::Display;
use std::str::FromStr; use std::str::FromStr;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
@@ -58,7 +57,7 @@ pub struct NodeRegisterRequest {
pub listen_http_addr: String, pub listen_http_addr: String,
pub listen_http_port: u16, pub listen_http_port: u16,
pub availability_zone_id: AvailabilityZone, pub availability_zone_id: String,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
@@ -75,19 +74,10 @@ pub struct TenantPolicyRequest {
pub scheduling: Option<ShardSchedulingPolicy>, pub scheduling: Option<ShardSchedulingPolicy>,
} }
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct AvailabilityZone(pub String);
impl Display for AvailabilityZone {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub struct ShardsPreferredAzsRequest { pub struct ShardsPreferredAzsRequest {
#[serde(flatten)] #[serde(flatten)]
pub preferred_az_ids: HashMap<TenantShardId, AvailabilityZone>, pub preferred_az_ids: HashMap<TenantShardId, String>,
} }
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]

View File

@@ -1,8 +1,8 @@
use anyhow::{bail, Result}; use anyhow::{bail, Result};
use byteorder::{ByteOrder, BE}; use byteorder::{ByteOrder, BE};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::Oid;
use postgres_ffi::RepOriginId; use postgres_ffi::RepOriginId;
use postgres_ffi::{Oid, TransactionId};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::{fmt, ops::Range}; use std::{fmt, ops::Range};
@@ -350,17 +350,7 @@ impl Key {
// 02 00000000 00000000 00000000 00 00000000 // 02 00000000 00000000 00000000 00 00000000
// //
// TwoPhaseFile: // TwoPhaseFile:
// // 02 00000000 00000000 00000000 00 XID
// 02 00000000 00000000 00XXXXXX XX XXXXXXXX
//
// \______XID_________/
//
// The 64-bit XID is stored a little awkwardly in field6, field5 and
// field4. PostgreSQL v16 and below only stored a 32-bit XID, which
// fit completely in field6, but starting with PostgreSQL v17, a full
// 64-bit XID is used. Most pageserver code that accesses
// TwoPhaseFiles now deals with 64-bit XIDs even on v16, the high bits
// are just unused.
// //
// ControlFile: // ControlFile:
// 03 00000000 00000000 00000000 00 00000000 // 03 00000000 00000000 00000000 00 00000000
@@ -592,36 +582,35 @@ pub const TWOPHASEDIR_KEY: Key = Key {
}; };
#[inline(always)] #[inline(always)]
pub fn twophase_file_key(xid: u64) -> Key { pub fn twophase_file_key(xid: TransactionId) -> Key {
Key { Key {
field1: 0x02, field1: 0x02,
field2: 0, field2: 0,
field3: 0, field3: 0,
field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32, field4: 0,
field5: ((xid & 0x000000FF00000000) >> 32) as u8, field5: 0,
field6: (xid & 0x00000000FFFFFFFF) as u32, field6: xid,
} }
} }
#[inline(always)] #[inline(always)]
pub fn twophase_key_range(xid: u64) -> Range<Key> { pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
// 64-bit XIDs really should not overflow
let (next_xid, overflowed) = xid.overflowing_add(1); let (next_xid, overflowed) = xid.overflowing_add(1);
Key { Key {
field1: 0x02, field1: 0x02,
field2: 0, field2: 0,
field3: 0, field3: 0,
field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32, field4: 0,
field5: ((xid & 0x000000FF00000000) >> 32) as u8, field5: 0,
field6: (xid & 0x00000000FFFFFFFF) as u32, field6: xid,
}..Key { }..Key {
field1: 0x02, field1: 0x02,
field2: 0, field2: 0,
field3: u32::from(overflowed), field3: 0,
field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32, field4: 0,
field5: ((next_xid & 0x000000FF00000000) >> 32) as u8, field5: u8::from(overflowed),
field6: (next_xid & 0x00000000FFFFFFFF) as u32, field6: next_xid,
} }
} }

View File

@@ -37,11 +37,14 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
/// ```mermaid /// ```mermaid
/// stateDiagram-v2 /// stateDiagram-v2
/// ///
/// [*] --> Loading: spawn_load()
/// [*] --> Attaching: spawn_attach() /// [*] --> Attaching: spawn_attach()
/// ///
/// Loading --> Activating: activate()
/// Attaching --> Activating: activate() /// Attaching --> Activating: activate()
/// Activating --> Active: infallible /// Activating --> Active: infallible
/// ///
/// Loading --> Broken: load() failure
/// Attaching --> Broken: attach() failure /// Attaching --> Broken: attach() failure
/// ///
/// Active --> Stopping: set_stopping(), part of shutdown & detach /// Active --> Stopping: set_stopping(), part of shutdown & detach
@@ -65,6 +68,10 @@ use bytes::{Buf, BufMut, Bytes, BytesMut};
)] )]
#[serde(tag = "slug", content = "data")] #[serde(tag = "slug", content = "data")]
pub enum TenantState { pub enum TenantState {
/// This tenant is being loaded from local disk.
///
/// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
Loading,
/// This tenant is being attached to the pageserver. /// This tenant is being attached to the pageserver.
/// ///
/// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass. /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
@@ -114,6 +121,8 @@ impl TenantState {
// But, our attach task might still be fetching the remote timelines, etc. // But, our attach task might still be fetching the remote timelines, etc.
// So, return `Maybe` while Attaching, making Console wait for the attach task to finish. // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe, Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
// tenant mgr startup distinguishes attaching from loading via marker file.
Self::Loading | Self::Activating(ActivatingFrom::Loading) => Attached,
// We only reach Active after successful load / attach. // We only reach Active after successful load / attach.
// So, call attachment status Attached. // So, call attachment status Attached.
Self::Active => Attached, Self::Active => Attached,
@@ -182,11 +191,10 @@ impl LsnLease {
} }
/// The only [`TenantState`] variants we could be `TenantState::Activating` from. /// The only [`TenantState`] variants we could be `TenantState::Activating` from.
///
/// XXX: We used to have more variants here, but now it's just one, which makes this rather
/// useless. Remove, once we've checked that there's no client code left that looks at this.
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ActivatingFrom { pub enum ActivatingFrom {
/// Arrived to [`TenantState::Activating`] from [`TenantState::Loading`]
Loading,
/// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`] /// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]
Attaching, Attaching,
} }
@@ -487,7 +495,7 @@ pub struct CompactionAlgorithmSettings {
pub kind: CompactionAlgorithm, pub kind: CompactionAlgorithm,
} }
#[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)]
#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)] #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
pub enum L0FlushConfig { pub enum L0FlushConfig {
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
@@ -1554,8 +1562,11 @@ mod tests {
#[test] #[test]
fn tenantstatus_activating_serde() { fn tenantstatus_activating_serde() {
let states = [TenantState::Activating(ActivatingFrom::Attaching)]; let states = [
let expected = "[{\"slug\":\"Activating\",\"data\":\"Attaching\"}]"; TenantState::Activating(ActivatingFrom::Loading),
TenantState::Activating(ActivatingFrom::Attaching),
];
let expected = "[{\"slug\":\"Activating\",\"data\":\"Loading\"},{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
let actual = serde_json::to_string(&states).unwrap(); let actual = serde_json::to_string(&states).unwrap();
@@ -1570,7 +1581,13 @@ mod tests {
fn tenantstatus_activating_strum() { fn tenantstatus_activating_strum() {
// tests added, because we use these for metrics // tests added, because we use these for metrics
let examples = [ let examples = [
(line!(), TenantState::Loading, "Loading"),
(line!(), TenantState::Attaching, "Attaching"), (line!(), TenantState::Attaching, "Attaching"),
(
line!(),
TenantState::Activating(ActivatingFrom::Loading),
"Activating",
),
( (
line!(), line!(),
TenantState::Activating(ActivatingFrom::Attaching), TenantState::Activating(ActivatingFrom::Attaching),

View File

@@ -5,8 +5,10 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
async-trait.workspace = true
anyhow.workspace = true anyhow.workspace = true
bytes.workspace = true bytes.workspace = true
futures.workspace = true
rustls.workspace = true rustls.workspace = true
serde.workspace = true serde.workspace = true
thiserror.workspace = true thiserror.workspace = true

View File

@@ -280,6 +280,16 @@ pub struct PostgresBackend<IO> {
pub type PostgresBackendTCP = PostgresBackend<tokio::net::TcpStream>; pub type PostgresBackendTCP = PostgresBackend<tokio::net::TcpStream>;
pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
let mut query_string = query_string.to_vec();
if let Some(ch) = query_string.last() {
if *ch == 0 {
query_string.pop();
}
}
query_string
}
/// Cast a byte slice to a string slice, dropping null terminator if there's one. /// Cast a byte slice to a string slice, dropping null terminator if there's one.
fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> { fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> {
let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes); let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);

View File

@@ -5,10 +5,13 @@ edition.workspace = true
license.workspace = true license.workspace = true
[dependencies] [dependencies]
rand.workspace = true
regex.workspace = true regex.workspace = true
bytes.workspace = true bytes.workspace = true
byteorder.workspace = true
anyhow.workspace = true anyhow.workspace = true
crc32c.workspace = true crc32c.workspace = true
hex.workspace = true
once_cell.workspace = true once_cell.workspace = true
log.workspace = true log.workspace = true
memoffset.workspace = true memoffset.workspace = true

View File

@@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> {
PathBuf::from("pg_install") PathBuf::from("pg_install")
}; };
for pg_version in &["v14", "v15", "v16", "v17"] { for pg_version in &["v14", "v15", "v16"] {
let mut pg_install_dir_versioned = pg_install_dir.join(pg_version); let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
if pg_install_dir_versioned.is_relative() { if pg_install_dir_versioned.is_relative() {
let cwd = env::current_dir().context("Failed to get current_dir")?; let cwd = env::current_dir().context("Failed to get current_dir")?;

View File

@@ -57,7 +57,6 @@ macro_rules! for_all_postgres_versions {
$macro!(v14); $macro!(v14);
$macro!(v15); $macro!(v15);
$macro!(v16); $macro!(v16);
$macro!(v17);
}; };
} }
@@ -92,7 +91,6 @@ macro_rules! dispatch_pgversion {
14 : v14, 14 : v14,
15 : v15, 15 : v15,
16 : v16, 16 : v16,
17 : v17,
] ]
) )
}; };
@@ -123,7 +121,6 @@ macro_rules! enum_pgversion_dispatch {
V14 : v14, V14 : v14,
V15 : v15, V15 : v15,
V16 : v16, V16 : v16,
V17 : v17,
] ]
) )
}; };
@@ -153,7 +150,6 @@ macro_rules! enum_pgversion {
V14 : v14, V14 : v14,
V15 : v15, V15 : v15,
V16 : v16, V16 : v16,
V17 : v17,
] ]
} }
}; };
@@ -166,7 +162,6 @@ macro_rules! enum_pgversion {
V14 : v14, V14 : v14,
V15 : v15, V15 : v15,
V16 : v16, V16 : v16,
V17 : v17,
] ]
} }
}; };

View File

@@ -9,8 +9,8 @@
//! comments on them. //! comments on them.
//! //!
use crate::PageHeaderData;
use crate::BLCKSZ; use crate::BLCKSZ;
use crate::{PageHeaderData, XLogRecord};
// //
// From pg_tablespace_d.h // From pg_tablespace_d.h
@@ -152,9 +152,6 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8; pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8; pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
// From heapam_xlog.h
pub const XLOG_HEAP2_REWRITE: u8 = 0x00;
// From replication/message.h // From replication/message.h
pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00; pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;
@@ -194,6 +191,8 @@ pub const XLR_RMGR_INFO_MASK: u8 = 0xF0;
pub const XLOG_TBLSPC_CREATE: u8 = 0x00; pub const XLOG_TBLSPC_CREATE: u8 = 0x00;
pub const XLOG_TBLSPC_DROP: u8 = 0x10; pub const XLOG_TBLSPC_DROP: u8 = 0x10;
pub const SIZEOF_XLOGRECORD: u32 = size_of::<XLogRecord>() as u32;
// //
// from xlogrecord.h // from xlogrecord.h
// //
@@ -217,21 +216,18 @@ pub const BKPIMAGE_HAS_HOLE: u8 = 0x01; /* page image has "hole" */
/* From transam.h */ /* From transam.h */
pub const FIRST_NORMAL_TRANSACTION_ID: u32 = 3; pub const FIRST_NORMAL_TRANSACTION_ID: u32 = 3;
pub const INVALID_TRANSACTION_ID: u32 = 0; pub const INVALID_TRANSACTION_ID: u32 = 0;
pub const FIRST_BOOTSTRAP_OBJECT_ID: u32 = 12000;
pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;
/* pg_control.h */
pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00; pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10; pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
pub const XLOG_PARAMETER_CHANGE: u8 = 0x60; pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
pub const XLOG_END_OF_RECOVERY: u8 = 0x90; pub const XLP_LONG_HEADER: u16 = 0x0002;
/* From xlog.h */ /* From xlog.h */
pub const XLOG_REPLORIGIN_SET: u8 = 0x00; pub const XLOG_REPLORIGIN_SET: u8 = 0x00;
pub const XLOG_REPLORIGIN_DROP: u8 = 0x10; pub const XLOG_REPLORIGIN_DROP: u8 = 0x10;
/* xlog_internal.h */
pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
pub const XLP_LONG_HEADER: u16 = 0x0002;
/* From replication/slot.h */ /* From replication/slot.h */
pub const REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN: usize = 4*4 /* offset of `slotdata` in ReplicationSlotOnDisk */ pub const REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN: usize = 4*4 /* offset of `slotdata` in ReplicationSlotOnDisk */
+ 64 /* NameData */ + 4*4; + 64 /* NameData */ + 4*4;
@@ -249,6 +245,33 @@ pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
/* From origin.c */ /* From origin.c */
pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE; pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;
// List of subdirectories inside pgdata.
// Copied from src/bin/initdb/initdb.c
pub const PGDATA_SUBDIRS: [&str; 22] = [
"global",
"pg_wal/archive_status",
"pg_commit_ts",
"pg_dynshmem",
"pg_notify",
"pg_serial",
"pg_snapshots",
"pg_subtrans",
"pg_twophase",
"pg_multixact",
"pg_multixact/members",
"pg_multixact/offsets",
"base",
"base/1",
"pg_replslot",
"pg_tblspc",
"pg_stat",
"pg_stat_tmp",
"pg_xact",
"pg_logical",
"pg_logical/snapshots",
"pg_logical/mappings",
];
// Don't include postgresql.conf as it is inconvenient on node start: // Don't include postgresql.conf as it is inconvenient on node start:
// we need postgresql.conf before basebackup to synchronize safekeepers // we need postgresql.conf before basebackup to synchronize safekeepers
// so no point in overwriting it during backup restore. Rest of the files // so no point in overwriting it during backup restore. Rest of the files

View File

@@ -5,33 +5,6 @@ pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */ pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */ pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */
// List of subdirectories inside pgdata.
// Copied from src/bin/initdb/initdb.c
pub const PGDATA_SUBDIRS: [&str; 22] = [
"global",
"pg_wal/archive_status",
"pg_commit_ts",
"pg_dynshmem",
"pg_notify",
"pg_serial",
"pg_snapshots",
"pg_subtrans",
"pg_twophase",
"pg_multixact",
"pg_multixact/members",
"pg_multixact/offsets",
"base",
"base/1",
"pg_replslot",
"pg_tblspc",
"pg_stat",
"pg_stat_tmp",
"pg_xact",
"pg_logical",
"pg_logical/snapshots",
"pg_logical/mappings",
];
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool { pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
(bimg_info & BKPIMAGE_IS_COMPRESSED) != 0 (bimg_info & BKPIMAGE_IS_COMPRESSED) != 0
} }

View File

@@ -11,8 +11,6 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */ pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */
pub use super::super::v14::bindings::PGDATA_SUBDIRS;
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool { pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD; const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

View File

@@ -11,8 +11,6 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */ pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
pub use super::super::v14::bindings::PGDATA_SUBDIRS;
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool { pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD; const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

View File

@@ -1,55 +0,0 @@
pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;
pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
pub const XLOG_DBASE_DROP: u8 = 0x20;
pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
// List of subdirectories inside pgdata.
// Copied from src/bin/initdb/initdb.c
pub const PGDATA_SUBDIRS: [&str; 23] = [
"global",
"pg_wal/archive_status",
"pg_wal/summaries",
"pg_commit_ts",
"pg_dynshmem",
"pg_notify",
"pg_serial",
"pg_snapshots",
"pg_subtrans",
"pg_twophase",
"pg_multixact",
"pg_multixact/members",
"pg_multixact/offsets",
"base",
"base/1",
"pg_replslot",
"pg_tblspc",
"pg_stat",
"pg_stat_tmp",
"pg_xact",
"pg_logical",
"pg_logical/snapshots",
"pg_logical/mappings",
];
pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
(bimg_info & ANY_COMPRESS_FLAG) != 0
}
pub const XLOG_HEAP2_PRUNE_ON_ACCESS: u8 = 0x10;
pub const XLOG_HEAP2_PRUNE_VACUUM_SCAN: u8 = 0x20;
pub const XLOG_HEAP2_PRUNE_VACUUM_CLEANUP: u8 = 0x30;
pub const XLOG_OVERWRITE_CONTRECORD: u8 = 0xD0;
pub const XLOG_CHECKPOINT_REDO: u8 = 0xE0;

View File

@@ -26,12 +26,11 @@ use bytes::{Buf, Bytes};
use log::*; use log::*;
use serde::Serialize; use serde::Serialize;
use std::ffi::OsStr;
use std::fs::File; use std::fs::File;
use std::io::prelude::*; use std::io::prelude::*;
use std::io::ErrorKind; use std::io::ErrorKind;
use std::io::SeekFrom; use std::io::SeekFrom;
use std::path::Path; use std::path::{Path, PathBuf};
use std::time::SystemTime; use std::time::SystemTime;
use utils::bin_ser::DeserializeError; use utils::bin_ser::DeserializeError;
use utils::bin_ser::SerializeError; use utils::bin_ser::SerializeError;
@@ -79,34 +78,19 @@ pub fn XLogFileName(tli: TimeLineID, logSegNo: XLogSegNo, wal_segsz_bytes: usize
) )
} }
pub fn XLogFromFileName( pub fn XLogFromFileName(fname: &str, wal_seg_size: usize) -> (XLogSegNo, TimeLineID) {
fname: &OsStr, let tli = u32::from_str_radix(&fname[0..8], 16).unwrap();
wal_seg_size: usize, let log = u32::from_str_radix(&fname[8..16], 16).unwrap() as XLogSegNo;
) -> anyhow::Result<(XLogSegNo, TimeLineID)> { let seg = u32::from_str_radix(&fname[16..24], 16).unwrap() as XLogSegNo;
if let Some(fname_str) = fname.to_str() { (log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli)
let tli = u32::from_str_radix(&fname_str[0..8], 16)?;
let log = u32::from_str_radix(&fname_str[8..16], 16)? as XLogSegNo;
let seg = u32::from_str_radix(&fname_str[16..24], 16)? as XLogSegNo;
Ok((log * XLogSegmentsPerXLogId(wal_seg_size) + seg, tli))
} else {
anyhow::bail!("non-ut8 filename: {:?}", fname);
}
} }
pub fn IsXLogFileName(fname: &OsStr) -> bool { pub fn IsXLogFileName(fname: &str) -> bool {
if let Some(fname) = fname.to_str() { return fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit());
fname.len() == XLOG_FNAME_LEN && fname.chars().all(|c| c.is_ascii_hexdigit())
} else {
false
}
} }
pub fn IsPartialXLogFileName(fname: &OsStr) -> bool { pub fn IsPartialXLogFileName(fname: &str) -> bool {
if let Some(fname) = fname.to_str() { fname.ends_with(".partial") && IsXLogFileName(&fname[0..fname.len() - 8])
fname.ends_with(".partial") && IsXLogFileName(OsStr::new(&fname[0..fname.len() - 8]))
} else {
false
}
} }
/// If LSN points to the beginning of the page, then shift it to first record, /// If LSN points to the beginning of the page, then shift it to first record,
@@ -276,6 +260,13 @@ fn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {
} }
} }
pub fn main() {
let mut data_dir = PathBuf::new();
data_dir.push(".");
let wal_end = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, Lsn(0)).unwrap();
println!("wal_end={:?}", wal_end);
}
impl XLogRecord { impl XLogRecord {
pub fn from_slice(buf: &[u8]) -> Result<XLogRecord, DeserializeError> { pub fn from_slice(buf: &[u8]) -> Result<XLogRecord, DeserializeError> {
use utils::bin_ser::LeSer; use utils::bin_ser::LeSer;

View File

@@ -9,6 +9,7 @@ anyhow.workspace = true
clap.workspace = true clap.workspace = true
env_logger.workspace = true env_logger.workspace = true
log.workspace = true log.workspace = true
once_cell.workspace = true
postgres.workspace = true postgres.workspace = true
postgres_ffi.workspace = true postgres_ffi.workspace = true
camino-tempfile.workspace = true camino-tempfile.workspace = true

View File

@@ -7,7 +7,6 @@ use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
use postgres_ffi::{ use postgres_ffi::{
XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD, XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD,
}; };
use std::ffi::OsStr;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::process::Command; use std::process::Command;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
@@ -27,6 +26,7 @@ macro_rules! xlog_utils_test {
postgres_ffi::for_all_postgres_versions! { xlog_utils_test } postgres_ffi::for_all_postgres_versions! { xlog_utils_test }
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Conf { pub struct Conf {
pub pg_version: u32, pub pg_version: u32,
pub pg_distrib_dir: PathBuf, pub pg_distrib_dir: PathBuf,
@@ -53,7 +53,7 @@ impl Conf {
#[allow(clippy::manual_range_patterns)] #[allow(clippy::manual_range_patterns)]
match self.pg_version { match self.pg_version {
14 | 15 | 16 | 17 => Ok(path.join(format!("v{}", self.pg_version))), 14 | 15 | 16 => Ok(path.join(format!("v{}", self.pg_version))),
_ => bail!("Unsupported postgres version: {}", self.pg_version), _ => bail!("Unsupported postgres version: {}", self.pg_version),
} }
} }
@@ -136,8 +136,8 @@ impl Conf {
pub fn pg_waldump( pub fn pg_waldump(
&self, &self,
first_segment_name: &OsStr, first_segment_name: &str,
last_segment_name: &OsStr, last_segment_name: &str,
) -> anyhow::Result<std::process::Output> { ) -> anyhow::Result<std::process::Output> {
let first_segment_file = self.datadir.join(first_segment_name); let first_segment_file = self.datadir.join(first_segment_name);
let last_segment_file = self.datadir.join(last_segment_name); let last_segment_file = self.datadir.join(last_segment_name);

View File

@@ -4,7 +4,6 @@ use super::*;
use crate::{error, info}; use crate::{error, info};
use regex::Regex; use regex::Regex;
use std::cmp::min; use std::cmp::min;
use std::ffi::OsStr;
use std::fs::{self, File}; use std::fs::{self, File};
use std::io::Write; use std::io::Write;
use std::{env, str::FromStr}; use std::{env, str::FromStr};
@@ -55,7 +54,7 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
.wal_dir() .wal_dir()
.read_dir() .read_dir()
.unwrap() .unwrap()
.map(|f| f.unwrap().file_name()) .map(|f| f.unwrap().file_name().into_string().unwrap())
.filter(|fname| IsXLogFileName(fname)) .filter(|fname| IsXLogFileName(fname))
.max() .max()
.unwrap(); .unwrap();
@@ -71,11 +70,11 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
start_lsn start_lsn
); );
for file in fs::read_dir(cfg.wal_dir()).unwrap().flatten() { for file in fs::read_dir(cfg.wal_dir()).unwrap().flatten() {
let fname = file.file_name(); let fname = file.file_name().into_string().unwrap();
if !IsXLogFileName(&fname) { if !IsXLogFileName(&fname) {
continue; continue;
} }
let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE).unwrap(); let (segno, _) = XLogFromFileName(&fname, WAL_SEGMENT_SIZE);
let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE); let seg_start_lsn = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE);
if seg_start_lsn > u64::from(*start_lsn) { if seg_start_lsn > u64::from(*start_lsn) {
continue; continue;
@@ -94,10 +93,10 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
} }
} }
fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &OsStr) -> Lsn { fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &str) -> Lsn {
// Get the actual end of WAL by pg_waldump // Get the actual end of WAL by pg_waldump
let waldump_output = cfg let waldump_output = cfg
.pg_waldump(OsStr::new("000000010000000000000001"), last_segment) .pg_waldump("000000010000000000000001", last_segment)
.unwrap() .unwrap()
.stderr; .stderr;
let waldump_output = std::str::from_utf8(&waldump_output).unwrap(); let waldump_output = std::str::from_utf8(&waldump_output).unwrap();
@@ -118,7 +117,7 @@ fn find_pg_waldump_end_of_wal(cfg: &crate::Conf, last_segment: &OsStr) -> Lsn {
fn check_end_of_wal( fn check_end_of_wal(
cfg: &crate::Conf, cfg: &crate::Conf,
last_segment: &OsStr, last_segment: &str,
start_lsn: Lsn, start_lsn: Lsn,
expected_end_of_wal: Lsn, expected_end_of_wal: Lsn,
) { ) {
@@ -133,8 +132,7 @@ fn check_end_of_wal(
// Rename file to partial to actually find last valid lsn, then rename it back. // Rename file to partial to actually find last valid lsn, then rename it back.
fs::rename( fs::rename(
cfg.wal_dir().join(last_segment), cfg.wal_dir().join(last_segment),
cfg.wal_dir() cfg.wal_dir().join(format!("{}.partial", last_segment)),
.join(format!("{}.partial", last_segment.to_str().unwrap())),
) )
.unwrap(); .unwrap();
let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap(); let wal_end = find_end_of_wal(&cfg.wal_dir(), WAL_SEGMENT_SIZE, start_lsn).unwrap();
@@ -144,8 +142,7 @@ fn check_end_of_wal(
); );
assert_eq!(wal_end, expected_end_of_wal); assert_eq!(wal_end, expected_end_of_wal);
fs::rename( fs::rename(
cfg.wal_dir() cfg.wal_dir().join(format!("{}.partial", last_segment)),
.join(format!("{}.partial", last_segment.to_str().unwrap())),
cfg.wal_dir().join(last_segment), cfg.wal_dir().join(last_segment),
) )
.unwrap(); .unwrap();

View File

@@ -8,8 +8,10 @@ license.workspace = true
bytes.workspace = true bytes.workspace = true
byteorder.workspace = true byteorder.workspace = true
itertools.workspace = true itertools.workspace = true
pin-project-lite.workspace = true
postgres-protocol.workspace = true postgres-protocol.workspace = true
rand.workspace = true rand.workspace = true
tokio = { workspace = true, features = ["io-util"] } tokio = { workspace = true, features = ["io-util"] }
tracing.workspace = true
thiserror.workspace = true thiserror.workspace = true
serde.workspace = true serde.workspace = true

View File

@@ -13,11 +13,14 @@ aws-smithy-async.workspace = true
aws-smithy-types.workspace = true aws-smithy-types.workspace = true
aws-config.workspace = true aws-config.workspace = true
aws-sdk-s3.workspace = true aws-sdk-s3.workspace = true
aws-credential-types.workspace = true
bytes.workspace = true bytes.workspace = true
camino = { workspace = true, features = ["serde1"] } camino = { workspace = true, features = ["serde1"] }
humantime.workspace = true
humantime-serde.workspace = true humantime-serde.workspace = true
hyper = { workspace = true, features = ["stream"] } hyper = { workspace = true, features = ["stream"] }
futures.workspace = true futures.workspace = true
rand.workspace = true
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
tokio = { workspace = true, features = ["sync", "fs", "io-util"] } tokio = { workspace = true, features = ["sync", "fs", "io-util"] }

View File

@@ -127,6 +127,10 @@ impl RemotePath {
&self.0 &self.0
} }
pub fn extension(&self) -> Option<&str> {
self.0.extension()
}
pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> { pub fn strip_prefix(&self, p: &RemotePath) -> Result<&Utf8Path, std::path::StripPrefixError> {
self.0.strip_prefix(&p.0) self.0.strip_prefix(&p.0)
} }

View File

@@ -6,5 +6,6 @@ license.workspace = true
[dependencies] [dependencies]
serde.workspace = true serde.workspace = true
serde_with.workspace = true
const_format.workspace = true const_format.workspace = true
utils.workspace = true utils.workspace = true

View File

@@ -9,9 +9,8 @@ hyper.workspace = true
opentelemetry = { workspace = true, features=["rt-tokio"] } opentelemetry = { workspace = true, features=["rt-tokio"] }
opentelemetry-otlp = { workspace = true, default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] } opentelemetry-otlp = { workspace = true, default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions.workspace = true opentelemetry-semantic-conventions.workspace = true
reqwest = { workspace = true, default-features = false, features = ["rustls-tls"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tracing.workspace = true tracing.workspace = true
tracing-opentelemetry.workspace = true tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
[dev-dependencies]
tracing-subscriber.workspace = true # For examples in docs

View File

@@ -19,7 +19,6 @@ bincode.workspace = true
bytes.workspace = true bytes.workspace = true
camino.workspace = true camino.workspace = true
chrono.workspace = true chrono.workspace = true
git-version.workspace = true
hex = { workspace = true, features = ["serde"] } hex = { workspace = true, features = ["serde"] }
humantime.workspace = true humantime.workspace = true
hyper = { workspace = true, features = ["full"] } hyper = { workspace = true, features = ["full"] }
@@ -43,6 +42,7 @@ tracing.workspace = true
tracing-error.workspace = true tracing-error.workspace = true
tracing-subscriber = { workspace = true, features = ["json", "registry"] } tracing-subscriber = { workspace = true, features = ["json", "registry"] }
rand.workspace = true rand.workspace = true
serde_with.workspace = true
strum.workspace = true strum.workspace = true
strum_macros.workspace = true strum_macros.workspace = true
url.workspace = true url.workspace = true

libs/utils/src/accum.rs (new file)
View File

@@ -0,0 +1,33 @@
/// A helper to "accumulate" a value similar to `Iterator::reduce`, but lets you
/// feed the accumulated values by calling the 'accum' function, instead of having an
/// iterator.
///
/// For example, to calculate the smallest value among some integers:
///
/// ```
/// use utils::accum::Accum;
///
/// let values = [1, 2, 3];
///
/// let mut min_value: Accum<u32> = Accum(None);
/// for new_value in &values {
/// min_value.accum(std::cmp::min, *new_value);
/// }
///
/// assert_eq!(min_value.0.unwrap(), 1);
/// ```
pub struct Accum<T>(pub Option<T>);
impl<T: Copy> Accum<T> {
pub fn accum<F>(&mut self, func: F, new_value: T)
where
F: FnOnce(T, T) -> T,
{
// If there is no previous value, just store the new value.
// Otherwise call the function to decide which one to keep.
self.0 = Some(if let Some(accum) = self.0 {
func(accum, new_value)
} else {
new_value
});
}
}

View File

@@ -82,7 +82,7 @@ impl ApiError {
StatusCode::INTERNAL_SERVER_ERROR, StatusCode::INTERNAL_SERVER_ERROR,
), ),
ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status( ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status(
format!("{err:#}"), // use alternative formatting so that we give the cause without backtrace err.to_string(),
StatusCode::INTERNAL_SERVER_ERROR, StatusCode::INTERNAL_SERVER_ERROR,
), ),
} }

View File

@@ -88,6 +88,12 @@ impl<'de> Deserialize<'de> for Id {
} }
impl Id { impl Id {
pub fn get_from_buf(buf: &mut impl bytes::Buf) -> Id {
let mut arr = [0u8; 16];
buf.copy_to_slice(&mut arr);
Id::from(arr)
}
pub fn from_slice(src: &[u8]) -> Result<Id, IdError> { pub fn from_slice(src: &[u8]) -> Result<Id, IdError> {
if src.len() != 16 { if src.len() != 16 {
return Err(IdError::SliceParseError(src.len())); return Err(IdError::SliceParseError(src.len()));
@@ -173,6 +179,10 @@ impl fmt::Debug for Id {
macro_rules! id_newtype { macro_rules! id_newtype {
($t:ident) => { ($t:ident) => {
impl $t { impl $t {
pub fn get_from_buf(buf: &mut impl bytes::Buf) -> $t {
$t(Id::get_from_buf(buf))
}
pub fn from_slice(src: &[u8]) -> Result<$t, IdError> { pub fn from_slice(src: &[u8]) -> Result<$t, IdError> {
Ok($t(Id::from_slice(src)?)) Ok($t(Id::from_slice(src)?))
} }

View File

@@ -21,13 +21,7 @@
//! //!
//! Another explanation can be found here: <https://brandur.org/rate-limiting> //! Another explanation can be found here: <https://brandur.org/rate-limiting>
use std::{ use std::{sync::Mutex, time::Duration};
sync::{
atomic::{AtomicU64, Ordering},
Mutex,
},
time::Duration,
};
use tokio::{sync::Notify, time::Instant}; use tokio::{sync::Notify, time::Instant};
@@ -134,7 +128,6 @@ impl LeakyBucketState {
pub struct RateLimiter { pub struct RateLimiter {
pub config: LeakyBucketConfig, pub config: LeakyBucketConfig,
pub sleep_counter: AtomicU64,
pub state: Mutex<LeakyBucketState>, pub state: Mutex<LeakyBucketState>,
/// a queue to provide this fair ordering. /// a queue to provide this fair ordering.
pub queue: Notify, pub queue: Notify,
@@ -151,7 +144,6 @@ impl Drop for Requeue<'_> {
impl RateLimiter { impl RateLimiter {
pub fn with_initial_tokens(config: LeakyBucketConfig, initial_tokens: f64) -> Self { pub fn with_initial_tokens(config: LeakyBucketConfig, initial_tokens: f64) -> Self {
RateLimiter { RateLimiter {
sleep_counter: AtomicU64::new(0),
state: Mutex::new(LeakyBucketState::with_initial_tokens( state: Mutex::new(LeakyBucketState::with_initial_tokens(
&config, &config,
initial_tokens, initial_tokens,
@@ -171,16 +163,15 @@ impl RateLimiter {
/// returns true if we did throttle /// returns true if we did throttle
pub async fn acquire(&self, count: usize) -> bool { pub async fn acquire(&self, count: usize) -> bool {
let start = tokio::time::Instant::now(); let mut throttled = false;
let start_count = self.sleep_counter.load(Ordering::Acquire); let start = tokio::time::Instant::now();
let mut end_count = start_count;
// wait until we are the first in the queue // wait until we are the first in the queue
let mut notified = std::pin::pin!(self.queue.notified()); let mut notified = std::pin::pin!(self.queue.notified());
if !notified.as_mut().enable() { if !notified.as_mut().enable() {
throttled = true;
notified.await; notified.await;
end_count = self.sleep_counter.load(Ordering::Acquire);
} }
// notify the next waiter in the queue when we are done. // notify the next waiter in the queue when we are done.
@@ -193,22 +184,9 @@ impl RateLimiter {
.unwrap() .unwrap()
.add_tokens(&self.config, start, count as f64); .add_tokens(&self.config, start, count as f64);
match res { match res {
Ok(()) => return end_count > start_count, Ok(()) => return throttled,
Err(ready_at) => { Err(ready_at) => {
struct Increment<'a>(&'a AtomicU64); throttled = true;
impl Drop for Increment<'_> {
fn drop(&mut self) {
self.0.fetch_add(1, Ordering::AcqRel);
}
}
// increment the counter after we finish sleeping (or cancel this task).
// this ensures that tasks that have already started the acquire will observe
// the new sleep count when they are allowed to resume on the notify.
let _inc = Increment(&self.sleep_counter);
end_count += 1;
tokio::time::sleep_until(ready_at).await; tokio::time::sleep_until(ready_at).await;
} }
} }

View File

@@ -43,9 +43,16 @@ pub mod logging;
pub mod lock_file; pub mod lock_file;
pub mod pid_file; pub mod pid_file;
// Misc
pub mod accum;
pub mod shutdown;
// Utility for binding TcpListeners with proper socket options. // Utility for binding TcpListeners with proper socket options.
pub mod tcp_listener; pub mod tcp_listener;
// Utility for putting a raw file descriptor into non-blocking mode
pub mod nonblock;
// Default signal handling // Default signal handling
pub mod sentry_init; pub mod sentry_init;
pub mod signals; pub mod signals;
@@ -92,10 +99,6 @@ pub mod toml_edit_ext;
pub mod circuit_breaker; pub mod circuit_breaker;
// Re-export used in macro. Avoids adding git-version as dep in target crates.
#[doc(hidden)]
pub use git_version;
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
/// ///
/// we have several cases: /// we have several cases:
@@ -135,7 +138,7 @@ macro_rules! project_git_version {
($const_identifier:ident) => { ($const_identifier:ident) => {
// this should try GIT_VERSION first only then git_version::git_version! // this should try GIT_VERSION first only then git_version::git_version!
const $const_identifier: &::core::primitive::str = { const $const_identifier: &::core::primitive::str = {
const __COMMIT_FROM_GIT: &::core::primitive::str = $crate::git_version::git_version! { const __COMMIT_FROM_GIT: &::core::primitive::str = git_version::git_version! {
prefix = "", prefix = "",
fallback = "unknown", fallback = "unknown",
args = ["--abbrev=40", "--always", "--dirty=-modified"] // always use full sha args = ["--abbrev=40", "--always", "--dirty=-modified"] // always use full sha

View File

@@ -1,5 +1,6 @@
#![warn(missing_docs)] #![warn(missing_docs)]
use camino::Utf8Path;
use serde::{de::Visitor, Deserialize, Serialize}; use serde::{de::Visitor, Deserialize, Serialize};
use std::fmt; use std::fmt;
use std::ops::{Add, AddAssign}; use std::ops::{Add, AddAssign};
@@ -144,6 +145,14 @@ impl Lsn {
i128::from(self.0) - i128::from(other) i128::from(self.0) - i128::from(other)
} }
/// Parse an LSN from a filename in the form `0000000000000000`
pub fn from_filename<F>(filename: F) -> Result<Self, LsnParseError>
where
F: AsRef<Utf8Path>,
{
Lsn::from_hex(filename.as_ref().as_str())
}
/// Parse an LSN from a string in the form `0000000000000000` /// Parse an LSN from a string in the form `0000000000000000`
pub fn from_hex<S>(s: S) -> Result<Self, LsnParseError> pub fn from_hex<S>(s: S) -> Result<Self, LsnParseError>
where where

View File

@@ -0,0 +1,17 @@
use nix::fcntl::{fcntl, OFlag, F_GETFL, F_SETFL};
use std::os::unix::io::RawFd;
/// Put a file descriptor into non-blocking mode
pub fn set_nonblock(fd: RawFd) -> Result<(), std::io::Error> {
let bits = fcntl(fd, F_GETFL)?;
// If F_GETFL returns some unknown bits, they should be valid
// for passing back to F_SETFL, too. If we left them out, the F_SETFL
// would effectively clear them, which is not what we want.
let mut flags = OFlag::from_bits_retain(bits);
flags |= OFlag::O_NONBLOCK;
fcntl(fd, F_SETFL(flags))?;
Ok(())
}

View File

@@ -0,0 +1,7 @@
/// Immediately terminate the calling process without calling
/// atexit callbacks, C runtime destructors etc. We mainly use
/// this to protect coverage data from concurrent writes.
pub fn exit_now(code: u8) -> ! {
// SAFETY: exiting is safe, the ffi is not safe
unsafe { nix::libc::_exit(code as _) };
}

View File

@@ -120,6 +120,32 @@ impl<K: Ord, V> VecMap<K, V> {
Ok((None, delta_size)) Ok((None, delta_size))
} }
/// Split the map into two.
///
/// The left map contains everything before `cutoff` (exclusive).
/// Right map contains `cutoff` and everything after (inclusive).
pub fn split_at(&self, cutoff: &K) -> (Self, Self)
where
K: Clone,
V: Clone,
{
let split_idx = self
.data
.binary_search_by_key(&cutoff, extract_key)
.unwrap_or_else(std::convert::identity);
(
VecMap {
data: self.data[..split_idx].to_vec(),
ordering: self.ordering,
},
VecMap {
data: self.data[split_idx..].to_vec(),
ordering: self.ordering,
},
)
}
/// Move items from `other` to the end of `self`, leaving `other` empty. /// Move items from `other` to the end of `self`, leaving `other` empty.
/// If the `other` ordering is different from `self` ordering /// If the `other` ordering is different from `self` ordering
/// `ExtendOrderingError` error will be returned. /// `ExtendOrderingError` error will be returned.

View File

@@ -15,11 +15,13 @@ anyhow.workspace = true
axum.workspace = true axum.workspace = true
clap.workspace = true clap.workspace = true
futures.workspace = true futures.workspace = true
inotify.workspace = true
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
sysinfo.workspace = true sysinfo.workspace = true
tokio = { workspace = true, features = ["rt-multi-thread"] } tokio = { workspace = true, features = ["rt-multi-thread"] }
tokio-postgres.workspace = true tokio-postgres.workspace = true
tokio-stream.workspace = true
tokio-util.workspace = true tokio-util.workspace = true
tracing.workspace = true tracing.workspace = true
tracing-subscriber.workspace = true tracing-subscriber.workspace = true

View File

@@ -5,8 +5,6 @@ use std::{env, path::PathBuf, process::Command};
use anyhow::{anyhow, Context}; use anyhow::{anyhow, Context};
const WALPROPOSER_PG_VERSION: &str = "v17";
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
// Tell cargo to invalidate the built crate whenever the wrapper changes // Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=bindgen_deps.h"); println!("cargo:rerun-if-changed=bindgen_deps.h");
@@ -38,10 +36,7 @@ fn main() -> anyhow::Result<()> {
// Rebuild crate when libwalproposer.a changes // Rebuild crate when libwalproposer.a changes
println!("cargo:rerun-if-changed={walproposer_lib_search_str}/libwalproposer.a"); println!("cargo:rerun-if-changed={walproposer_lib_search_str}/libwalproposer.a");
let pg_config_bin = pg_install_abs let pg_config_bin = pg_install_abs.join("v16").join("bin").join("pg_config");
.join(WALPROPOSER_PG_VERSION)
.join("bin")
.join("pg_config");
let inc_server_path: String = if pg_config_bin.exists() { let inc_server_path: String = if pg_config_bin.exists() {
let output = Command::new(pg_config_bin) let output = Command::new(pg_config_bin)
.arg("--includedir-server") .arg("--includedir-server")
@@ -58,7 +53,7 @@ fn main() -> anyhow::Result<()> {
.into() .into()
} else { } else {
let server_path = pg_install_abs let server_path = pg_install_abs
.join(WALPROPOSER_PG_VERSION) .join("v16")
.join("include") .join("include")
.join("postgresql") .join("postgresql")
.join("server") .join("server")

View File

@@ -15,6 +15,7 @@ anyhow.workspace = true
arc-swap.workspace = true arc-swap.workspace = true
async-compression.workspace = true async-compression.workspace = true
async-stream.workspace = true async-stream.workspace = true
async-trait.workspace = true
bit_field.workspace = true bit_field.workspace = true
byteorder.workspace = true byteorder.workspace = true
bytes.workspace = true bytes.workspace = true
@@ -22,11 +23,15 @@ camino.workspace = true
camino-tempfile.workspace = true camino-tempfile.workspace = true
chrono = { workspace = true, features = ["serde"] } chrono = { workspace = true, features = ["serde"] }
clap = { workspace = true, features = ["string"] } clap = { workspace = true, features = ["string"] }
const_format.workspace = true
consumption_metrics.workspace = true consumption_metrics.workspace = true
crc32c.workspace = true crc32c.workspace = true
crossbeam-utils.workspace = true
either.workspace = true either.workspace = true
flate2.workspace = true
fail.workspace = true fail.workspace = true
futures.workspace = true futures.workspace = true
git-version.workspace = true
hex.workspace = true hex.workspace = true
humantime.workspace = true humantime.workspace = true
humantime-serde.workspace = true humantime-serde.workspace = true
@@ -52,6 +57,10 @@ serde.workspace = true
serde_json = { workspace = true, features = ["raw_value"] } serde_json = { workspace = true, features = ["raw_value"] }
serde_path_to_error.workspace = true serde_path_to_error.workspace = true
serde_with.workspace = true serde_with.workspace = true
signal-hook.workspace = true
smallvec = { workspace = true, features = ["write"] }
svg_fmt.workspace = true
sync_wrapper.workspace = true
sysinfo.workspace = true sysinfo.workspace = true
tokio-tar.workspace = true tokio-tar.workspace = true
thiserror.workspace = true thiserror.workspace = true
@@ -64,6 +73,7 @@ tokio-stream.workspace = true
tokio-util.workspace = true tokio-util.workspace = true
toml_edit = { workspace = true, features = [ "serde" ] } toml_edit = { workspace = true, features = [ "serde" ] }
tracing.workspace = true tracing.workspace = true
twox-hash.workspace = true
url.workspace = true url.workspace = true
walkdir.workspace = true walkdir.workspace = true
metrics.workspace = true metrics.workspace = true

View File

@@ -1,7 +1,7 @@
//! Quantify a single walredo manager's throughput under N concurrent callers. //! Quantify a single walredo manager's throughput under N concurrent callers.
//! //!
//! The benchmark implementation ([`bench_impl`]) is parametrized by //! The benchmark implementation ([`bench_impl`]) is parametrized by
//! - `redo_work` => an async closure that takes a `PostgresRedoManager` and performs one redo //! - `redo_work` => [`Request::short_request`] or [`Request::medium_request`]
//! - `n_redos` => number of times the benchmark shell execute the `redo_work` //! - `n_redos` => number of times the benchmark shell execute the `redo_work`
//! - `nclients` => number of clients (more on this shortly). //! - `nclients` => number of clients (more on this shortly).
//! //!
@@ -10,7 +10,7 @@
//! Each task executes the `redo_work` `n_redos/nclients` times. //! Each task executes the `redo_work` `n_redos/nclients` times.
//! //!
//! We exercise the following combinations: //! We exercise the following combinations:
//! - `redo_work = ping / short / medium`` //! - `redo_work = short / medium``
//! - `nclients = [1, 2, 4, 8, 16, 32, 64, 128]` //! - `nclients = [1, 2, 4, 8, 16, 32, 64, 128]`
//! //!
//! We let `criterion` determine the `n_redos` using `iter_custom`. //! We let `criterion` determine the `n_redos` using `iter_custom`.
@@ -27,43 +27,33 @@
//! //!
//! # Reference Numbers //! # Reference Numbers
//! //!
//! 2024-09-18 on im4gn.2xlarge //! 2024-04-15 on i3en.3xlarge
//! //!
//! ```text //! ```text
//! ping/1 time: [21.789 µs 21.918 µs 22.078 µs] //! short/1 time: [24.584 µs 24.737 µs 24.922 µs]
//! ping/2 time: [27.686 µs 27.812 µs 27.970 µs] //! short/2 time: [33.479 µs 33.660 µs 33.888 µs]
//! ping/4 time: [35.468 µs 35.671 µs 35.926 µs] //! short/4 time: [42.713 µs 43.046 µs 43.440 µs]
//! ping/8 time: [59.682 µs 59.987 µs 60.363 µs] //! short/8 time: [71.814 µs 72.478 µs 73.240 µs]
//! ping/16 time: [101.79 µs 102.37 µs 103.08 µs] //! short/16 time: [132.73 µs 134.45 µs 136.22 µs]
//! ping/32 time: [184.18 µs 185.15 µs 186.36 µs] //! short/32 time: [258.31 µs 260.73 µs 263.27 µs]
//! ping/64 time: [349.86 µs 351.45 µs 353.47 µs] //! short/64 time: [511.61 µs 514.44 µs 517.51 µs]
//! ping/128 time: [684.53 µs 687.98 µs 692.17 µs] //! short/128 time: [992.64 µs 998.23 µs 1.0042 ms]
//! short/1 time: [31.833 µs 32.126 µs 32.428 µs] //! medium/1 time: [110.11 µs 110.50 µs 110.96 µs]
//! short/2 time: [35.558 µs 35.756 µs 35.992 µs] //! medium/2 time: [153.06 µs 153.85 µs 154.99 µs]
//! short/4 time: [44.850 µs 45.138 µs 45.484 µs] //! medium/4 time: [317.51 µs 319.92 µs 322.85 µs]
//! short/8 time: [65.985 µs 66.379 µs 66.853 µs] //! medium/8 time: [638.30 µs 644.68 µs 652.12 µs]
//! short/16 time: [127.06 µs 127.90 µs 128.87 µs] //! medium/16 time: [1.2651 ms 1.2773 ms 1.2914 ms]
//! short/32 time: [252.98 µs 254.70 µs 256.73 µs] //! medium/32 time: [2.5117 ms 2.5410 ms 2.5720 ms]
//! short/64 time: [497.13 µs 499.86 µs 503.26 µs] //! medium/64 time: [4.8088 ms 4.8555 ms 4.9047 ms]
//! short/128 time: [987.46 µs 993.45 µs 1.0004 ms] //! medium/128 time: [8.8311 ms 8.9849 ms 9.1263 ms]
//! medium/1 time: [137.91 µs 138.55 µs 139.35 µs]
//! medium/2 time: [192.00 µs 192.91 µs 194.07 µs]
//! medium/4 time: [389.62 µs 391.55 µs 394.01 µs]
//! medium/8 time: [776.80 µs 780.33 µs 784.77 µs]
//! medium/16 time: [1.5323 ms 1.5383 ms 1.5459 ms]
//! medium/32 time: [3.0120 ms 3.0226 ms 3.0350 ms]
//! medium/64 time: [5.7405 ms 5.7787 ms 5.8166 ms]
//! medium/128 time: [10.412 ms 10.574 ms 10.718 ms]
//! ``` //! ```
use anyhow::Context; use anyhow::Context;
use bytes::{Buf, Bytes}; use bytes::{Buf, Bytes};
use criterion::{BenchmarkId, Criterion}; use criterion::{BenchmarkId, Criterion};
use once_cell::sync::Lazy;
use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager}; use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager};
use pageserver_api::{key::Key, shard::TenantShardId}; use pageserver_api::{key::Key, shard::TenantShardId};
use std::{ use std::{
future::Future,
sync::Arc, sync::Arc,
time::{Duration, Instant}, time::{Duration, Instant},
}; };
@@ -71,59 +61,40 @@ use tokio::{sync::Barrier, task::JoinSet};
use utils::{id::TenantId, lsn::Lsn}; use utils::{id::TenantId, lsn::Lsn};
fn bench(c: &mut Criterion) { fn bench(c: &mut Criterion) {
macro_rules! bench_group { {
($name:expr, $redo_work:expr) => {{ let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
let name: &str = $name; for nclients in nclients {
let nclients = [1, 2, 4, 8, 16, 32, 64, 128]; let mut group = c.benchmark_group("short");
for nclients in nclients { group.bench_with_input(
let mut group = c.benchmark_group(name); BenchmarkId::from_parameter(nclients),
group.bench_with_input( &nclients,
BenchmarkId::from_parameter(nclients), |b, nclients| {
&nclients, let redo_work = Arc::new(Request::short_input());
|b, nclients| { b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
b.iter_custom(|iters| bench_impl($redo_work, iters, *nclients)); },
}, );
); }
} }
}}; {
let nclients = [1, 2, 4, 8, 16, 32, 64, 128];
for nclients in nclients {
let mut group = c.benchmark_group("medium");
group.bench_with_input(
BenchmarkId::from_parameter(nclients),
&nclients,
|b, nclients| {
let redo_work = Arc::new(Request::medium_input());
b.iter_custom(|iters| bench_impl(Arc::clone(&redo_work), iters, *nclients));
},
);
}
} }
//
// benchmark the protocol implementation
//
let pg_version = 14;
bench_group!(
"ping",
Arc::new(move |mgr: Arc<PostgresRedoManager>| async move {
let _: () = mgr.ping(pg_version).await.unwrap();
})
);
//
// benchmarks with actual record redo
//
let make_redo_work = |req: &'static Request| {
Arc::new(move |mgr: Arc<PostgresRedoManager>| async move {
let page = req.execute(&mgr).await.unwrap();
assert_eq!(page.remaining(), 8192);
})
};
bench_group!("short", {
static REQUEST: Lazy<Request> = Lazy::new(Request::short_input);
make_redo_work(&REQUEST)
});
bench_group!("medium", {
static REQUEST: Lazy<Request> = Lazy::new(Request::medium_input);
make_redo_work(&REQUEST)
});
} }
criterion::criterion_group!(benches, bench); criterion::criterion_group!(benches, bench);
criterion::criterion_main!(benches); criterion::criterion_main!(benches);
// Returns the sum of each client's wall-clock time spent executing their share of the n_redos. // Returns the sum of each client's wall-clock time spent executing their share of the n_redos.
fn bench_impl<F, Fut>(redo_work: Arc<F>, n_redos: u64, nclients: u64) -> Duration fn bench_impl(redo_work: Arc<Request>, n_redos: u64, nclients: u64) -> Duration {
where
F: Fn(Arc<PostgresRedoManager>) -> Fut + Send + Sync + 'static,
Fut: Future<Output = ()> + Send + 'static,
{
let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap(); let repo_dir = camino_tempfile::tempdir_in(env!("CARGO_TARGET_TMPDIR")).unwrap();
let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf()); let conf = PageServerConf::dummy_conf(repo_dir.path().to_path_buf());
@@ -164,20 +135,17 @@ where
}) })
} }
async fn client<F, Fut>( async fn client(
mgr: Arc<PostgresRedoManager>, mgr: Arc<PostgresRedoManager>,
start: Arc<Barrier>, start: Arc<Barrier>,
redo_work: Arc<F>, redo_work: Arc<Request>,
n_redos: u64, n_redos: u64,
) -> Duration ) -> Duration {
where
F: Fn(Arc<PostgresRedoManager>) -> Fut + Send + Sync + 'static,
Fut: Future<Output = ()> + Send + 'static,
{
start.wait().await; start.wait().await;
let start = Instant::now(); let start = Instant::now();
for _ in 0..n_redos { for _ in 0..n_redos {
redo_work(Arc::clone(&mgr)).await; let page = redo_work.execute(&mgr).await.unwrap();
assert_eq!(page.remaining(), 8192);
// The real pageserver will rarely if ever do 2 walredos in a row without // The real pageserver will rarely if ever do 2 walredos in a row without
// yielding to the executor. // yielding to the executor.
tokio::task::yield_now().await; tokio::task::yield_now().await;
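For reference, the shape of this benchmark — N clients synchronizing on a barrier, each timing its own loop, and the summed per-client wall-clock time handed back to Criterion through `iter_custom` — can be sketched in isolation. This is a hypothetical stand-in (a no-op replaces the redo request), not the actual benchmark:

```rust
// Sketch only: each client waits on a shared barrier, times its own loop,
// and the per-client wall-clock durations are summed for `iter_custom`.
use std::sync::Arc;
use std::time::{Duration, Instant};

use criterion::{criterion_group, criterion_main, Criterion};
use tokio::{sync::Barrier, task::JoinSet};

async fn client(start: Arc<Barrier>, n_iters: u64) -> Duration {
    start.wait().await; // all clients start at the same moment
    let begin = Instant::now();
    for _ in 0..n_iters {
        tokio::task::yield_now().await; // stand-in for one redo request
    }
    begin.elapsed()
}

fn bench(c: &mut Criterion) {
    let rt = tokio::runtime::Runtime::new().unwrap();
    let nclients: u64 = 4;
    c.bench_function("clients/4", |b| {
        b.iter_custom(|iters| {
            rt.block_on(async {
                let barrier = Arc::new(Barrier::new(nclients as usize));
                let mut tasks = JoinSet::new();
                for _ in 0..nclients {
                    tasks.spawn(client(Arc::clone(&barrier), iters / nclients));
                }
                let mut total = Duration::ZERO;
                while let Some(res) = tasks.join_next().await {
                    total += res.unwrap(); // sum of each client's wall-clock time
                }
                total
            })
        })
    });
}

criterion_group!(benches, bench);
criterion_main!(benches);
```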

View File

@@ -432,7 +432,7 @@ impl Client {
self.mgmt_api_endpoint self.mgmt_api_endpoint
); );
self.request(Method::PUT, &uri, req) self.request(Method::POST, &uri, req)
.await? .await?
.json() .json()
.await .await

View File

@@ -9,18 +9,41 @@ default = []
[dependencies] [dependencies]
anyhow.workspace = true anyhow.workspace = true
async-compression.workspace = true
async-stream.workspace = true async-stream.workspace = true
byteorder.workspace = true
bytes.workspace = true
chrono = { workspace = true, features = ["serde"] }
clap = { workspace = true, features = ["string"] } clap = { workspace = true, features = ["string"] }
const_format.workspace = true
consumption_metrics.workspace = true
crossbeam-utils.workspace = true
either.workspace = true
flate2.workspace = true
fail.workspace = true
futures.workspace = true futures.workspace = true
git-version.workspace = true
hex.workspace = true
humantime.workspace = true
humantime-serde.workspace = true
itertools.workspace = true itertools.workspace = true
once_cell.workspace = true once_cell.workspace = true
pageserver_api.workspace = true pageserver_api.workspace = true
pin-project-lite.workspace = true pin-project-lite.workspace = true
rand.workspace = true rand.workspace = true
smallvec = { workspace = true, features = ["write"] }
svg_fmt.workspace = true svg_fmt.workspace = true
sync_wrapper.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] } tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
tokio-io-timeout.workspace = true
tokio-util.workspace = true
tracing.workspace = true tracing.workspace = true
tracing-error.workspace = true
tracing-subscriber.workspace = true tracing-subscriber.workspace = true
url.workspace = true
walkdir.workspace = true
metrics.workspace = true
utils.workspace = true utils.workspace = true
workspace_hack.workspace = true workspace_hack.workspace = true

View File

@@ -8,8 +8,10 @@ license.workspace = true
[dependencies] [dependencies]
anyhow.workspace = true anyhow.workspace = true
bytes.workspace = true
camino.workspace = true camino.workspace = true
clap = { workspace = true, features = ["string"] } clap = { workspace = true, features = ["string"] }
git-version.workspace = true
humantime.workspace = true humantime.workspace = true
pageserver = { path = ".." } pageserver = { path = ".." }
pageserver_api.workspace = true pageserver_api.workspace = true
@@ -22,4 +24,5 @@ toml_edit.workspace = true
utils.workspace = true utils.workspace = true
svg_fmt.workspace = true svg_fmt.workspace = true
workspace_hack.workspace = true workspace_hack.workspace = true
serde.workspace = true
serde_json.workspace = true serde_json.workspace = true

View File

@@ -79,24 +79,16 @@ pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
return None; return None;
} }
let keys: Vec<&str> = split[0].split('-').collect(); let keys: Vec<&str> = split[0].split('-').collect();
let lsn_and_opt_generation: Vec<&str> = split[1].split('v').collect(); let mut lsns: Vec<&str> = split[1].split('-').collect();
let lsns: Vec<&str> = lsn_and_opt_generation[0].split('-').collect(); let is_delta = if lsns.len() == 1 {
let the_lsns: [&str; 2]; lsns.push(lsns[0]);
/*
* Generations add a -vX-XXXXXX postfix, which causes issues when we try to
* parse 'vX' as an LSN.
*/
let is_delta = if lsns.len() == 1 || lsns[1].is_empty() {
the_lsns = [lsns[0], lsns[0]];
false false
} else { } else {
the_lsns = [lsns[0], lsns[1]];
true true
}; };
let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap(); let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
let lsn_range = Lsn::from_hex(the_lsns[0]).unwrap()..Lsn::from_hex(the_lsns[1]).unwrap(); let lsn_range = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();
let holes = Vec::new(); let holes = Vec::new();
Some(LayerFile { Some(LayerFile {
key_range, key_range,
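The two file-name shapes this parser distinguishes — a single LSN for image layers, an LSN range for delta layers, plus the optional generation suffix mentioned in the removed comment — can be told apart with a small stand-alone sketch. Shapes are inferred from the code above; the names are hypothetical:

```rust
// Sketch only (inferred shapes):
//   image layer:  <KEY_START>-<KEY_END>__<LSN>
//   delta layer:  <KEY_START>-<KEY_END>__<LSN_START>-<LSN_END>
// with an optional generation suffix like "-v1-00000001" to strip first.
fn is_delta_name(name: &str) -> Option<bool> {
    let (_keys, lsn_part) = name.split_once("__")?;
    // Drop the generation suffix; hex LSNs never contain 'v',
    // so splitting on it is safe.
    let lsn_part = lsn_part.split('v').next()?;
    let mut lsns = lsn_part.split('-').filter(|s| !s.is_empty());
    let _start = lsns.next()?;
    Some(lsns.next().is_some()) // a second LSN means a delta layer
}

fn main() {
    assert_eq!(is_delta_name("AAAA-FFFF__0000000001696070"), Some(false));
    assert_eq!(
        is_delta_name("AAAA-FFFF__0000000001696070-0000000001696200-v1-00000001"),
        Some(true)
    );
}
```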

View File

@@ -30,8 +30,9 @@ use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::dispatch_pgversion; use postgres_ffi::dispatch_pgversion;
use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID}; use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA}; use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM}; use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
use postgres_ffi::TransactionId;
use postgres_ffi::XLogFileName; use postgres_ffi::XLogFileName;
use postgres_ffi::PG_TLI; use postgres_ffi::PG_TLI;
use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE}; use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
@@ -254,11 +255,8 @@ where
let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup; let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;
let pgversion = self.timeline.pg_version;
let subdirs = dispatch_pgversion!(pgversion, &pgv::bindings::PGDATA_SUBDIRS[..]);
// Create pgdata subdirs structure // Create pgdata subdirs structure
for dir in subdirs.iter() { for dir in PGDATA_SUBDIRS.iter() {
let header = new_tar_header_dir(dir)?; let header = new_tar_header_dir(dir)?;
self.ar self.ar
.append(&header, &mut io::empty()) .append(&header, &mut io::empty())
@@ -608,7 +606,7 @@ where
// //
// Extract twophase state files // Extract twophase state files
// //
async fn add_twophase_file(&mut self, xid: u64) -> Result<(), BasebackupError> { async fn add_twophase_file(&mut self, xid: TransactionId) -> Result<(), BasebackupError> {
let img = self let img = self
.timeline .timeline
.get_twophase_file(xid, self.lsn, self.ctx) .get_twophase_file(xid, self.lsn, self.ctx)
@@ -619,11 +617,7 @@ where
buf.extend_from_slice(&img[..]); buf.extend_from_slice(&img[..]);
let crc = crc32c::crc32c(&img[..]); let crc = crc32c::crc32c(&img[..]);
buf.put_u32_le(crc); buf.put_u32_le(crc);
let path = if self.timeline.pg_version < 17 { let path = format!("pg_twophase/{:>08X}", xid);
format!("pg_twophase/{:>08X}", xid)
} else {
format!("pg_twophase/{:>016X}", xid)
};
let header = new_tar_header(&path, buf.len() as u64)?; let header = new_tar_header(&path, buf.len() as u64)?;
self.ar self.ar
.append(&header, &buf[..]) .append(&header, &buf[..])
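The path change being reverted here follows PostgreSQL's on-disk naming: before v17, pg_twophase files are named after a 32-bit xid (8 hex digits); from v17 on, after a 64-bit FullTransactionId (16 hex digits). A minimal sketch of the version dispatch, reusing the format strings shown above:

```rust
// Sketch only: pick the twophase file name width by PostgreSQL version.
fn twophase_path(pg_version: u32, xid: u64) -> String {
    if pg_version < 17 {
        format!("pg_twophase/{:>08X}", xid) // 32-bit xid, 8 hex digits
    } else {
        format!("pg_twophase/{:>016X}", xid) // FullTransactionId, 16 hex digits
    }
}

fn main() {
    assert_eq!(twophase_path(16, 0x2E5), "pg_twophase/000002E5");
    assert_eq!(twophase_path(17, 0xA000002E4), "pg_twophase/0000000A000002E4");
}
```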

View File

@@ -13,6 +13,7 @@ use pageserver_api::{
use remote_storage::{RemotePath, RemoteStorageConfig}; use remote_storage::{RemotePath, RemoteStorageConfig};
use std::env; use std::env;
use storage_broker::Uri; use storage_broker::Uri;
use utils::crashsafe::path_with_suffix_extension;
use utils::logging::SecretString; use utils::logging::SecretString;
use once_cell::sync::OnceCell; use once_cell::sync::OnceCell;
@@ -32,7 +33,7 @@ use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use crate::virtual_file; use crate::virtual_file;
use crate::virtual_file::io_engine; use crate::virtual_file::io_engine;
use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME}; use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX};
/// Global state of pageserver. /// Global state of pageserver.
/// ///
@@ -256,6 +257,17 @@ impl PageServerConf {
.join(timeline_id.to_string()) .join(timeline_id.to_string())
} }
pub(crate) fn timeline_delete_mark_file_path(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
) -> Utf8PathBuf {
path_with_suffix_extension(
self.timeline_path(&tenant_shard_id, &timeline_id),
TIMELINE_DELETE_MARK_SUFFIX,
)
}
/// Turns storage remote path of a file into its local path. /// Turns storage remote path of a file into its local path.
pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf { pub fn local_path(&self, remote_path: &RemotePath) -> Utf8PathBuf {
remote_path.with_base(&self.workdir) remote_path.with_base(&self.workdir)
@@ -269,7 +281,7 @@ impl PageServerConf {
#[allow(clippy::manual_range_patterns)] #[allow(clippy::manual_range_patterns)]
match pg_version { match pg_version {
14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))), 14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
_ => bail!("Unsupported postgres version: {}", pg_version), _ => bail!("Unsupported postgres version: {}", pg_version),
} }
} }
@@ -324,6 +336,7 @@ impl PageServerConf {
max_vectored_read_bytes, max_vectored_read_bytes,
image_compression, image_compression,
ephemeral_bytes_per_memory_kb, ephemeral_bytes_per_memory_kb,
compact_level0_phase1_value_access: _,
l0_flush, l0_flush,
virtual_file_direct_io, virtual_file_direct_io,
concurrent_tenant_warmup, concurrent_tenant_warmup,
@@ -478,6 +491,11 @@ pub struct ConfigurableSemaphore {
} }
impl ConfigurableSemaphore { impl ConfigurableSemaphore {
pub const DEFAULT_INITIAL: NonZeroUsize = match NonZeroUsize::new(1) {
Some(x) => x,
None => panic!("const unwrap is not yet stable"),
};
/// Initialize using a non-zero amount of permits. /// Initialize using a non-zero amount of permits.
/// ///
/// Require a non-zero number of initial permits, because using permits == 0 is a crude way to disable a /// Require a non-zero number of initial permits, because using permits == 0 is a crude way to disable a
@@ -498,6 +516,12 @@ impl ConfigurableSemaphore {
} }
} }
impl Default for ConfigurableSemaphore {
fn default() -> Self {
Self::new(Self::DEFAULT_INITIAL)
}
}
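The restored `DEFAULT_INITIAL` uses the match-and-panic idiom because `Option::unwrap` was not callable in `const` items when this was written; a `panic!` arm is permitted in const evaluation and is checked at compile time. A stand-alone illustration (not part of the diff):

```rust
// Illustration: unwrap an Option in a `const` item via match-and-panic.
// The None arm is evaluated at compile time and never reached for a
// non-zero literal.
use std::num::NonZeroUsize;

const ONE: NonZeroUsize = match NonZeroUsize::new(1) {
    Some(x) => x,
    None => panic!("const unwrap is not yet stable"),
};

fn main() {
    assert_eq!(ONE.get(), 1);
}
```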
impl PartialEq for ConfigurableSemaphore { impl PartialEq for ConfigurableSemaphore {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
// the number of permits can be increased at runtime, so we cannot really fulfill the // the number of permits can be increased at runtime, so we cannot really fulfill the
@@ -534,6 +558,16 @@ mod tests {
.expect("parse_and_validate"); .expect("parse_and_validate");
} }
#[test]
fn test_compactl0_phase1_access_mode_is_ignored_silently() {
let input = indoc::indoc! {r#"
[compact_level0_phase1_value_access]
mode = "streaming-kmerge"
validate = "key-lsn-value"
"#};
toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input).unwrap();
}
/// If there's a typo in the pageserver config, we'd rather catch that typo /// If there's a typo in the pageserver config, we'd rather catch that typo
/// and fail pageserver startup than silently ignore the typo, leaving whoever /// and fail pageserver startup than silently ignore the typo, leaving whoever
/// made it in the belief that their config change is effective. /// made it in the belief that their config change is effective.

View File

@@ -178,7 +178,7 @@ async fn collect_metrics(
) )
.await; .await;
if let Err(e) = res { if let Err(e) = res {
tracing::error!("failed to upload to remote storage: {e:#}"); tracing::error!("failed to upload to S3: {e:#}");
} }
} }
}; };

View File

@@ -2,7 +2,7 @@ use std::collections::HashMap;
use futures::Future; use futures::Future;
use pageserver_api::{ use pageserver_api::{
controller_api::{AvailabilityZone, NodeRegisterRequest}, controller_api::NodeRegisterRequest,
shard::TenantShardId, shard::TenantShardId,
upcall_api::{ upcall_api::{
ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest,
@@ -148,10 +148,10 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
.and_then(|jv| jv.as_str().map(|str| str.to_owned())); .and_then(|jv| jv.as_str().map(|str| str.to_owned()));
match az_id_from_metadata { match az_id_from_metadata {
Some(az_id) => Some(AvailabilityZone(az_id)), Some(az_id) => Some(az_id),
None => { None => {
tracing::warn!("metadata.json does not contain an 'availability_zone_id' field"); tracing::warn!("metadata.json does not contain an 'availability_zone_id' field");
conf.availability_zone.clone().map(AvailabilityZone) conf.availability_zone.clone()
} }
} }
}; };

View File

@@ -2955,7 +2955,7 @@ pub fn make_router(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive", "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive",
|r| api_handler(r, timeline_preserve_initdb_handler), |r| api_handler(r, timeline_preserve_initdb_handler),
) )
.put( .post(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/archival_config", "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/archival_config",
|r| api_handler(r, timeline_archival_config_handler), |r| api_handler(r, timeline_archival_config_handler),
) )

View File

@@ -580,11 +580,9 @@ async fn import_file(
import_slru(modification, slru, file_path, reader, len, ctx).await?; import_slru(modification, slru, file_path, reader, len, ctx).await?;
debug!("imported multixact members slru"); debug!("imported multixact members slru");
} else if file_path.starts_with("pg_twophase") { } else if file_path.starts_with("pg_twophase") {
let bytes = read_all_bytes(reader).await?; let xid = u32::from_str_radix(file_name.as_ref(), 16)?;
// In PostgreSQL v17, this is a 64-bit FullTransactionId. In previous versions, let bytes = read_all_bytes(reader).await?;
// it's a 32-bit TransactionId, which fits in u64 anyway.
let xid = u64::from_str_radix(file_name.as_ref(), 16)?;
modification modification
.put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]), ctx) .put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]), ctx)
.await?; .await?;

View File

@@ -1177,10 +1177,10 @@ pub(crate) mod virtual_file_io_engine {
} }
struct GlobalAndPerTimelineHistogramTimer<'a, 'c> { struct GlobalAndPerTimelineHistogramTimer<'a, 'c> {
global_latency_histo: &'a Histogram, global_metric: &'a Histogram,
// Optional because not all op types are tracked per-timeline // Optional because not all op types are tracked per-timeline
per_timeline_latency_histo: Option<&'a Histogram>, timeline_metric: Option<&'a Histogram>,
ctx: &'c RequestContext, ctx: &'c RequestContext,
start: std::time::Instant, start: std::time::Instant,
@@ -1212,10 +1212,9 @@ impl<'a, 'c> Drop for GlobalAndPerTimelineHistogramTimer<'a, 'c> {
elapsed elapsed
} }
}; };
self.global_latency_histo self.global_metric.observe(ex_throttled.as_secs_f64());
.observe(ex_throttled.as_secs_f64()); if let Some(timeline_metric) = self.timeline_metric {
if let Some(per_timeline_getpage_histo) = self.per_timeline_latency_histo { timeline_metric.observe(ex_throttled.as_secs_f64());
per_timeline_getpage_histo.observe(ex_throttled.as_secs_f64());
} }
} }
} }
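The timer being renamed here is an RAII guard: the histograms are observed in `Drop`, so every exit path of the measured scope gets recorded. A minimal sketch of the pattern with stand-in types rather than the real metrics API:

```rust
// Sketch only: observe elapsed time into one mandatory and one optional
// histogram when the guard is dropped.
use std::time::Instant;

struct Histogram; // stand-in for metrics::Histogram
impl Histogram {
    fn observe(&self, secs: f64) {
        println!("observed {secs}s");
    }
}

struct HistogramTimer<'a> {
    global: &'a Histogram,
    per_timeline: Option<&'a Histogram>,
    start: Instant,
}

impl Drop for HistogramTimer<'_> {
    fn drop(&mut self) {
        let elapsed = self.start.elapsed().as_secs_f64();
        self.global.observe(elapsed);
        if let Some(h) = self.per_timeline {
            h.observe(elapsed);
        }
    }
}

fn main() {
    let (global, per_timeline) = (Histogram, Histogram);
    let _timer = HistogramTimer {
        global: &global,
        per_timeline: Some(&per_timeline),
        start: Instant::now(),
    };
    // ... measured work here; both histograms record when `_timer` drops
}
```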
@@ -1241,32 +1240,10 @@ pub enum SmgrQueryType {
#[derive(Debug)] #[derive(Debug)]
pub(crate) struct SmgrQueryTimePerTimeline { pub(crate) struct SmgrQueryTimePerTimeline {
global_started: [IntCounter; SmgrQueryType::COUNT], global_metrics: [Histogram; SmgrQueryType::COUNT],
global_latency: [Histogram; SmgrQueryType::COUNT], per_timeline_getpage: Histogram,
per_timeline_getpage_started: IntCounter,
per_timeline_getpage_latency: Histogram,
} }
static SMGR_QUERY_STARTED_GLOBAL: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
// it's a counter, but the name is prepared to extend it to a histogram of queue depth
"pageserver_smgr_query_started_global_count",
"Number of smgr queries started, aggregated by query type.",
&["smgr_query_type"],
)
.expect("failed to define a metric")
});
static SMGR_QUERY_STARTED_PER_TENANT_TIMELINE: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
// it's a counter, but the name is prepared to extend it to a histogram of queue depth
"pageserver_smgr_query_started_count",
"Number of smgr queries started, aggregated by query type and tenant/timeline.",
&["smgr_query_type", "tenant_id", "shard_id", "timeline_id"],
)
.expect("failed to define a metric")
});
static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| { static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
register_histogram_vec!( register_histogram_vec!(
"pageserver_smgr_query_seconds", "pageserver_smgr_query_seconds",
@@ -1342,20 +1319,14 @@ impl SmgrQueryTimePerTimeline {
let tenant_id = tenant_shard_id.tenant_id.to_string(); let tenant_id = tenant_shard_id.tenant_id.to_string();
let shard_slug = format!("{}", tenant_shard_id.shard_slug()); let shard_slug = format!("{}", tenant_shard_id.shard_slug());
let timeline_id = timeline_id.to_string(); let timeline_id = timeline_id.to_string();
let global_started = std::array::from_fn(|i| { let global_metrics = std::array::from_fn(|i| {
let op = SmgrQueryType::from_repr(i).unwrap();
SMGR_QUERY_STARTED_GLOBAL
.get_metric_with_label_values(&[op.into()])
.unwrap()
});
let global_latency = std::array::from_fn(|i| {
let op = SmgrQueryType::from_repr(i).unwrap(); let op = SmgrQueryType::from_repr(i).unwrap();
SMGR_QUERY_TIME_GLOBAL SMGR_QUERY_TIME_GLOBAL
.get_metric_with_label_values(&[op.into()]) .get_metric_with_label_values(&[op.into()])
.unwrap() .unwrap()
}); });
let per_timeline_getpage_started = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE let per_timeline_getpage = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
.get_metric_with_label_values(&[ .get_metric_with_label_values(&[
SmgrQueryType::GetPageAtLsn.into(), SmgrQueryType::GetPageAtLsn.into(),
&tenant_id, &tenant_id,
@@ -1363,32 +1334,18 @@ impl SmgrQueryTimePerTimeline {
&timeline_id, &timeline_id,
]) ])
.unwrap(); .unwrap();
let per_timeline_getpage_latency = SMGR_QUERY_TIME_PER_TENANT_TIMELINE
.get_metric_with_label_values(&[
SmgrQueryType::GetPageAtLsn.into(),
&tenant_id,
&shard_slug,
&timeline_id,
])
.unwrap();
Self { Self {
global_started, global_metrics,
global_latency, per_timeline_getpage,
per_timeline_getpage_latency,
per_timeline_getpage_started,
} }
} }
pub(crate) fn start_timer<'c: 'a, 'a>( pub(crate) fn start_timer<'c: 'a, 'a>(
&'a self, &'a self,
op: SmgrQueryType, op: SmgrQueryType,
ctx: &'c RequestContext, ctx: &'c RequestContext,
) -> Option<impl Drop + 'a> { ) -> Option<impl Drop + '_> {
let global_metric = &self.global_metrics[op as usize];
let start = Instant::now(); let start = Instant::now();
self.global_started[op as usize].inc();
// We subtract time spent throttled from the observed latency.
match ctx.micros_spent_throttled.open() { match ctx.micros_spent_throttled.open() {
Ok(()) => (), Ok(()) => (),
Err(error) => { Err(error) => {
@@ -1407,16 +1364,15 @@ impl SmgrQueryTimePerTimeline {
} }
} }
let per_timeline_latency_histo = if matches!(op, SmgrQueryType::GetPageAtLsn) { let timeline_metric = if matches!(op, SmgrQueryType::GetPageAtLsn) {
self.per_timeline_getpage_started.inc(); Some(&self.per_timeline_getpage)
Some(&self.per_timeline_getpage_latency)
} else { } else {
None None
}; };
Some(GlobalAndPerTimelineHistogramTimer { Some(GlobalAndPerTimelineHistogramTimer {
global_latency_histo: &self.global_latency[op as usize], global_metric,
per_timeline_latency_histo, timeline_metric,
ctx, ctx,
start, start,
op, op,
@@ -1467,12 +1423,9 @@ mod smgr_query_time_tests {
let get_counts = || { let get_counts = || {
let global: u64 = ops let global: u64 = ops
.iter() .iter()
.map(|op| metrics.global_latency[*op as usize].get_sample_count()) .map(|op| metrics.global_metrics[*op as usize].get_sample_count())
.sum(); .sum();
( (global, metrics.per_timeline_getpage.get_sample_count())
global,
metrics.per_timeline_getpage_latency.get_sample_count(),
)
}; };
let (pre_global, pre_per_tenant_timeline) = get_counts(); let (pre_global, pre_per_tenant_timeline) = get_counts();
@@ -1534,7 +1487,7 @@ impl BasebackupQueryTime {
pub(crate) fn start_recording<'c: 'a, 'a>( pub(crate) fn start_recording<'c: 'a, 'a>(
&'a self, &'a self,
ctx: &'c RequestContext, ctx: &'c RequestContext,
) -> BasebackupQueryTimeOngoingRecording<'a, 'a> { ) -> BasebackupQueryTimeOngoingRecording<'_, '_> {
let start = Instant::now(); let start = Instant::now();
match ctx.micros_spent_throttled.open() { match ctx.micros_spent_throttled.open() {
Ok(()) => (), Ok(()) => (),
@@ -1824,7 +1777,7 @@ pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
.expect("failed to define a metric"), .expect("failed to define a metric"),
upload_heatmap_duration: register_histogram!( upload_heatmap_duration: register_histogram!(
"pageserver_secondary_upload_heatmap_duration", "pageserver_secondary_upload_heatmap_duration",
"Time to build and upload a heatmap, including any waiting inside the remote storage client" "Time to build and upload a heatmap, including any waiting inside the S3 client"
) )
.expect("failed to define a metric"), .expect("failed to define a metric"),
download_heatmap: register_int_counter!( download_heatmap: register_int_counter!(
@@ -2623,12 +2576,6 @@ impl TimelineMetrics {
let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]); let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]);
} }
let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[
SmgrQueryType::GetPageAtLsn.into(),
tenant_id,
shard_id,
timeline_id,
]);
let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[ let _ = SMGR_QUERY_TIME_PER_TENANT_TIMELINE.remove_label_values(&[
SmgrQueryType::GetPageAtLsn.into(), SmgrQueryType::GetPageAtLsn.into(),
tenant_id, tenant_id,
@@ -2645,8 +2592,6 @@ pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]); let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
} }
tenant_throttling::remove_tenant_metrics(tenant_shard_id);
// we leave the BROKEN_TENANTS_SET entry if any // we leave the BROKEN_TENANTS_SET entry if any
} }
@@ -3110,173 +3055,41 @@ pub mod tokio_epoll_uring {
pub(crate) mod tenant_throttling { pub(crate) mod tenant_throttling {
use metrics::{register_int_counter_vec, IntCounter}; use metrics::{register_int_counter_vec, IntCounter};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use utils::shard::TenantShardId;
use crate::tenant::{self, throttle::Metric}; use crate::tenant::{self, throttle::Metric};
struct GlobalAndPerTenantIntCounter {
global: IntCounter,
per_tenant: IntCounter,
}
impl GlobalAndPerTenantIntCounter {
#[inline(always)]
pub(crate) fn inc(&self) {
self.inc_by(1)
}
#[inline(always)]
pub(crate) fn inc_by(&self, n: u64) {
self.global.inc_by(n);
self.per_tenant.inc_by(n);
}
}
pub(crate) struct TimelineGet { pub(crate) struct TimelineGet {
count_accounted_start: GlobalAndPerTenantIntCounter, wait_time: IntCounter,
count_accounted_finish: GlobalAndPerTenantIntCounter, count: IntCounter,
wait_time: GlobalAndPerTenantIntCounter,
count_throttled: GlobalAndPerTenantIntCounter,
} }
static COUNT_ACCOUNTED_START: Lazy<metrics::IntCounterVec> = Lazy::new(|| { pub(crate) static TIMELINE_GET: Lazy<TimelineGet> = Lazy::new(|| {
register_int_counter_vec!( static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
"pageserver_tenant_throttling_count_accounted_start_global", register_int_counter_vec!(
"Count of tenant throttling starts, by kind of throttle.",
&["kind"]
)
.unwrap()
});
static COUNT_ACCOUNTED_START_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_tenant_throttling_count_accounted_start",
"Count of tenant throttling starts, by kind of throttle.",
&["kind", "tenant_id", "shard_id"]
)
.unwrap()
});
static COUNT_ACCOUNTED_FINISH: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_tenant_throttling_count_accounted_finish_global",
"Count of tenant throttling finishes, by kind of throttle.",
&["kind"]
)
.unwrap()
});
static COUNT_ACCOUNTED_FINISH_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_tenant_throttling_count_accounted_finish",
"Count of tenant throttling finishes, by kind of throttle.",
&["kind", "tenant_id", "shard_id"]
)
.unwrap()
});
static WAIT_USECS: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_tenant_throttling_wait_usecs_sum_global", "pageserver_tenant_throttling_wait_usecs_sum_global",
"Sum of microseconds that spent waiting throttle by kind of throttle.", "Sum of microseconds that tenants spent waiting for a tenant throttle of a given kind.",
&["kind"] &["kind"]
) )
.unwrap() .unwrap()
}); });
static WAIT_USECS_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!( static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
"pageserver_tenant_throttling_wait_usecs_sum", register_int_counter_vec!(
"Sum of microseconds that spent waiting throttle by kind of throttle.", "pageserver_tenant_throttling_count_global",
&["kind", "tenant_id", "shard_id"] "Count of tenant throttlings, by kind of throttle.",
) &["kind"]
.unwrap() )
.unwrap()
});
let kind = "timeline_get";
TimelineGet {
wait_time: WAIT_USECS.with_label_values(&[kind]),
count: WAIT_COUNT.with_label_values(&[kind]),
}
}); });
static WAIT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| { impl Metric for &'static TimelineGet {
register_int_counter_vec!(
"pageserver_tenant_throttling_count_global",
"Count of tenant throttlings, by kind of throttle.",
&["kind"]
)
.unwrap()
});
static WAIT_COUNT_PER_TENANT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_tenant_throttling_count",
"Count of tenant throttlings, by kind of throttle.",
&["kind", "tenant_id", "shard_id"]
)
.unwrap()
});
const KIND: &str = "timeline_get";
impl TimelineGet {
pub(crate) fn new(tenant_shard_id: &TenantShardId) -> Self {
let per_tenant_label_values = &[
KIND,
&tenant_shard_id.tenant_id.to_string(),
&tenant_shard_id.shard_slug().to_string(),
];
TimelineGet {
count_accounted_start: {
GlobalAndPerTenantIntCounter {
global: COUNT_ACCOUNTED_START.with_label_values(&[KIND]),
per_tenant: COUNT_ACCOUNTED_START_PER_TENANT
.with_label_values(per_tenant_label_values),
}
},
count_accounted_finish: {
GlobalAndPerTenantIntCounter {
global: COUNT_ACCOUNTED_FINISH.with_label_values(&[KIND]),
per_tenant: COUNT_ACCOUNTED_FINISH_PER_TENANT
.with_label_values(per_tenant_label_values),
}
},
wait_time: {
GlobalAndPerTenantIntCounter {
global: WAIT_USECS.with_label_values(&[KIND]),
per_tenant: WAIT_USECS_PER_TENANT
.with_label_values(per_tenant_label_values),
}
},
count_throttled: {
GlobalAndPerTenantIntCounter {
global: WAIT_COUNT.with_label_values(&[KIND]),
per_tenant: WAIT_COUNT_PER_TENANT
.with_label_values(per_tenant_label_values),
}
},
}
}
}
pub(crate) fn preinitialize_global_metrics() {
Lazy::force(&COUNT_ACCOUNTED_START);
Lazy::force(&COUNT_ACCOUNTED_FINISH);
Lazy::force(&WAIT_USECS);
Lazy::force(&WAIT_COUNT);
}
pub(crate) fn remove_tenant_metrics(tenant_shard_id: &TenantShardId) {
for m in &[
&COUNT_ACCOUNTED_START_PER_TENANT,
&COUNT_ACCOUNTED_FINISH_PER_TENANT,
&WAIT_USECS_PER_TENANT,
&WAIT_COUNT_PER_TENANT,
] {
let _ = m.remove_label_values(&[
KIND,
&tenant_shard_id.tenant_id.to_string(),
&tenant_shard_id.shard_slug().to_string(),
]);
}
}
impl Metric for TimelineGet {
#[inline(always)]
fn accounting_start(&self) {
self.count_accounted_start.inc();
}
#[inline(always)]
fn accounting_finish(&self) {
self.count_accounted_finish.inc();
}
#[inline(always)] #[inline(always)]
fn observe_throttling( fn observe_throttling(
&self, &self,
@@ -3284,7 +3097,7 @@ pub(crate) mod tenant_throttling {
) { ) {
let val = u64::try_from(wait_time.as_micros()).unwrap(); let val = u64::try_from(wait_time.as_micros()).unwrap();
self.wait_time.inc_by(val); self.wait_time.inc_by(val);
self.count_throttled.inc(); self.count.inc();
} }
} }
} }
@@ -3414,14 +3227,11 @@ pub fn preinitialize_metrics() {
} }
// countervecs // countervecs
[ [&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT]
&BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT, .into_iter()
&SMGR_QUERY_STARTED_GLOBAL, .for_each(|c| {
] Lazy::force(c);
.into_iter() });
.for_each(|c| {
Lazy::force(c);
});
// gauges // gauges
WALRECEIVER_ACTIVE_MANAGERS.get(); WALRECEIVER_ACTIVE_MANAGERS.get();
@@ -3443,8 +3253,7 @@ pub fn preinitialize_metrics() {
// Custom // Custom
Lazy::force(&RECONSTRUCT_TIME); Lazy::force(&RECONSTRUCT_TIME);
Lazy::force(&tenant_throttling::TIMELINE_GET);
Lazy::force(&BASEBACKUP_QUERY_TIME); Lazy::force(&BASEBACKUP_QUERY_TIME);
Lazy::force(&COMPUTE_COMMANDS_COUNTERS); Lazy::force(&COMPUTE_COMMANDS_COUNTERS);
tenant_throttling::preinitialize_global_metrics();
} }

View File

@@ -633,7 +633,7 @@ impl Timeline {
pub(crate) async fn get_twophase_file( pub(crate) async fn get_twophase_file(
&self, &self,
xid: u64, xid: TransactionId,
lsn: Lsn, lsn: Lsn,
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<Bytes, PageReconstructError> { ) -> Result<Bytes, PageReconstructError> {
@@ -646,19 +646,11 @@ impl Timeline {
&self, &self,
lsn: Lsn, lsn: Lsn,
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<HashSet<u64>, PageReconstructError> { ) -> Result<HashSet<TransactionId>, PageReconstructError> {
// fetch directory entry // fetch directory entry
let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?; let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
if self.pg_version >= 17 { Ok(TwoPhaseDirectory::des(&buf)?.xids)
Ok(TwoPhaseDirectoryV17::des(&buf)?.xids)
} else {
Ok(TwoPhaseDirectory::des(&buf)?
.xids
.iter()
.map(|x| u64::from(*x))
.collect())
}
} }
pub(crate) async fn get_control_file( pub(crate) async fn get_control_file(
@@ -840,36 +832,6 @@ impl Timeline {
Ok(total_size * BLCKSZ as u64) Ok(total_size * BLCKSZ as u64)
} }
/// Get a KeySpace that covers all the Keys that are in use at AND below the given LSN. This is only used
/// for gc-compaction.
///
/// gc-compaction cannot use the same `collect_keyspace` function as the legacy compaction because it
/// processes data at multiple LSNs and needs to be aware of the fact that some key ranges might need to
/// be kept only for a specific range of LSN.
///
/// Consider the case that the user created branches at LSN 10 and 20, where the user created a table A at
/// LSN 10 and dropped that table at LSN 20. `collect_keyspace` at LSN 10 will return the key range
/// corresponding to that table, while LSN 20 won't. The keyspace info at a single LSN is not enough to
/// determine which keys to retain/drop for gc-compaction.
///
/// For now, it only drops AUX-v1 keys. But in the future, the function will be extended to return the keyspace
/// to be retained for each of the branch LSN.
///
/// The return value is (dense keyspace, sparse keyspace).
pub(crate) async fn collect_gc_compaction_keyspace(
&self,
) -> Result<(KeySpace, SparseKeySpace), CollectKeySpaceError> {
let metadata_key_begin = Key::metadata_key_range().start;
let aux_v1_key = AUX_FILES_KEY;
let dense_keyspace = KeySpace {
ranges: vec![Key::MIN..aux_v1_key, aux_v1_key.next()..metadata_key_begin],
};
Ok((
dense_keyspace,
SparseKeySpace(KeySpace::single(Key::metadata_key_range())),
))
}
/// ///
/// Get a KeySpace that covers all the Keys that are in use at the given LSN. /// Get a KeySpace that covers all the Keys that are in use at the given LSN.
/// Anything that's not listed may be removed from the underlying storage (from /// Anything that's not listed may be removed from the underlying storage (from
@@ -940,13 +902,9 @@ impl Timeline {
// Then pg_twophase // Then pg_twophase
result.add_key(TWOPHASEDIR_KEY); result.add_key(TWOPHASEDIR_KEY);
let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
let mut xids: Vec<u64> = self let twophase_dir = TwoPhaseDirectory::des(&buf)?;
.list_twophase_files(lsn, ctx) let mut xids: Vec<TransactionId> = twophase_dir.xids.iter().cloned().collect();
.await?
.iter()
.cloned()
.collect();
xids.sort_unstable(); xids.sort_unstable();
for xid in xids { for xid in xids {
result.add_key(twophase_file_key(xid)); result.add_key(twophase_file_key(xid));
@@ -1169,15 +1127,9 @@ impl<'a> DatadirModification<'a> {
// Create AuxFilesDirectory // Create AuxFilesDirectory
self.init_aux_dir()?; self.init_aux_dir()?;
let buf = if self.tline.pg_version >= 17 { let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
TwoPhaseDirectoryV17::ser(&TwoPhaseDirectoryV17 { xids: HashSet::new(),
xids: HashSet::new(), })?;
})
} else {
TwoPhaseDirectory::ser(&TwoPhaseDirectory {
xids: HashSet::new(),
})
}?;
self.pending_directory_entries self.pending_directory_entries
.push((DirectoryKind::TwoPhase, 0)); .push((DirectoryKind::TwoPhase, 0));
self.put(TWOPHASEDIR_KEY, Value::Image(buf.into())); self.put(TWOPHASEDIR_KEY, Value::Image(buf.into()));
@@ -1369,31 +1321,22 @@ impl<'a> DatadirModification<'a> {
pub async fn put_twophase_file( pub async fn put_twophase_file(
&mut self, &mut self,
xid: u64, xid: TransactionId,
img: Bytes, img: Bytes,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
// Add it to the directory entry // Add it to the directory entry
let dirbuf = self.get(TWOPHASEDIR_KEY, ctx).await?; let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
let newdirbuf = if self.tline.pg_version >= 17 { let mut dir = TwoPhaseDirectory::des(&buf)?;
let mut dir = TwoPhaseDirectoryV17::des(&dirbuf)?; if !dir.xids.insert(xid) {
if !dir.xids.insert(xid) { anyhow::bail!("twophase file for xid {} already exists", xid);
anyhow::bail!("twophase file for xid {} already exists", xid); }
} self.pending_directory_entries
self.pending_directory_entries .push((DirectoryKind::TwoPhase, dir.xids.len()));
.push((DirectoryKind::TwoPhase, dir.xids.len())); self.put(
Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?) TWOPHASEDIR_KEY,
} else { Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
let xid = xid as u32; );
let mut dir = TwoPhaseDirectory::des(&dirbuf)?;
if !dir.xids.insert(xid) {
anyhow::bail!("twophase file for xid {} already exists", xid);
}
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len()));
Bytes::from(TwoPhaseDirectory::ser(&dir)?)
};
self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
self.put(twophase_file_key(xid), Value::Image(img)); self.put(twophase_file_key(xid), Value::Image(img));
Ok(()) Ok(())
@@ -1696,32 +1639,22 @@ impl<'a> DatadirModification<'a> {
/// This method is used for marking truncated SLRU files /// This method is used for marking truncated SLRU files
pub async fn drop_twophase_file( pub async fn drop_twophase_file(
&mut self, &mut self,
xid: u64, xid: TransactionId,
ctx: &RequestContext, ctx: &RequestContext,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
// Remove it from the directory entry // Remove it from the directory entry
let buf = self.get(TWOPHASEDIR_KEY, ctx).await?; let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
let newdirbuf = if self.tline.pg_version >= 17 { let mut dir = TwoPhaseDirectory::des(&buf)?;
let mut dir = TwoPhaseDirectoryV17::des(&buf)?;
if !dir.xids.remove(&xid) { if !dir.xids.remove(&xid) {
warn!("twophase file for xid {} does not exist", xid); warn!("twophase file for xid {} does not exist", xid);
} }
self.pending_directory_entries self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len())); .push((DirectoryKind::TwoPhase, dir.xids.len()));
Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?) self.put(
} else { TWOPHASEDIR_KEY,
let xid: u32 = u32::try_from(xid)?; Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
let mut dir = TwoPhaseDirectory::des(&buf)?; );
if !dir.xids.remove(&xid) {
warn!("twophase file for xid {} does not exist", xid);
}
self.pending_directory_entries
.push((DirectoryKind::TwoPhase, dir.xids.len()));
Bytes::from(TwoPhaseDirectory::ser(&dir)?)
};
self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));
// Delete it // Delete it
self.delete(twophase_key_range(xid)); self.delete(twophase_key_range(xid));
@@ -2191,21 +2124,11 @@ struct DbDirectory {
dbdirs: HashMap<(Oid, Oid), bool>, dbdirs: HashMap<(Oid, Oid), bool>,
} }
// The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of
// pg_twophase files were expanded from 32-bit XIDs to 64-bit XIDs. Previously, the files
// were named like "pg_twophase/000002E5", now they're like
// "pg_twophase/0000000A000002E4".
#[derive(Debug, Serialize, Deserialize)] #[derive(Debug, Serialize, Deserialize)]
struct TwoPhaseDirectory { struct TwoPhaseDirectory {
xids: HashSet<TransactionId>, xids: HashSet<TransactionId>,
} }
#[derive(Debug, Serialize, Deserialize)]
struct TwoPhaseDirectoryV17 {
xids: HashSet<u64>,
}
#[derive(Debug, Serialize, Deserialize, Default)] #[derive(Debug, Serialize, Deserialize, Default)]
struct RelDirectory { struct RelDirectory {
// Set of relations that exist. (relfilenode, forknum) // Set of relations that exist. (relfilenode, forknum)

View File

@@ -18,6 +18,7 @@ use camino::Utf8Path;
use camino::Utf8PathBuf; use camino::Utf8PathBuf;
use enumset::EnumSet; use enumset::EnumSet;
use futures::stream::FuturesUnordered; use futures::stream::FuturesUnordered;
use futures::FutureExt;
use futures::StreamExt; use futures::StreamExt;
use pageserver_api::models; use pageserver_api::models;
use pageserver_api::models::AuxFilePolicy; use pageserver_api::models::AuxFilePolicy;
@@ -33,7 +34,6 @@ use remote_storage::GenericRemoteStorage;
use remote_storage::TimeoutOrCancel; use remote_storage::TimeoutOrCancel;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::fmt; use std::fmt;
use std::future::Future;
use std::sync::Weak; use std::sync::Weak;
use std::time::SystemTime; use std::time::SystemTime;
use storage_broker::BrokerClientChannel; use storage_broker::BrokerClientChannel;
@@ -140,7 +140,6 @@ pub mod metadata;
pub mod remote_timeline_client; pub mod remote_timeline_client;
pub mod storage_layer; pub mod storage_layer;
pub mod checks;
pub mod config; pub mod config;
pub mod mgr; pub mod mgr;
pub mod secondary; pub mod secondary;
@@ -302,7 +301,7 @@ pub struct Tenant {
/// Throttle applied at the top of [`Timeline::get`]. /// Throttle applied at the top of [`Timeline::get`].
/// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance. /// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
pub(crate) timeline_get_throttle: pub(crate) timeline_get_throttle:
Arc<throttle::Throttle<crate::metrics::tenant_throttling::TimelineGet>>, Arc<throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>>,
/// An ongoing timeline detach concurrency limiter. /// An ongoing timeline detach concurrency limiter.
/// ///
@@ -1031,9 +1030,13 @@ impl Tenant {
} }
Ok(TenantPreload { Ok(TenantPreload {
timelines: self timelines: Self::load_timeline_metadata(
.load_timelines_metadata(remote_timeline_ids, remote_storage, cancel) self,
.await?, remote_timeline_ids,
remote_storage,
cancel,
)
.await?,
}) })
} }
@@ -1299,7 +1302,7 @@ impl Tenant {
.await .await
} }
async fn load_timelines_metadata( async fn load_timeline_metadata(
self: &Arc<Tenant>, self: &Arc<Tenant>,
timeline_ids: HashSet<TimelineId>, timeline_ids: HashSet<TimelineId>,
remote_storage: &GenericRemoteStorage, remote_storage: &GenericRemoteStorage,
@@ -1307,10 +1310,33 @@ impl Tenant {
) -> anyhow::Result<HashMap<TimelineId, TimelinePreload>> { ) -> anyhow::Result<HashMap<TimelineId, TimelinePreload>> {
let mut part_downloads = JoinSet::new(); let mut part_downloads = JoinSet::new();
for timeline_id in timeline_ids { for timeline_id in timeline_ids {
let client = RemoteTimelineClient::new(
remote_storage.clone(),
self.deletion_queue_client.clone(),
self.conf,
self.tenant_shard_id,
timeline_id,
self.generation,
);
let cancel_clone = cancel.clone(); let cancel_clone = cancel.clone();
part_downloads.spawn( part_downloads.spawn(
self.load_timeline_metadata(timeline_id, remote_storage.clone(), cancel_clone) async move {
.instrument(info_span!("download_index_part", %timeline_id)), debug!("starting index part download");
let index_part = client.download_index_file(&cancel_clone).await;
debug!("finished index part download");
Result::<_, anyhow::Error>::Ok(TimelinePreload {
client,
timeline_id,
index_part,
})
}
.map(move |res| {
res.with_context(|| format!("download index part for timeline {timeline_id}"))
})
.instrument(info_span!("download_index_part", %timeline_id)),
); );
} }
@@ -1321,7 +1347,8 @@ impl Tenant {
next = part_downloads.join_next() => { next = part_downloads.join_next() => {
match next { match next {
Some(result) => { Some(result) => {
let preload = result.context("join preload task")?; let preload_result = result.context("join preload task")?;
let preload = preload_result?;
timeline_preloads.insert(preload.timeline_id, preload); timeline_preloads.insert(preload.timeline_id, preload);
}, },
None => { None => {
@@ -1338,36 +1365,6 @@ impl Tenant {
Ok(timeline_preloads) Ok(timeline_preloads)
} }
fn load_timeline_metadata(
self: &Arc<Tenant>,
timeline_id: TimelineId,
remote_storage: GenericRemoteStorage,
cancel: CancellationToken,
) -> impl Future<Output = TimelinePreload> {
let client = RemoteTimelineClient::new(
remote_storage.clone(),
self.deletion_queue_client.clone(),
self.conf,
self.tenant_shard_id,
timeline_id,
self.generation,
);
async move {
debug_assert_current_span_has_tenant_and_timeline_id();
debug!("starting index part download");
let index_part = client.download_index_file(&cancel).await;
debug!("finished index part download");
TimelinePreload {
client,
timeline_id,
index_part,
}
}
}
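The inlined preload keeps the same concurrency shape as the removed helper: one task per timeline spawned on a `JoinSet`, results collected into a map in whatever order they finish. A self-contained sketch, where `download` is a hypothetical stand-in for the index-part download:

```rust
// Sketch only: fan out one download task per timeline, then gather.
use std::collections::HashMap;

use tokio::task::JoinSet;

async fn download(timeline_id: u64) -> anyhow::Result<String> {
    Ok(format!("index_part for timeline {timeline_id}"))
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let mut tasks = JoinSet::new();
    for id in [1u64, 2, 3] {
        tasks.spawn(async move { (id, download(id).await) });
    }
    let mut preloads = HashMap::new();
    while let Some(joined) = tasks.join_next().await {
        let (id, res) = joined?; // a JoinError here means the task panicked
        preloads.insert(id, res?);
    }
    println!("{} timelines preloaded", preloads.len());
    Ok(())
}
```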
pub(crate) async fn apply_timeline_archival_config( pub(crate) async fn apply_timeline_archival_config(
&self, &self,
timeline_id: TimelineId, timeline_id: TimelineId,
@@ -1576,9 +1573,6 @@ impl Tenant {
image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>,
end_lsn: Lsn, end_lsn: Lsn,
) -> anyhow::Result<Arc<Timeline>> { ) -> anyhow::Result<Arc<Timeline>> {
use checks::check_valid_layermap;
use itertools::Itertools;
let tline = self let tline = self
.create_test_timeline(new_timeline_id, initdb_lsn, pg_version, ctx) .create_test_timeline(new_timeline_id, initdb_lsn, pg_version, ctx)
.await?; .await?;
@@ -1593,18 +1587,6 @@ impl Tenant {
.force_create_image_layer(lsn, images, Some(initdb_lsn), ctx) .force_create_image_layer(lsn, images, Some(initdb_lsn), ctx)
.await?; .await?;
} }
let layer_names = tline
.layers
.read()
.await
.layer_map()
.unwrap()
.iter_historic_layers()
.map(|layer| layer.layer_name())
.collect_vec();
if let Some(err) = check_valid_layermap(&layer_names) {
bail!("invalid layermap: {err}");
}
Ok(tline) Ok(tline)
} }
@@ -1968,6 +1950,9 @@ impl Tenant {
TenantState::Activating(_) | TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => { TenantState::Activating(_) | TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => {
panic!("caller is responsible for calling activate() only on Loading / Attaching tenants, got {state:?}", state = current_state); panic!("caller is responsible for calling activate() only on Loading / Attaching tenants, got {state:?}", state = current_state);
} }
TenantState::Loading => {
*current_state = TenantState::Activating(ActivatingFrom::Loading);
}
TenantState::Attaching => { TenantState::Attaching => {
*current_state = TenantState::Activating(ActivatingFrom::Attaching); *current_state = TenantState::Activating(ActivatingFrom::Attaching);
} }
@@ -2148,7 +2133,7 @@ impl Tenant {
async fn set_stopping( async fn set_stopping(
&self, &self,
progress: completion::Barrier, progress: completion::Barrier,
_allow_transition_from_loading: bool, allow_transition_from_loading: bool,
allow_transition_from_attaching: bool, allow_transition_from_attaching: bool,
) -> Result<(), SetStoppingError> { ) -> Result<(), SetStoppingError> {
let mut rx = self.state.subscribe(); let mut rx = self.state.subscribe();
@@ -2163,6 +2148,7 @@ impl Tenant {
); );
false false
} }
TenantState::Loading => allow_transition_from_loading,
TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true, TenantState::Active | TenantState::Broken { .. } | TenantState::Stopping { .. } => true,
}) })
.await .await
@@ -2181,6 +2167,13 @@ impl Tenant {
*current_state = TenantState::Stopping { progress }; *current_state = TenantState::Stopping { progress };
true true
} }
TenantState::Loading => {
if !allow_transition_from_loading {
unreachable!("3we ensured above that we're done with activation, and, there is no re-activation")
};
*current_state = TenantState::Stopping { progress };
true
}
TenantState::Active => { TenantState::Active => {
// FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines // FIXME: due to time-of-check vs time-of-use issues, it can happen that new timelines
// are created after the transition to Stopping. That's harmless, as the Timelines // are created after the transition to Stopping. That's harmless, as the Timelines
@@ -2236,7 +2229,7 @@ impl Tenant {
// The load & attach routines own the tenant state until it has reached `Active`. // The load & attach routines own the tenant state until it has reached `Active`.
// So, wait until it's done. // So, wait until it's done.
rx.wait_for(|state| match state { rx.wait_for(|state| match state {
TenantState::Activating(_) | TenantState::Attaching => { TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => {
info!( info!(
"waiting for {} to turn Active|Broken|Stopping", "waiting for {} to turn Active|Broken|Stopping",
<&'static str>::from(state) <&'static str>::from(state)
@@ -2256,7 +2249,7 @@ impl Tenant {
let reason = reason.to_string(); let reason = reason.to_string();
self.state.send_modify(|current_state| { self.state.send_modify(|current_state| {
match *current_state { match *current_state {
TenantState::Activating(_) | TenantState::Attaching => { TenantState::Activating(_) | TenantState::Loading | TenantState::Attaching => {
unreachable!("we ensured above that we're done with activation, and, there is no re-activation") unreachable!("we ensured above that we're done with activation, and, there is no re-activation")
} }
TenantState::Active => { TenantState::Active => {
@@ -2300,7 +2293,7 @@ impl Tenant {
loop { loop {
let current_state = receiver.borrow_and_update().clone(); let current_state = receiver.borrow_and_update().clone();
match current_state { match current_state {
TenantState::Attaching | TenantState::Activating(_) => { TenantState::Loading | TenantState::Attaching | TenantState::Activating(_) => {
// in these states, there's a chance that we can reach ::Active // in these states, there's a chance that we can reach ::Active
self.activate_now(); self.activate_now();
match timeout_cancellable(timeout, &self.cancel, receiver.changed()).await { match timeout_cancellable(timeout, &self.cancel, receiver.changed()).await {
@@ -2822,7 +2815,7 @@ impl Tenant {
gate: Gate::default(), gate: Gate::default(),
timeline_get_throttle: Arc::new(throttle::Throttle::new( timeline_get_throttle: Arc::new(throttle::Throttle::new(
Tenant::get_timeline_get_throttle_config(conf, &attached_conf.tenant_conf), Tenant::get_timeline_get_throttle_config(conf, &attached_conf.tenant_conf),
crate::metrics::tenant_throttling::TimelineGet::new(&tenant_shard_id), &crate::metrics::tenant_throttling::TIMELINE_GET,
)), )),
tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)), tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)),
ongoing_timeline_detach: std::sync::Mutex::default(), ongoing_timeline_detach: std::sync::Mutex::default(),
@@ -3204,9 +3197,6 @@ impl Tenant {
image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>,
end_lsn: Lsn, end_lsn: Lsn,
) -> anyhow::Result<Arc<Timeline>> { ) -> anyhow::Result<Arc<Timeline>> {
use checks::check_valid_layermap;
use itertools::Itertools;
let tline = self let tline = self
.branch_timeline_test(src_timeline, dst_id, ancestor_lsn, ctx) .branch_timeline_test(src_timeline, dst_id, ancestor_lsn, ctx)
.await?; .await?;
@@ -3227,18 +3217,6 @@ impl Tenant {
.force_create_image_layer(lsn, images, Some(ancestor_lsn), ctx) .force_create_image_layer(lsn, images, Some(ancestor_lsn), ctx)
.await?; .await?;
} }
let layer_names = tline
.layers
.read()
.await
.layer_map()
.unwrap()
.iter_historic_layers()
.map(|layer| layer.layer_name())
.collect_vec();
if let Some(err) = check_valid_layermap(&layer_names) {
bail!("invalid layermap: {err}");
}
Ok(tline) Ok(tline)
} }
@@ -3616,7 +3594,7 @@ impl Tenant {
start_lsn: Lsn, start_lsn: Lsn,
ancestor: Option<Arc<Timeline>>, ancestor: Option<Arc<Timeline>>,
last_aux_file_policy: Option<AuxFilePolicy>, last_aux_file_policy: Option<AuxFilePolicy>,
) -> anyhow::Result<UninitializedTimeline<'a>> { ) -> anyhow::Result<UninitializedTimeline> {
let tenant_shard_id = self.tenant_shard_id; let tenant_shard_id = self.tenant_shard_id;
let resources = self.build_timeline_resources(new_timeline_id); let resources = self.build_timeline_resources(new_timeline_id);
@@ -4133,7 +4111,7 @@ pub(crate) mod harness {
let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager)); let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));
let tenant = Arc::new(Tenant::new( let tenant = Arc::new(Tenant::new(
TenantState::Attaching, TenantState::Loading,
self.conf, self.conf,
AttachedTenantConf::try_from(LocationConf::attached_single( AttachedTenantConf::try_from(LocationConf::attached_single(
TenantConfOpt::from(self.tenant_conf.clone()), TenantConfOpt::from(self.tenant_conf.clone()),
@@ -4186,18 +4164,9 @@ pub(crate) mod harness {
let records_neon = records.iter().all(|r| apply_neon::can_apply_in_neon(&r.1)); let records_neon = records.iter().all(|r| apply_neon::can_apply_in_neon(&r.1));
if records_neon { if records_neon {
// For Neon wal records, we can decode without spawning postgres, so do so. // For Neon wal records, we can decode without spawning postgres, so do so.
let mut page = match (base_img, records.first()) { let base_img = base_img.expect("Neon WAL redo requires base image").1;
(Some((_lsn, img)), _) => { let mut page = BytesMut::new();
let mut page = BytesMut::new(); page.extend_from_slice(&base_img);
page.extend_from_slice(&img);
page
}
(_, Some((_lsn, rec))) if rec.will_init() => BytesMut::new(),
_ => {
panic!("Neon WAL redo requires base image or will init record");
}
};
for (record_lsn, record) in records { for (record_lsn, record) in records {
apply_neon::apply_in_neon(&record, record_lsn, key, &mut page)?; apply_neon::apply_in_neon(&record, record_lsn, key, &mut page)?;
} }
@@ -8501,135 +8470,4 @@ mod tests {
Ok(()) Ok(())
} }
// Regression test for https://github.com/neondatabase/neon/issues/9012
// Create an image arrangement where we have to read at different LSN ranges
// from a delta layer. This is achieved by overlapping an image layer on top of
// a delta layer. Like so:
//
//  A                B
// +----------------+ -> delta_layer
// |                |   ^ lsn
// |       =========|-> nested_image_layer
// |       C        |   |
// +----------------+   |
//  ======== -> baseline_image_layer        +-------> key
//
//
// When querying the key range [A, B) we need to read at different LSN ranges
// for [A, C) and [C, B). This test checks that the described edge case is handled correctly.
#[tokio::test]
async fn test_vectored_read_with_nested_image_layer() -> anyhow::Result<()> {
let harness = TenantHarness::create("test_vectored_read_with_nested_image_layer").await?;
let (tenant, ctx) = harness.load().await;
let will_init_keys = [2, 6];
fn get_key(id: u32) -> Key {
let mut key = Key::from_hex("110000000033333333444444445500000000").unwrap();
key.field6 = id;
key
}
let mut expected_key_values = HashMap::new();
let baseline_image_layer_lsn = Lsn(0x10);
let mut baseline_img_layer = Vec::new();
for i in 0..5 {
let key = get_key(i);
let value = format!("value {i}@{baseline_image_layer_lsn}");
let removed = expected_key_values.insert(key, value.clone());
assert!(removed.is_none());
baseline_img_layer.push((key, Bytes::from(value)));
}
let nested_image_layer_lsn = Lsn(0x50);
let mut nested_img_layer = Vec::new();
for i in 5..10 {
let key = get_key(i);
let value = format!("value {i}@{nested_image_layer_lsn}");
let removed = expected_key_values.insert(key, value.clone());
assert!(removed.is_none());
nested_img_layer.push((key, Bytes::from(value)));
}
let mut delta_layer_spec = Vec::default();
let delta_layer_start_lsn = Lsn(0x20);
let mut delta_layer_end_lsn = delta_layer_start_lsn;
for i in 0..10 {
let key = get_key(i);
let key_in_nested = nested_img_layer
.iter()
.any(|(key_with_img, _)| *key_with_img == key);
let lsn = {
if key_in_nested {
Lsn(nested_image_layer_lsn.0 + 0x10)
} else {
delta_layer_start_lsn
}
};
let will_init = will_init_keys.contains(&i);
if will_init {
delta_layer_spec.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init())));
expected_key_values.insert(key, "".to_string());
} else {
let delta = format!("@{lsn}");
delta_layer_spec.push((
key,
lsn,
Value::WalRecord(NeonWalRecord::wal_append(&delta)),
));
expected_key_values
.get_mut(&key)
.expect("An image exists for each key")
.push_str(delta.as_str());
}
delta_layer_end_lsn = std::cmp::max(delta_layer_start_lsn, lsn);
}
delta_layer_end_lsn = Lsn(delta_layer_end_lsn.0 + 1);
assert!(
nested_image_layer_lsn > delta_layer_start_lsn
&& nested_image_layer_lsn < delta_layer_end_lsn
);
let tline = tenant
.create_test_timeline_with_layers(
TIMELINE_ID,
baseline_image_layer_lsn,
DEFAULT_PG_VERSION,
&ctx,
vec![DeltaLayerTestDesc::new_with_inferred_key_range(
delta_layer_start_lsn..delta_layer_end_lsn,
delta_layer_spec,
)], // delta layers
vec![
(baseline_image_layer_lsn, baseline_img_layer),
(nested_image_layer_lsn, nested_img_layer),
], // image layers
delta_layer_end_lsn,
)
.await?;
let keyspace = KeySpace::single(get_key(0)..get_key(10));
let results = tline
.get_vectored(keyspace, delta_layer_end_lsn, &ctx)
.await
.expect("No vectored errors");
for (key, res) in results {
let value = res.expect("No key errors");
let expected_value = expected_key_values.remove(&key).expect("No unknown keys");
assert_eq!(value, Bytes::from(expected_value));
}
Ok(())
}
} }

View File

@@ -1,55 +0,0 @@
use std::collections::BTreeSet;
use itertools::Itertools;
use super::storage_layer::LayerName;
/// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
///
/// ```plain
/// |       |                 |       |
/// |   1   |    |   2   |    |   3   |
/// |       |    |       |    |       |
/// ```
///
/// This is not a valid layer map because the LSN range of layer 1 intersects with the LSN range of layer 2. 1 and 2 should have
/// the same LSN range.
///
/// The exception is that when layer 2 only contains a single key, it could be split over the LSN range. For example,
///
/// ```plain
/// |       |    |   2   |    |       |
/// |   1   |    |-------|    |   3   |
/// |       |    |   4   |    |       |
/// ```
///
/// If layers 2 and 4 contain the same single key, this is also a valid layer map.
pub fn check_valid_layermap(metadata: &[LayerName]) -> Option<String> {
let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
let mut all_delta_layers = Vec::new();
for name in metadata {
if let LayerName::Delta(layer) = name {
if layer.key_range.start.next() != layer.key_range.end {
all_delta_layers.push(layer.clone());
}
}
}
for layer in &all_delta_layers {
let lsn_range = &layer.lsn_range;
lsn_split_point.insert(lsn_range.start);
lsn_split_point.insert(lsn_range.end);
}
for layer in &all_delta_layers {
let lsn_range = layer.lsn_range.clone();
let intersects = lsn_split_point.range(lsn_range).collect_vec();
if intersects.len() > 1 {
let err = format!(
"layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]",
layer,
intersects.into_iter().map(|lsn| lsn.to_string()).join(", ")
);
return Some(err);
}
}
None
}
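The invariant above boils down to: collect every multi-key delta layer's LSN start and end as split points, then reject any layer whose LSN range strictly contains a split point contributed by another layer. A self-contained sketch of the same check over plain `u64` ranges, ignoring the single-key exception (`check_lsn_split` is a hypothetical helper, not pageserver API):

```rust
use std::collections::BTreeSet;
use std::ops::Range;

/// Reject any range whose interior (start, end) contains a split point
/// contributed by another range: the layer-map LSN split assumption.
fn check_lsn_split(ranges: &[Range<u64>]) -> Option<String> {
    let mut split_points = BTreeSet::new();
    for r in ranges {
        split_points.insert(r.start);
        split_points.insert(r.end);
    }
    for r in ranges {
        if r.start + 1 > r.end {
            continue; // degenerate range: nothing strictly inside
        }
        // `r.start + 1 .. r.end` excludes the range's own endpoints.
        let inside: Vec<_> = split_points.range(r.start + 1..r.end).collect();
        if !inside.is_empty() {
            return Some(format!("range {r:?} crosses split points {inside:?}"));
        }
    }
    None
}

fn main() {
    // Ranges that only meet at a boundary are fine.
    assert!(check_lsn_split(&[10..20, 20..30]).is_none());
    // 10..30 strictly contains the split point 20 introduced by 20..40.
    assert!(check_lsn_split(&[10..30, 20..40]).is_some());
}
```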

View File

@@ -1,29 +1,11 @@
-use std::{collections::HashMap, time::Duration};
+use std::collections::HashMap;

-use super::remote_timeline_client::index::GcBlockingReason;
-use tokio::time::Instant;
 use utils::id::TimelineId;

-type TimelinesBlocked = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;
+use super::remote_timeline_client::index::GcBlockingReason;

-#[derive(Default)]
-struct Storage {
-    timelines_blocked: TimelinesBlocked,
-    /// The deadline before which we are blocked from GC so that
-    /// leases have a chance to be renewed.
-    lsn_lease_deadline: Option<Instant>,
-}
-
-impl Storage {
-    fn is_blocked_by_lsn_lease_deadline(&self) -> bool {
-        self.lsn_lease_deadline
-            .map(|d| Instant::now() < d)
-            .unwrap_or(false)
-    }
-}
+type Storage = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;

-/// GcBlock provides persistent (per-timeline) gc blocking and facilitates transient time based gc
-/// blocking.
 #[derive(Default)]
 pub(crate) struct GcBlock {
     /// The timelines which have current reasons to block gc.
@@ -31,12 +13,6 @@ pub(crate) struct GcBlock {
     /// LOCK ORDER: this is held locked while scheduling the next index_part update. This is done
     /// to keep the this field up to date with RemoteTimelineClient `upload_queue.dirty`.
     reasons: std::sync::Mutex<Storage>,

-    /// GC background task or manually run `Tenant::gc_iteration` holds a lock on this.
-    ///
-    /// Do not add any more features taking and forbidding taking this lock. It should be
-    /// `tokio::sync::Notify`, but that is rarely used. On the other side, [`GcBlock::insert`]
-    /// synchronizes with gc attempts by locking and unlocking this mutex.
     blocking: tokio::sync::Mutex<()>,
 }
@@ -66,20 +42,6 @@ impl GcBlock {
         }
     }

-    /// Sets a deadline before which we cannot proceed to GC due to lsn lease.
-    ///
-    /// We do this as the leases mapping are not persisted to disk. By delaying GC by lease
-    /// length, we guarantee that all the leases we granted before will have a chance to renew
-    /// when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle.
-    pub(super) fn set_lsn_lease_deadline(&self, lsn_lease_length: Duration) {
-        let deadline = Instant::now() + lsn_lease_length;
-        let mut g = self.reasons.lock().unwrap();
-        g.lsn_lease_deadline = Some(deadline);
-    }
-
-    /// Describe the current gc blocking reasons.
-    ///
-    /// TODO: make this json serializable.
     pub(crate) fn summary(&self) -> Option<BlockingReasons> {
         let g = self.reasons.lock().unwrap();
@@ -102,7 +64,7 @@ impl GcBlock {
     ) -> anyhow::Result<bool> {
         let (added, uploaded) = {
             let mut g = self.reasons.lock().unwrap();
-            let set = g.timelines_blocked.entry(timeline.timeline_id).or_default();
+            let set = g.entry(timeline.timeline_id).or_default();
             let added = set.insert(reason);

             // LOCK ORDER: intentionally hold the lock, see self.reasons.
@@ -133,7 +95,7 @@ impl GcBlock {
         let (remaining_blocks, uploaded) = {
             let mut g = self.reasons.lock().unwrap();
-            match g.timelines_blocked.entry(timeline.timeline_id) {
+            match g.entry(timeline.timeline_id) {
                 Entry::Occupied(mut oe) => {
                     let set = oe.get_mut();
                     set.remove(reason);
@@ -147,7 +109,7 @@ impl GcBlock {
                 }
             }

-            let remaining_blocks = g.timelines_blocked.len();
+            let remaining_blocks = g.len();

             // LOCK ORDER: intentionally hold the lock while scheduling; see self.reasons
             let uploaded = timeline
@@ -172,11 +134,11 @@ impl GcBlock {
     pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
         let unblocked = {
             let mut g = self.reasons.lock().unwrap();
-            if g.timelines_blocked.is_empty() {
+            if g.is_empty() {
                 return;
             }

-            g.timelines_blocked.remove(&timeline.timeline_id);
+            g.remove(&timeline.timeline_id);

             BlockingReasons::clean_and_summarize(g).is_none()
         };
@@ -187,11 +149,10 @@ impl GcBlock {
     }

     /// Initialize with the non-deleted timelines of this tenant.
-    pub(crate) fn set_scanned(&self, scanned: TimelinesBlocked) {
+    pub(crate) fn set_scanned(&self, scanned: Storage) {
         let mut g = self.reasons.lock().unwrap();
-        assert!(g.timelines_blocked.is_empty());
-        g.timelines_blocked
-            .extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));
+        assert!(g.is_empty());
+        g.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));

         if let Some(reasons) = BlockingReasons::clean_and_summarize(g) {
             tracing::info!(summary=?reasons, "initialized with gc blocked");
@@ -205,7 +166,6 @@ pub(super) struct Guard<'a> {
 #[derive(Debug)]
 pub(crate) struct BlockingReasons {
-    tenant_blocked_by_lsn_lease_deadline: bool,
     timelines: usize,
     reasons: enumset::EnumSet<GcBlockingReason>,
 }
@@ -214,8 +174,8 @@ impl std::fmt::Display for BlockingReasons {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(
             f,
-            "tenant_blocked_by_lsn_lease_deadline: {}, {} timelines block for {:?}",
-            self.tenant_blocked_by_lsn_lease_deadline, self.timelines, self.reasons
+            "{} timelines block for {:?}",
+            self.timelines, self.reasons
         )
     }
 }
@@ -223,15 +183,13 @@ impl std::fmt::Display for BlockingReasons {
 impl BlockingReasons {
     fn clean_and_summarize(mut g: std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
         let mut reasons = enumset::EnumSet::empty();
-        g.timelines_blocked.retain(|_key, value| {
+        g.retain(|_key, value| {
             reasons = reasons.union(*value);
             !value.is_empty()
         });
-        let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline();
-        if !g.timelines_blocked.is_empty() || blocked_by_lsn_lease_deadline {
+        if !g.is_empty() {
             Some(BlockingReasons {
-                tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline,
-                timelines: g.timelines_blocked.len(),
+                timelines: g.len(),
                 reasons,
             })
         } else {
@@ -240,17 +198,14 @@ impl BlockingReasons {
     }

     fn summarize(g: &std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
-        let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline();
-        if g.timelines_blocked.is_empty() && !blocked_by_lsn_lease_deadline {
+        if g.is_empty() {
             None
         } else {
             let reasons = g
-                .timelines_blocked
                 .values()
                 .fold(enumset::EnumSet::empty(), |acc, next| acc.union(*next));
             Some(BlockingReasons {
-                tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline,
-                timelines: g.timelines_blocked.len(),
+                timelines: g.len(),
                 reasons,
             })
         }
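The removed `lsn_lease_deadline` machinery is a transient, time-based block: after a restart or an AttachedMulti to AttachedSingle transition, GC is held off for one lease length so that previously granted leases, which are not persisted, get a chance to renew. A minimal sketch of that pattern using only `std::time` (names here are illustrative):

```rust
use std::time::{Duration, Instant};

/// Transient GC block: blocked until `deadline`, then unblocked for good.
#[derive(Default)]
struct LeaseGate {
    deadline: Option<Instant>,
}

impl LeaseGate {
    /// Mirrors the removed set_lsn_lease_deadline(): push the deadline out
    /// by one lease length so existing leases can renew first.
    fn set(&mut self, lease_length: Duration) {
        self.deadline = Some(Instant::now() + lease_length);
    }

    /// Mirrors the removed is_blocked_by_lsn_lease_deadline().
    fn is_blocked(&self) -> bool {
        self.deadline.map(|d| Instant::now() < d).unwrap_or(false)
    }
}

fn main() {
    let mut gate = LeaseGate::default();
    assert!(!gate.is_blocked()); // no deadline set yet
    gate.set(Duration::from_secs(600));
    assert!(gate.is_blocked()); // blocked for the next ten minutes
}
```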

View File

@@ -949,12 +949,6 @@ impl TenantManager {
             (LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => {
                 match attach_conf.generation.cmp(&tenant.generation) {
                     Ordering::Equal => {
-                        if attach_conf.attach_mode == AttachmentMode::Single {
-                            tenant
-                                .gc_block
-                                .set_lsn_lease_deadline(tenant.get_lsn_lease_length());
-                        }
-
                         // A transition from Attached to Attached in the same generation, we may
                         // take our fast path and just provide the updated configuration
                         // to the tenant.

View File

@@ -1,13 +1,13 @@
 //! Common traits and structs for layers

 pub mod delta_layer;
-pub mod filter_iterator;
 pub mod image_layer;
 pub mod inmemory_layer;
 pub(crate) mod layer;
 mod layer_desc;
 mod layer_name;
 pub mod merge_iterator;
 pub mod split_writer;

 use crate::context::{AccessStatsBehavior, RequestContext};
@@ -276,16 +276,6 @@ pub(crate) enum LayerId {
     InMemoryLayerId(InMemoryLayerFileId),
 }

-/// Uniquely identify a layer visit by the layer
-/// and LSN floor (or start LSN) of the reads.
-/// The layer itself is not enough since we may
-/// have different LSN lower bounds for delta layer reads.
-#[derive(Debug, PartialEq, Eq, Clone, Hash)]
-struct LayerToVisitId {
-    layer_id: LayerId,
-    lsn_floor: Lsn,
-}
-
 /// Layer wrapper for the read path. Note that it is valid
 /// to use these layers even after external operations have
 /// been performed on them (compaction, freeze, etc.).
@@ -297,9 +287,9 @@ pub(crate) enum ReadableLayer {
 /// A partial description of a read to be done.
 #[derive(Debug, Clone)]
-struct LayerVisit {
+struct ReadDesc {
     /// An id used to resolve the readable layer within the fringe
-    layer_to_visit_id: LayerToVisitId,
+    layer_id: LayerId,
     /// Lsn range for the read, used for selecting the next read
     lsn_range: Range<Lsn>,
 }
@@ -313,12 +303,12 @@ struct LayerVisit {
 /// a two layer indexing scheme.
 #[derive(Debug)]
 pub(crate) struct LayerFringe {
-    planned_visits_by_lsn: BinaryHeap<LayerVisit>,
-    visit_reads: HashMap<LayerToVisitId, LayerVisitReads>,
+    planned_reads_by_lsn: BinaryHeap<ReadDesc>,
+    layers: HashMap<LayerId, LayerKeyspace>,
 }

 #[derive(Debug)]
-struct LayerVisitReads {
+struct LayerKeyspace {
     layer: ReadableLayer,
     target_keyspace: KeySpaceRandomAccum,
 }
@@ -326,23 +316,23 @@ struct LayerVisitReads {
 impl LayerFringe {
     pub(crate) fn new() -> Self {
         LayerFringe {
-            planned_visits_by_lsn: BinaryHeap::new(),
-            visit_reads: HashMap::new(),
+            planned_reads_by_lsn: BinaryHeap::new(),
+            layers: HashMap::new(),
         }
     }

     pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
-        let read_desc = match self.planned_visits_by_lsn.pop() {
+        let read_desc = match self.planned_reads_by_lsn.pop() {
             Some(desc) => desc,
             None => return None,
         };

-        let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id);
+        let removed = self.layers.remove_entry(&read_desc.layer_id);

         match removed {
             Some((
                 _,
-                LayerVisitReads {
+                LayerKeyspace {
                     layer,
                     mut target_keyspace,
                 },
@@ -361,24 +351,20 @@ impl LayerFringe {
         keyspace: KeySpace,
         lsn_range: Range<Lsn>,
     ) {
-        let layer_to_visit_id = LayerToVisitId {
-            layer_id: layer.id(),
-            lsn_floor: lsn_range.start,
-        };
-
-        let entry = self.visit_reads.entry(layer_to_visit_id.clone());
+        let layer_id = layer.id();
+        let entry = self.layers.entry(layer_id.clone());

         match entry {
             Entry::Occupied(mut entry) => {
                 entry.get_mut().target_keyspace.add_keyspace(keyspace);
             }
             Entry::Vacant(entry) => {
-                self.planned_visits_by_lsn.push(LayerVisit {
+                self.planned_reads_by_lsn.push(ReadDesc {
                     lsn_range,
-                    layer_to_visit_id: layer_to_visit_id.clone(),
+                    layer_id: layer_id.clone(),
                 });
                 let mut accum = KeySpaceRandomAccum::new();
                 accum.add_keyspace(keyspace);
-                entry.insert(LayerVisitReads {
+                entry.insert(LayerKeyspace {
                     layer,
                     target_keyspace: accum,
                 });
@@ -393,7 +379,7 @@ impl Default for LayerFringe {
     }
 }

-impl Ord for LayerVisit {
+impl Ord for ReadDesc {
     fn cmp(&self, other: &Self) -> Ordering {
         let ord = self.lsn_range.end.cmp(&other.lsn_range.end);
         if ord == std::cmp::Ordering::Equal {
@@ -404,19 +390,19 @@ impl Ord for LayerVisit {
     }
 }

-impl PartialOrd for LayerVisit {
+impl PartialOrd for ReadDesc {
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
         Some(self.cmp(other))
     }
 }

-impl PartialEq for LayerVisit {
+impl PartialEq for ReadDesc {
     fn eq(&self, other: &Self) -> bool {
         self.lsn_range == other.lsn_range
     }
 }

-impl Eq for LayerVisit {}
+impl Eq for ReadDesc {}

 impl ReadableLayer {
     pub(crate) fn id(&self) -> LayerId {
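Under either naming, the fringe's `BinaryHeap` is a max-heap ordered primarily by `lsn_range.end`, so the visit covering the most recent LSNs is popped first and the read path descends through layers from newest data downward. A stripped-down sketch of that ordering with plain `u64` LSNs (the tie-break below is an assumption; the real tie-break body falls outside the visible hunk):

```rust
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::ops::Range;

/// One planned layer visit; the ordering mirrors the fringe's heap key.
#[derive(Debug, PartialEq, Eq)]
struct Visit {
    lsn_range: Range<u64>,
}

impl Ord for Visit {
    fn cmp(&self, other: &Self) -> Ordering {
        // Primary key: end LSN, so the max-heap pops the newest range first.
        // Tie-break on start LSN (an assumption for this sketch).
        self.lsn_range
            .end
            .cmp(&other.lsn_range.end)
            .then(self.lsn_range.start.cmp(&other.lsn_range.start))
    }
}

impl PartialOrd for Visit {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    let mut planned = BinaryHeap::new();
    planned.push(Visit { lsn_range: 0x10..0x20 });
    planned.push(Visit { lsn_range: 0x10..0x50 });
    planned.push(Visit { lsn_range: 0x20..0x30 });
    // Visits come out newest-first: ends 0x50, then 0x30, then 0x20.
    let order: Vec<u64> = std::iter::from_fn(|| planned.pop())
        .map(|v| v.lsn_range.end)
        .collect();
    assert_eq!(order, vec![0x50, 0x30, 0x20]);
}
```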

View File

@@ -39,7 +39,7 @@ use crate::tenant::disk_btree::{
 use crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT;
 use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::vectored_blob_io::{
-    BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
+    BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
     VectoredReadCoalesceMode, VectoredReadPlanner,
 };
 use crate::tenant::PageReconstructError;
@@ -1021,30 +1021,13 @@ impl DeltaLayerInner {
                         continue;
                     }
                 };
-                let view = BufView::new_slice(&blobs_buf.buf);
                 for meta in blobs_buf.blobs.iter().rev() {
                     if Some(meta.meta.key) == ignore_key_with_err {
                         continue;
                     }
-                    let blob_read = meta.read(&view).await;
-                    let blob_read = match blob_read {
-                        Ok(buf) => buf,
-                        Err(e) => {
-                            reconstruct_state.on_key_error(
-                                meta.meta.key,
-                                PageReconstructError::Other(anyhow!(e).context(format!(
-                                    "Failed to decompress blob from virtual file {}",
-                                    self.file.path,
-                                ))),
-                            );
-                            ignore_key_with_err = Some(meta.meta.key);
-                            continue;
-                        }
-                    };
-
-                    let value = Value::des(&blob_read);
+                    let value = Value::des(&blobs_buf.buf[meta.start..meta.end]);
                     let value = match value {
                         Ok(v) => v,
                         Err(e) => {
@@ -1260,21 +1243,21 @@ impl DeltaLayerInner {
             buf.reserve(read.size());
             let res = reader.read_blobs(&read, buf, ctx).await?;

-            let view = BufView::new_slice(&res.buf);
-
             for blob in res.blobs {
                 let key = blob.meta.key;
                 let lsn = blob.meta.lsn;
-                let data = blob.read(&view).await?;
+                let data = &res.buf[blob.start..blob.end];

                 #[cfg(debug_assertions)]
-                Value::des(&data)
+                Value::des(data)
                     .with_context(|| {
                         format!(
-                            "blob failed to deserialize for {}: {:?}",
-                            blob,
-                            utils::Hex(&data)
+                            "blob failed to deserialize for {}@{}, {}..{}: {:?}",
+                            blob.meta.key,
+                            blob.meta.lsn,
+                            blob.start,
+                            blob.end,
+                            utils::Hex(data)
                         )
                     })
                     .unwrap();
@@ -1282,15 +1265,15 @@ impl DeltaLayerInner {
                 // is it an image or will_init walrecord?
                 // FIXME: this could be handled by threading the BlobRef to the
                 // VectoredReadBuilder
-                let will_init = crate::repository::ValueBytes::will_init(&data)
+                let will_init = crate::repository::ValueBytes::will_init(data)
                     .inspect_err(|_e| {
                         #[cfg(feature = "testing")]
-                        tracing::error!(data=?utils::Hex(&data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value");
+                        tracing::error!(data=?utils::Hex(data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value");
                     })
                     .unwrap_or(false);

                 per_blob_copy.clear();
-                per_blob_copy.extend_from_slice(&data);
+                per_blob_copy.extend_from_slice(data);

                 let (tmp, res) = writer
                     .put_value_bytes(
@@ -1555,11 +1538,8 @@ impl<'a> DeltaLayerIterator<'a> {
                 .read_blobs(&plan, buf, self.ctx)
                 .await?;
             let frozen_buf = blobs_buf.buf.freeze();
-            let view = BufView::new_bytes(frozen_buf);
             for meta in blobs_buf.blobs.iter() {
-                let blob_read = meta.read(&view).await?;
-                let value = Value::des(&blob_read)?;
+                let value = Value::des(&frozen_buf[meta.start..meta.end])?;
                 next_batch.push_back((meta.meta.key, meta.meta.lsn, value));
             }
             self.key_values_batch = next_batch;
@@ -1936,13 +1916,9 @@ pub(crate) mod test {
             let blobs_buf = vectored_blob_reader
                 .read_blobs(&read, buf.take().expect("Should have a buffer"), &ctx)
                 .await?;
-            let view = BufView::new_slice(&blobs_buf.buf);
             for meta in blobs_buf.blobs.iter() {
-                let value = meta.read(&view).await?;
-                assert_eq!(
-                    &value[..],
-                    &entries_meta.index[&(meta.meta.key, meta.meta.lsn)]
-                );
+                let value = &blobs_buf.buf[meta.start..meta.end];
+                assert_eq!(value, entries_meta.index[&(meta.meta.key, meta.meta.lsn)]);
             }

             buf = Some(blobs_buf.buf);
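Both sides of this change agree on the underlying buffer layout: a vectored read returns one coalesced buffer plus per-blob `start..end` offsets. The removed side reads each blob through a `BufView`, whose read path can fail (note the decompression error branch), while the remaining side slices the buffer directly. A minimal sketch of the offsets-into-one-buffer pattern, with hypothetical stand-in types:

```rust
/// Metadata for one blob inside a single coalesced read buffer.
struct BlobMeta {
    start: usize,
    end: usize,
}

/// Result of a vectored read: one buffer, many (start, end) windows.
struct BlobsBuf {
    buf: Vec<u8>,
    blobs: Vec<BlobMeta>,
}

fn main() {
    // Two "blobs" packed back to back, as a coalesced read would produce.
    let res = BlobsBuf {
        buf: b"helloworld".to_vec(),
        blobs: vec![BlobMeta { start: 0, end: 5 }, BlobMeta { start: 5, end: 10 }],
    };
    // The slicing code path: no intermediate view, just one slice per blob.
    let values: Vec<&[u8]> = res
        .blobs
        .iter()
        .map(|meta| &res.buf[meta.start..meta.end])
        .collect();
    assert_eq!(values, vec![b"hello".as_slice(), b"world".as_slice()]);
}
```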

View File

@@ -1,205 +0,0 @@
use std::ops::Range;
use anyhow::bail;
use pageserver_api::{
key::Key,
keyspace::{KeySpace, SparseKeySpace},
};
use utils::lsn::Lsn;
use crate::repository::Value;
use super::merge_iterator::MergeIterator;
/// A filter iterator over merge iterators (and can be easily extended to other types of iterators).
///
/// The iterator will skip any keys not included in the keyspace filter. In other words, the keyspace filter contains the keys
/// to be retained.
pub struct FilterIterator<'a> {
inner: MergeIterator<'a>,
retain_key_filters: Vec<Range<Key>>,
current_filter_idx: usize,
}
impl<'a> FilterIterator<'a> {
pub fn create(
inner: MergeIterator<'a>,
dense_keyspace: KeySpace,
sparse_keyspace: SparseKeySpace,
) -> anyhow::Result<Self> {
let mut retain_key_filters = Vec::new();
retain_key_filters.extend(dense_keyspace.ranges);
retain_key_filters.extend(sparse_keyspace.0.ranges);
retain_key_filters.sort_by(|a, b| a.start.cmp(&b.start));
// Verify key filters are non-overlapping and sorted
for window in retain_key_filters.windows(2) {
if window[0].end > window[1].start {
bail!(
"Key filters are overlapping: {:?} and {:?}",
window[0],
window[1]
);
}
}
Ok(Self {
inner,
retain_key_filters,
current_filter_idx: 0,
})
}
pub async fn next(&mut self) -> anyhow::Result<Option<(Key, Lsn, Value)>> {
while let Some(item) = self.inner.next().await? {
while self.current_filter_idx < self.retain_key_filters.len()
&& item.0 >= self.retain_key_filters[self.current_filter_idx].end
{
// [filter region] [filter region] [filter region]
// ^ item
// ^ current filter
self.current_filter_idx += 1;
// [filter region] [filter region] [filter region]
// ^ item
// ^ current filter
}
if self.current_filter_idx >= self.retain_key_filters.len() {
// We already exhausted all filters, so we should return now
// [filter region] [filter region] [filter region]
// ^ item
// ^ current filter (nothing)
return Ok(None);
}
if self.retain_key_filters[self.current_filter_idx].contains(&item.0) {
// [filter region] [filter region] [filter region]
// ^ item
// ^ current filter
return Ok(Some(item));
}
// If the key is not contained in the key retaining filters, continue to the next item.
// [filter region] [filter region] [filter region]
// ^ item
// ^ current filter
}
Ok(None)
}
}
#[cfg(test)]
mod tests {
use super::*;
use itertools::Itertools;
use pageserver_api::key::Key;
use utils::lsn::Lsn;
use crate::{
tenant::{
harness::{TenantHarness, TIMELINE_ID},
storage_layer::delta_layer::test::produce_delta_layer,
},
DEFAULT_PG_VERSION,
};
async fn assert_filter_iter_equal(
filter_iter: &mut FilterIterator<'_>,
expect: &[(Key, Lsn, Value)],
) {
let mut expect_iter = expect.iter();
loop {
let o1 = filter_iter.next().await.unwrap();
let o2 = expect_iter.next();
assert_eq!(o1.is_some(), o2.is_some());
if o1.is_none() && o2.is_none() {
break;
}
let (k1, l1, v1) = o1.unwrap();
let (k2, l2, v2) = o2.unwrap();
assert_eq!(&k1, k2);
assert_eq!(l1, *l2);
assert_eq!(&v1, v2);
}
}
#[tokio::test]
async fn filter_keyspace_iterator() {
use crate::repository::Value;
use bytes::Bytes;
let harness = TenantHarness::create("filter_iterator_filter_keyspace_iterator")
.await
.unwrap();
let (tenant, ctx) = harness.load().await;
let tline = tenant
.create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
.await
.unwrap();
fn get_key(id: u32) -> Key {
let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap();
key.field6 = id;
key
}
const N: usize = 100;
let test_deltas1 = (0..N)
.map(|idx| {
(
get_key(idx as u32),
Lsn(0x20 * ((idx as u64) % 10 + 1)),
Value::Image(Bytes::from(format!("img{idx:05}"))),
)
})
.collect_vec();
let resident_layer_1 = produce_delta_layer(&tenant, &tline, test_deltas1.clone(), &ctx)
.await
.unwrap();
let merge_iter = MergeIterator::create(
&[resident_layer_1.get_as_delta(&ctx).await.unwrap()],
&[],
&ctx,
);
let mut filter_iter = FilterIterator::create(
merge_iter,
KeySpace {
ranges: vec![
get_key(5)..get_key(10),
get_key(20)..get_key(30),
get_key(90)..get_key(110),
get_key(1000)..get_key(2000),
],
},
SparseKeySpace(KeySpace::default()),
)
.unwrap();
let mut result = Vec::new();
result.extend(test_deltas1[5..10].iter().cloned());
result.extend(test_deltas1[20..30].iter().cloned());
result.extend(test_deltas1[90..100].iter().cloned());
assert_filter_iter_equal(&mut filter_iter, &result).await;
let merge_iter = MergeIterator::create(
&[resident_layer_1.get_as_delta(&ctx).await.unwrap()],
&[],
&ctx,
);
let mut filter_iter = FilterIterator::create(
merge_iter,
KeySpace {
ranges: vec![
get_key(0)..get_key(10),
get_key(20)..get_key(30),
get_key(90)..get_key(95),
],
},
SparseKeySpace(KeySpace::default()),
)
.unwrap();
let mut result = Vec::new();
result.extend(test_deltas1[0..10].iter().cloned());
result.extend(test_deltas1[20..30].iter().cloned());
result.extend(test_deltas1[90..95].iter().cloned());
assert_filter_iter_equal(&mut filter_iter, &result).await;
}
}
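The deleted iterator relies on both of its inputs being sorted: the merge stream yields keys in order and the retained ranges are sorted and non-overlapping, so a single forward-only cursor suffices, and iteration can stop as soon as the filters are exhausted. The same idea over plain `u32` keys, as a hedged sketch:

```rust
use std::ops::Range;

/// Filter a sorted stream of keys by sorted, non-overlapping ranges,
/// advancing one cursor instead of scanning every range per item.
fn filter_sorted(items: &[u32], filters: &[Range<u32>]) -> Vec<u32> {
    let mut idx = 0;
    let mut out = Vec::new();
    for &item in items {
        // Skip filters that end at or before this item.
        while idx < filters.len() && item >= filters[idx].end {
            idx += 1;
        }
        if idx >= filters.len() {
            break; // filters exhausted; mirrors the iterator's Ok(None)
        }
        if filters[idx].contains(&item) {
            out.push(item);
        }
        // Otherwise the item falls in a gap before filters[idx]; drop it.
    }
    out
}

fn main() {
    let filters = [5..10, 20..30, 90..110];
    let items: Vec<u32> = (0..100).collect();
    let kept = filter_sorted(&items, &filters);
    assert_eq!(kept.first(), Some(&5));
    assert_eq!(kept.len(), 5 + 10 + 10); // from 5..10, 20..30, 90..100
}
```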

Some files were not shown because too many files have changed in this diff.